Begin refactoring Elastic backend to use helper functions

This commit is contained in:
Mark Veidemanis 2022-11-21 19:43:23 +00:00
parent 61f93390d9
commit 39ae1203be
Signed by: m
GPG Key ID: 5ACFCEED46C0904F
3 changed files with 102 additions and 108 deletions

View File

@ -58,8 +58,7 @@ DRILLDOWN_DEFAULT_PARAMS = {
# # Delay results by this many days
# DELAY_DURATION = 10
ELASTICSEARCH_BLACKLISTED = {
}
ELASTICSEARCH_BLACKLISTED = {}
# URLs\

View File

@ -1,6 +1,7 @@
import random
import string
import time
from abc import ABC, abstractmethod
from datetime import datetime
from math import floor, log10
@ -14,7 +15,7 @@ from core.util import logs
from core.views import helpers
class StorageBackend(object):
class StorageBackend(ABC):
def __init__(self, name):
self.log = logs.get_logger(name)
self.log.info(f"Initialising storage backend {name}")
@ -22,8 +23,9 @@ class StorageBackend(object):
self.initialise_caching()
self.initialise()
@abstractmethod
def initialise(self, **kwargs):
raise NotImplementedError
pass
def initialise_caching(self):
hash_key = r.get("cache_hash_key")
@ -37,11 +39,13 @@ class StorageBackend(object):
self.log.debug(f"Decoded hash key: {hash_key}")
self.hash_key = hash_key
@abstractmethod
def construct_query(self, **kwargs):
raise NotImplementedError
pass
@abstractmethod
def run_query(self, **kwargs):
raise NotImplementedError
pass
def parse_size(self, query_params, sizes):
if "size" in query_params:
@ -308,8 +312,9 @@ class StorageBackend(object):
time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1)
return {"object_list": response_parsed, "took": time_took_rounded}
@abstractmethod
def query_results(self, **kwargs):
raise NotImplementedError
pass
def process_results(self, response, **kwargs):
if kwargs.get("annotate"):
@ -321,5 +326,6 @@ class StorageBackend(object):
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
response = helpers.dedup_list(response, dedup_fields)
@abstractmethod
def parse(self, response):
raise NotImplementedError
pass

View File

@ -10,7 +10,7 @@ from core.db import StorageBackend
# from json import dumps
# pp = lambda x: print(dumps(x, indent=2))
from core.db.processing import annotate_results, parse_results
from core.views.helpers import dedup_list
from core.views import helpers
class ElasticsearchBackend(StorageBackend):
@ -133,16 +133,9 @@ class ElasticsearchBackend(StorageBackend):
reverse=False,
dedup=False,
dedup_fields=None,
lookup_hashes=True,
tags=None,
):
"""
API helper to alter the Elasticsearch return format into something
a bit better to parse.
Accept a HTTP request object. Run the query, and annotate the
results with the other data we have.
"""
# is_anonymous = isinstance(request.user, AnonymousUser)
query = None
message = None
message_class = None
@ -152,42 +145,95 @@ class ElasticsearchBackend(StorageBackend):
sort = None
query_created = False
# Lookup the hash values but don't disclose them to the user
# denied = []
# if lookup_hashes:
# if settings.HASHING:
# query_params = deepcopy(query_params)
# denied_q = hash_lookup(request.user, query_params)
# denied.extend(denied_q)
# if tags:
# denied_t = hash_lookup(request.user, tags, query_params)
# denied.extend(denied_t)
helpers.add_defaults(query_params)
# message = "Permission denied: "
# for x in denied:
# if isinstance(x, SearchDenied):
# message += f"Search({x.key}: {x.value}) "
# elif isinstance(x, LookupDenied):
# message += f"Lookup({x.key}: {x.value}) "
# if denied:
# # message = [f"{i}" for i in message]
# # message = "\n".join(message)
# message_class = "danger"
# return {"message": message, "class": message_class}
# Now, run the helpers for SIQTSRSS/ADR
# S - Size
# I - Index
# Q - Query
# T - Tags
# S - Source
# R - Ranges
# S - Sort
# S - Sentiment
# A - Annotate
# D - Dedup
# R - Reverse
# S - Size
if request.user.is_anonymous:
sizes = settings.MAIN_SIZES_ANON
else:
sizes = settings.MAIN_SIZES
if not size:
if "size" in query_params:
size = query_params["size"]
if size not in sizes:
message = "Size is not permitted"
message_class = "danger"
return {"message": message, "class": message_class}
size = self.parse_size(query_params, sizes)
if isinstance(size, dict):
return size
# I - Index
if "index" in query_params:
index = query_params["index"]
if index == "main":
index = settings.INDEX_MAIN
else:
size = 20
if not request.user.has_perm(f"core.index_{index}"):
message = "Not permitted to search by this index"
message_class = "danger"
return {
"message": message,
"class": message_class,
}
if index == "meta":
index = settings.INDEX_META
elif index == "internal":
index = settings.INDEX_INT
else:
message = "Index is not valid."
message_class = "danger"
return {
"message": message,
"class": message_class,
}
else:
index = settings.INDEX_MAIN
# Q/T - Query/Tags
# Only one of query or query_full can be active at once
# We prefer query because it's simpler
if "query" in query_params:
query = query_params["query"]
search_query = self.construct_query(query, size, tokens=True)
query_created = True
elif "query_full" in query_params:
query_full = query_params["query_full"]
# if request.user.has_perm("core.query_search"):
search_query = self.construct_query(query_full, size)
query_created = True
# else:
# message = "You cannot search by query string"
# message_class = "danger"
# return {"message": message, "class": message_class}
else:
if custom_query:
search_query = custom_query
if tags:
# Get a blank search query
if not query_created:
search_query = self.construct_query(None, size, use_query_string=False)
query_created = True
for tagname, tagvalue in tags.items():
add_bool.append({tagname: tagvalue})
required_any = ["query_full", "query", "tags"]
if not any([field in query_params.keys() for field in required_any]):
if not custom_query:
message = "Empty query!"
message_class = "warning"
return {"message": message, "class": message_class}
# S - Sources
source = None
if "source" in query_params:
source = query_params["source"]
@ -218,6 +264,7 @@ class ElasticsearchBackend(StorageBackend):
add_top_tmp["bool"]["should"].append({"match_phrase": {"src": source_iter}})
add_top.append(add_top_tmp)
# R - Ranges
# date_query = False
if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
query_params.keys()
@ -267,6 +314,7 @@ class ElasticsearchBackend(StorageBackend):
# }
# add_top.append(range_query)
# S - Sort
if "sorting" in query_params:
sorting = query_params["sorting"]
if sorting not in ("asc", "desc", "none"):
@ -282,6 +330,7 @@ class ElasticsearchBackend(StorageBackend):
}
]
# S - Sentiment
if "check_sentiment" in query_params:
if "sentiment_method" not in query_params:
message = "No sentiment method"
@ -315,40 +364,6 @@ class ElasticsearchBackend(StorageBackend):
range_query_precise["match"]["sentiment"] = 0
add_top_negative.append(range_query_precise)
# Only one of query or query_full can be active at once
# We prefer query because it's simpler
if "query" in query_params:
query = query_params["query"]
search_query = self.construct_query(query, size, tokens=True)
query_created = True
elif "query_full" in query_params:
query_full = query_params["query_full"]
# if request.user.has_perm("core.query_search"):
search_query = self.construct_query(query_full, size)
query_created = True
# else:
# message = "You cannot search by query string"
# message_class = "danger"
# return {"message": message, "class": message_class}
else:
if custom_query:
search_query = custom_query
if tags:
# Get a blank search query
if not query_created:
search_query = self.construct_query(None, size, use_query_string=False)
query_created = True
for tagname, tagvalue in tags.items():
add_bool.append({tagname: tagvalue})
required_any = ["query_full", "query", "tags"]
if not any([field in query_params.keys() for field in required_any]):
if not custom_query:
message = "Empty query!"
message_class = "warning"
return {"message": message, "class": message_class}
if add_bool:
# if "bool" not in search_query["query"]:
# search_query["query"]["bool"] = {}
@ -369,33 +384,6 @@ class ElasticsearchBackend(StorageBackend):
if sort:
search_query["sort"] = sort
if "index" in query_params:
index = query_params["index"]
if index == "main":
index = settings.INDEX_MAIN
else:
if not request.user.has_perm(f"core.index_{index}"):
message = "Not permitted to search by this index"
message_class = "danger"
return {
"message": message,
"class": message_class,
}
if index == "meta":
index = settings.INDEX_META
elif index == "internal":
index = settings.INDEX_INT
else:
message = "Index is not valid."
message_class = "danger"
return {
"message": message,
"class": message_class,
}
else:
index = settings.INDEX_MAIN
results = self.query(
request.user, # passed through run_main_query to filter_blacklisted
search_query,
@ -416,6 +404,7 @@ class ElasticsearchBackend(StorageBackend):
results_parsed = parse_results(results)
# A/D/R - Annotate/Dedup/Reverse
if annotate:
annotate_results(results_parsed)
if "dedup" in query_params:
@ -432,7 +421,7 @@ class ElasticsearchBackend(StorageBackend):
if dedup:
if not dedup_fields:
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
results_parsed = dedup_list(results_parsed, dedup_fields)
results_parsed = helpers.dedup_list(results_parsed, dedup_fields)
# if source not in settings.SAFE_SOURCES:
# if settings.ENCRYPTION: