Begin refactoring Elastic backend to use helper functions

This commit is contained in:
Mark Veidemanis 2022-11-21 19:43:23 +00:00
parent 61f93390d9
commit 39ae1203be
Signed by: m
GPG Key ID: 5ACFCEED46C0904F
3 changed files with 102 additions and 108 deletions

View File

@ -58,8 +58,7 @@ DRILLDOWN_DEFAULT_PARAMS = {
# # Delay results by this many days # # Delay results by this many days
# DELAY_DURATION = 10 # DELAY_DURATION = 10
ELASTICSEARCH_BLACKLISTED = { ELASTICSEARCH_BLACKLISTED = {}
}
# URLs\ # URLs\
@ -103,4 +102,4 @@ META_MAX_CHUNK_SIZE = 500
META_QUERY_SIZE = 10000 META_QUERY_SIZE = 10000
DEBUG = True DEBUG = True
PROFILER = False PROFILER = False

View File

@ -1,6 +1,7 @@
import random import random
import string import string
import time import time
from abc import ABC, abstractmethod
from datetime import datetime from datetime import datetime
from math import floor, log10 from math import floor, log10
@ -14,7 +15,7 @@ from core.util import logs
from core.views import helpers from core.views import helpers
class StorageBackend(object): class StorageBackend(ABC):
def __init__(self, name): def __init__(self, name):
self.log = logs.get_logger(name) self.log = logs.get_logger(name)
self.log.info(f"Initialising storage backend {name}") self.log.info(f"Initialising storage backend {name}")
@ -22,8 +23,9 @@ class StorageBackend(object):
self.initialise_caching() self.initialise_caching()
self.initialise() self.initialise()
@abstractmethod
def initialise(self, **kwargs): def initialise(self, **kwargs):
raise NotImplementedError pass
def initialise_caching(self): def initialise_caching(self):
hash_key = r.get("cache_hash_key") hash_key = r.get("cache_hash_key")
@ -37,11 +39,13 @@ class StorageBackend(object):
self.log.debug(f"Decoded hash key: {hash_key}") self.log.debug(f"Decoded hash key: {hash_key}")
self.hash_key = hash_key self.hash_key = hash_key
@abstractmethod
def construct_query(self, **kwargs): def construct_query(self, **kwargs):
raise NotImplementedError pass
@abstractmethod
def run_query(self, **kwargs): def run_query(self, **kwargs):
raise NotImplementedError pass
def parse_size(self, query_params, sizes): def parse_size(self, query_params, sizes):
if "size" in query_params: if "size" in query_params:
@ -308,8 +312,9 @@ class StorageBackend(object):
time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1) time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1)
return {"object_list": response_parsed, "took": time_took_rounded} return {"object_list": response_parsed, "took": time_took_rounded}
@abstractmethod
def query_results(self, **kwargs): def query_results(self, **kwargs):
raise NotImplementedError pass
def process_results(self, response, **kwargs): def process_results(self, response, **kwargs):
if kwargs.get("annotate"): if kwargs.get("annotate"):
@ -321,5 +326,6 @@ class StorageBackend(object):
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"] dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
response = helpers.dedup_list(response, dedup_fields) response = helpers.dedup_list(response, dedup_fields)
@abstractmethod
def parse(self, response): def parse(self, response):
raise NotImplementedError pass

View File

@ -10,7 +10,7 @@ from core.db import StorageBackend
# from json import dumps # from json import dumps
# pp = lambda x: print(dumps(x, indent=2)) # pp = lambda x: print(dumps(x, indent=2))
from core.db.processing import annotate_results, parse_results from core.db.processing import annotate_results, parse_results
from core.views.helpers import dedup_list from core.views import helpers
class ElasticsearchBackend(StorageBackend): class ElasticsearchBackend(StorageBackend):
@ -133,16 +133,9 @@ class ElasticsearchBackend(StorageBackend):
reverse=False, reverse=False,
dedup=False, dedup=False,
dedup_fields=None, dedup_fields=None,
lookup_hashes=True,
tags=None, tags=None,
): ):
"""
API helper to alter the Elasticsearch return format into something
a bit better to parse.
Accept an HTTP request object. Run the query, and annotate the
results with the other data we have.
"""
# is_anonymous = isinstance(request.user, AnonymousUser)
query = None query = None
message = None message = None
message_class = None message_class = None
@ -152,42 +145,95 @@ class ElasticsearchBackend(StorageBackend):
sort = None sort = None
query_created = False query_created = False
# Lookup the hash values but don't disclose them to the user helpers.add_defaults(query_params)
# denied = []
# if lookup_hashes:
# if settings.HASHING:
# query_params = deepcopy(query_params)
# denied_q = hash_lookup(request.user, query_params)
# denied.extend(denied_q)
# if tags:
# denied_t = hash_lookup(request.user, tags, query_params)
# denied.extend(denied_t)
# message = "Permission denied: " # Now, run the helpers for SIQTSRSS/ADR
# for x in denied: # S - Size
# if isinstance(x, SearchDenied): # I - Index
# message += f"Search({x.key}: {x.value}) " # Q - Query
# elif isinstance(x, LookupDenied): # T - Tags
# message += f"Lookup({x.key}: {x.value}) " # S - Source
# if denied: # R - Ranges
# # message = [f"{i}" for i in message] # S - Sort
# # message = "\n".join(message) # S - Sentiment
# message_class = "danger" # A - Annotate
# return {"message": message, "class": message_class} # D - Dedup
# R - Reverse
# S - Size
if request.user.is_anonymous: if request.user.is_anonymous:
sizes = settings.MAIN_SIZES_ANON sizes = settings.MAIN_SIZES_ANON
else: else:
sizes = settings.MAIN_SIZES sizes = settings.MAIN_SIZES
if not size: if not size:
if "size" in query_params: size = self.parse_size(query_params, sizes)
size = query_params["size"] if isinstance(size, dict):
if size not in sizes: return size
message = "Size is not permitted"
message_class = "danger" # I - Index
return {"message": message, "class": message_class} if "index" in query_params:
index = query_params["index"]
if index == "main":
index = settings.INDEX_MAIN
else: else:
size = 20 if not request.user.has_perm(f"core.index_{index}"):
message = "Not permitted to search by this index"
message_class = "danger"
return {
"message": message,
"class": message_class,
}
if index == "meta":
index = settings.INDEX_META
elif index == "internal":
index = settings.INDEX_INT
else:
message = "Index is not valid."
message_class = "danger"
return {
"message": message,
"class": message_class,
}
else:
index = settings.INDEX_MAIN
# Q/T - Query/Tags
# Only one of query or query_full can be active at once
# We prefer query because it's simpler
if "query" in query_params:
query = query_params["query"]
search_query = self.construct_query(query, size, tokens=True)
query_created = True
elif "query_full" in query_params:
query_full = query_params["query_full"]
# if request.user.has_perm("core.query_search"):
search_query = self.construct_query(query_full, size)
query_created = True
# else:
# message = "You cannot search by query string"
# message_class = "danger"
# return {"message": message, "class": message_class}
else:
if custom_query:
search_query = custom_query
if tags:
# Get a blank search query
if not query_created:
search_query = self.construct_query(None, size, use_query_string=False)
query_created = True
for tagname, tagvalue in tags.items():
add_bool.append({tagname: tagvalue})
required_any = ["query_full", "query", "tags"]
if not any([field in query_params.keys() for field in required_any]):
if not custom_query:
message = "Empty query!"
message_class = "warning"
return {"message": message, "class": message_class}
# S - Sources
source = None source = None
if "source" in query_params: if "source" in query_params:
source = query_params["source"] source = query_params["source"]
@ -218,6 +264,7 @@ class ElasticsearchBackend(StorageBackend):
add_top_tmp["bool"]["should"].append({"match_phrase": {"src": source_iter}}) add_top_tmp["bool"]["should"].append({"match_phrase": {"src": source_iter}})
add_top.append(add_top_tmp) add_top.append(add_top_tmp)
# R - Ranges
# date_query = False # date_query = False
if set({"from_date", "to_date", "from_time", "to_time"}).issubset( if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
query_params.keys() query_params.keys()
@ -267,6 +314,7 @@ class ElasticsearchBackend(StorageBackend):
# } # }
# add_top.append(range_query) # add_top.append(range_query)
# S - Sort
if "sorting" in query_params: if "sorting" in query_params:
sorting = query_params["sorting"] sorting = query_params["sorting"]
if sorting not in ("asc", "desc", "none"): if sorting not in ("asc", "desc", "none"):
@ -282,6 +330,7 @@ class ElasticsearchBackend(StorageBackend):
} }
] ]
# S - Sentiment
if "check_sentiment" in query_params: if "check_sentiment" in query_params:
if "sentiment_method" not in query_params: if "sentiment_method" not in query_params:
message = "No sentiment method" message = "No sentiment method"
@ -315,40 +364,6 @@ class ElasticsearchBackend(StorageBackend):
range_query_precise["match"]["sentiment"] = 0 range_query_precise["match"]["sentiment"] = 0
add_top_negative.append(range_query_precise) add_top_negative.append(range_query_precise)
# Only one of query or query_full can be active at once
# We prefer query because it's simpler
if "query" in query_params:
query = query_params["query"]
search_query = self.construct_query(query, size, tokens=True)
query_created = True
elif "query_full" in query_params:
query_full = query_params["query_full"]
# if request.user.has_perm("core.query_search"):
search_query = self.construct_query(query_full, size)
query_created = True
# else:
# message = "You cannot search by query string"
# message_class = "danger"
# return {"message": message, "class": message_class}
else:
if custom_query:
search_query = custom_query
if tags:
# Get a blank search query
if not query_created:
search_query = self.construct_query(None, size, use_query_string=False)
query_created = True
for tagname, tagvalue in tags.items():
add_bool.append({tagname: tagvalue})
required_any = ["query_full", "query", "tags"]
if not any([field in query_params.keys() for field in required_any]):
if not custom_query:
message = "Empty query!"
message_class = "warning"
return {"message": message, "class": message_class}
if add_bool: if add_bool:
# if "bool" not in search_query["query"]: # if "bool" not in search_query["query"]:
# search_query["query"]["bool"] = {} # search_query["query"]["bool"] = {}
@ -369,33 +384,6 @@ class ElasticsearchBackend(StorageBackend):
if sort: if sort:
search_query["sort"] = sort search_query["sort"] = sort
if "index" in query_params:
index = query_params["index"]
if index == "main":
index = settings.INDEX_MAIN
else:
if not request.user.has_perm(f"core.index_{index}"):
message = "Not permitted to search by this index"
message_class = "danger"
return {
"message": message,
"class": message_class,
}
if index == "meta":
index = settings.INDEX_META
elif index == "internal":
index = settings.INDEX_INT
else:
message = "Index is not valid."
message_class = "danger"
return {
"message": message,
"class": message_class,
}
else:
index = settings.INDEX_MAIN
results = self.query( results = self.query(
request.user, # passed through run_main_query to filter_blacklisted request.user, # passed through run_main_query to filter_blacklisted
search_query, search_query,
@ -416,6 +404,7 @@ class ElasticsearchBackend(StorageBackend):
results_parsed = parse_results(results) results_parsed = parse_results(results)
# A/D/R - Annotate/Dedup/Reverse
if annotate: if annotate:
annotate_results(results_parsed) annotate_results(results_parsed)
if "dedup" in query_params: if "dedup" in query_params:
@ -432,7 +421,7 @@ class ElasticsearchBackend(StorageBackend):
if dedup: if dedup:
if not dedup_fields: if not dedup_fields:
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"] dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
results_parsed = dedup_list(results_parsed, dedup_fields) results_parsed = helpers.dedup_list(results_parsed, dedup_fields)
# if source not in settings.SAFE_SOURCES: # if source not in settings.SAFE_SOURCES:
# if settings.ENCRYPTION: # if settings.ENCRYPTION: