From 39ae1203beb28702e4af081cd9c11b0bb9da232d Mon Sep 17 00:00:00 2001 From: Mark Veidemanis Date: Mon, 21 Nov 2022 19:43:23 +0000 Subject: [PATCH] Begin refactoring Elastic backend to use helper functions --- app/local_settings.example.py | 5 +- core/db/__init__.py | 18 ++-- core/db/elastic.py | 187 ++++++++++++++++------------------ 3 files changed, 102 insertions(+), 108 deletions(-) diff --git a/app/local_settings.example.py b/app/local_settings.example.py index f0a1bbe..d9d562f 100644 --- a/app/local_settings.example.py +++ b/app/local_settings.example.py @@ -58,8 +58,7 @@ DRILLDOWN_DEFAULT_PARAMS = { # # Delay results by this many days # DELAY_DURATION = 10 -ELASTICSEARCH_BLACKLISTED = { - } +ELASTICSEARCH_BLACKLISTED = {} # URLs\ @@ -103,4 +102,4 @@ META_MAX_CHUNK_SIZE = 500 META_QUERY_SIZE = 10000 DEBUG = True -PROFILER = False \ No newline at end of file +PROFILER = False diff --git a/core/db/__init__.py b/core/db/__init__.py index c8029cd..5364877 100644 --- a/core/db/__init__.py +++ b/core/db/__init__.py @@ -1,6 +1,7 @@ import random import string import time +from abc import ABC, abstractmethod from datetime import datetime from math import floor, log10 @@ -14,7 +15,7 @@ from core.util import logs from core.views import helpers -class StorageBackend(object): +class StorageBackend(ABC): def __init__(self, name): self.log = logs.get_logger(name) self.log.info(f"Initialising storage backend {name}") @@ -22,8 +23,9 @@ class StorageBackend(object): self.initialise_caching() self.initialise() + @abstractmethod def initialise(self, **kwargs): - raise NotImplementedError + pass def initialise_caching(self): hash_key = r.get("cache_hash_key") @@ -37,11 +39,13 @@ class StorageBackend(object): self.log.debug(f"Decoded hash key: {hash_key}") self.hash_key = hash_key + @abstractmethod def construct_query(self, **kwargs): - raise NotImplementedError + pass + @abstractmethod def run_query(self, **kwargs): - raise NotImplementedError + pass def parse_size(self, query_params, sizes): if "size" in query_params: @@ -308,8 +312,9 @@ class StorageBackend(object): time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1) return {"object_list": response_parsed, "took": time_took_rounded} + @abstractmethod def query_results(self, **kwargs): - raise NotImplementedError + pass def process_results(self, response, **kwargs): if kwargs.get("annotate"): @@ -321,5 +326,6 @@ class StorageBackend(object): dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"] response = helpers.dedup_list(response, dedup_fields) + @abstractmethod def parse(self, response): - raise NotImplementedError + pass diff --git a/core/db/elastic.py b/core/db/elastic.py index 768e92c..2eb72dd 100644 --- a/core/db/elastic.py +++ b/core/db/elastic.py @@ -10,7 +10,7 @@ from core.db import StorageBackend # from json import dumps # pp = lambda x: print(dumps(x, indent=2)) from core.db.processing import annotate_results, parse_results -from core.views.helpers import dedup_list +from core.views import helpers class ElasticsearchBackend(StorageBackend): @@ -133,16 +133,9 @@ class ElasticsearchBackend(StorageBackend): reverse=False, dedup=False, dedup_fields=None, - lookup_hashes=True, tags=None, ): - """ - API helper to alter the Elasticsearch return format into something - a bit better to parse. - Accept a HTTP request object. Run the query, and annotate the - results with the other data we have. - """ - # is_anonymous = isinstance(request.user, AnonymousUser) + query = None message = None message_class = None @@ -152,42 +145,95 @@ class ElasticsearchBackend(StorageBackend): sort = None query_created = False - # Lookup the hash values but don't disclose them to the user - # denied = [] - # if lookup_hashes: - # if settings.HASHING: - # query_params = deepcopy(query_params) - # denied_q = hash_lookup(request.user, query_params) - # denied.extend(denied_q) - # if tags: - # denied_t = hash_lookup(request.user, tags, query_params) - # denied.extend(denied_t) + helpers.add_defaults(query_params) - # message = "Permission denied: " - # for x in denied: - # if isinstance(x, SearchDenied): - # message += f"Search({x.key}: {x.value}) " - # elif isinstance(x, LookupDenied): - # message += f"Lookup({x.key}: {x.value}) " - # if denied: - # # message = [f"{i}" for i in message] - # # message = "\n".join(message) - # message_class = "danger" - # return {"message": message, "class": message_class} + # Now, run the helpers for SIQTSRSS/ADR + # S - Size + # I - Index + # Q - Query + # T - Tags + # S - Source + # R - Ranges + # S - Sort + # S - Sentiment + # A - Annotate + # D - Dedup + # R - Reverse + # S - Size if request.user.is_anonymous: sizes = settings.MAIN_SIZES_ANON else: sizes = settings.MAIN_SIZES if not size: - if "size" in query_params: - size = query_params["size"] - if size not in sizes: - message = "Size is not permitted" - message_class = "danger" - return {"message": message, "class": message_class} + size = self.parse_size(query_params, sizes) + if isinstance(size, dict): + return size + + # I - Index + if "index" in query_params: + index = query_params["index"] + if index == "main": + index = settings.INDEX_MAIN else: - size = 20 + if not request.user.has_perm(f"core.index_{index}"): + message = "Not permitted to search by this index" + message_class = "danger" + return { + "message": message, + "class": message_class, + } + if index == "meta": + index = settings.INDEX_META + elif index == "internal": + index = settings.INDEX_INT + else: + message = "Index is not valid." + message_class = "danger" + return { + "message": message, + "class": message_class, + } + + else: + index = settings.INDEX_MAIN + + # Q/T - Query/Tags + # Only one of query or query_full can be active at once + # We prefer query because it's simpler + if "query" in query_params: + query = query_params["query"] + search_query = self.construct_query(query, size, tokens=True) + query_created = True + elif "query_full" in query_params: + query_full = query_params["query_full"] + # if request.user.has_perm("core.query_search"): + search_query = self.construct_query(query_full, size) + query_created = True + # else: + # message = "You cannot search by query string" + # message_class = "danger" + # return {"message": message, "class": message_class} + else: + if custom_query: + search_query = custom_query + + if tags: + # Get a blank search query + if not query_created: + search_query = self.construct_query(None, size, use_query_string=False) + query_created = True + for tagname, tagvalue in tags.items(): + add_bool.append({tagname: tagvalue}) + + required_any = ["query_full", "query", "tags"] + if not any([field in query_params.keys() for field in required_any]): + if not custom_query: + message = "Empty query!" + message_class = "warning" + return {"message": message, "class": message_class} + + # S - Sources source = None if "source" in query_params: source = query_params["source"] @@ -218,6 +264,7 @@ class ElasticsearchBackend(StorageBackend): add_top_tmp["bool"]["should"].append({"match_phrase": {"src": source_iter}}) add_top.append(add_top_tmp) + # R - Ranges # date_query = False if set({"from_date", "to_date", "from_time", "to_time"}).issubset( query_params.keys() @@ -267,6 +314,7 @@ class ElasticsearchBackend(StorageBackend): # } # add_top.append(range_query) + # S - Sort if "sorting" in query_params: sorting = query_params["sorting"] if sorting not in ("asc", "desc", "none"): @@ -282,6 +330,7 @@ class ElasticsearchBackend(StorageBackend): } ] + # S - Sentiment if "check_sentiment" in query_params: if "sentiment_method" not in query_params: message = "No sentiment method" @@ -315,40 +364,6 @@ class ElasticsearchBackend(StorageBackend): range_query_precise["match"]["sentiment"] = 0 add_top_negative.append(range_query_precise) - # Only one of query or query_full can be active at once - # We prefer query because it's simpler - if "query" in query_params: - query = query_params["query"] - search_query = self.construct_query(query, size, tokens=True) - query_created = True - elif "query_full" in query_params: - query_full = query_params["query_full"] - # if request.user.has_perm("core.query_search"): - search_query = self.construct_query(query_full, size) - query_created = True - # else: - # message = "You cannot search by query string" - # message_class = "danger" - # return {"message": message, "class": message_class} - else: - if custom_query: - search_query = custom_query - - if tags: - # Get a blank search query - if not query_created: - search_query = self.construct_query(None, size, use_query_string=False) - query_created = True - for tagname, tagvalue in tags.items(): - add_bool.append({tagname: tagvalue}) - - required_any = ["query_full", "query", "tags"] - if not any([field in query_params.keys() for field in required_any]): - if not custom_query: - message = "Empty query!" - message_class = "warning" - return {"message": message, "class": message_class} - if add_bool: # if "bool" not in search_query["query"]: # search_query["query"]["bool"] = {} @@ -369,33 +384,6 @@ class ElasticsearchBackend(StorageBackend): if sort: search_query["sort"] = sort - if "index" in query_params: - index = query_params["index"] - if index == "main": - index = settings.INDEX_MAIN - else: - if not request.user.has_perm(f"core.index_{index}"): - message = "Not permitted to search by this index" - message_class = "danger" - return { - "message": message, - "class": message_class, - } - if index == "meta": - index = settings.INDEX_META - elif index == "internal": - index = settings.INDEX_INT - else: - message = "Index is not valid." - message_class = "danger" - return { - "message": message, - "class": message_class, - } - - else: - index = settings.INDEX_MAIN - results = self.query( request.user, # passed through run_main_query to filter_blacklisted search_query, @@ -416,6 +404,7 @@ class ElasticsearchBackend(StorageBackend): results_parsed = parse_results(results) + # A/D/R - Annotate/Dedup/Reverse if annotate: annotate_results(results_parsed) if "dedup" in query_params: @@ -432,7 +421,7 @@ class ElasticsearchBackend(StorageBackend): if dedup: if not dedup_fields: dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"] - results_parsed = dedup_list(results_parsed, dedup_fields) + results_parsed = helpers.dedup_list(results_parsed, dedup_fields) # if source not in settings.SAFE_SOURCES: # if settings.ENCRYPTION: