Begin refactoring Elastic backend to use helper functions
This commit is contained in:
parent
61f93390d9
commit
39ae1203be
|
@ -58,8 +58,7 @@ DRILLDOWN_DEFAULT_PARAMS = {
|
|||
# # Delay results by this many days
|
||||
# DELAY_DURATION = 10
|
||||
|
||||
ELASTICSEARCH_BLACKLISTED = {
|
||||
}
|
||||
ELASTICSEARCH_BLACKLISTED = {}
|
||||
|
||||
|
||||
# URLs\
|
||||
|
@ -103,4 +102,4 @@ META_MAX_CHUNK_SIZE = 500
|
|||
META_QUERY_SIZE = 10000
|
||||
|
||||
DEBUG = True
|
||||
PROFILER = False
|
||||
PROFILER = False
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
import random
|
||||
import string
|
||||
import time
|
||||
from abc import ABC, abstractmethod
|
||||
from datetime import datetime
|
||||
from math import floor, log10
|
||||
|
||||
|
@ -14,7 +15,7 @@ from core.util import logs
|
|||
from core.views import helpers
|
||||
|
||||
|
||||
class StorageBackend(object):
|
||||
class StorageBackend(ABC):
|
||||
def __init__(self, name):
|
||||
self.log = logs.get_logger(name)
|
||||
self.log.info(f"Initialising storage backend {name}")
|
||||
|
@ -22,8 +23,9 @@ class StorageBackend(object):
|
|||
self.initialise_caching()
|
||||
self.initialise()
|
||||
|
||||
@abstractmethod
|
||||
def initialise(self, **kwargs):
|
||||
raise NotImplementedError
|
||||
pass
|
||||
|
||||
def initialise_caching(self):
|
||||
hash_key = r.get("cache_hash_key")
|
||||
|
@ -37,11 +39,13 @@ class StorageBackend(object):
|
|||
self.log.debug(f"Decoded hash key: {hash_key}")
|
||||
self.hash_key = hash_key
|
||||
|
||||
@abstractmethod
|
||||
def construct_query(self, **kwargs):
|
||||
raise NotImplementedError
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def run_query(self, **kwargs):
|
||||
raise NotImplementedError
|
||||
pass
|
||||
|
||||
def parse_size(self, query_params, sizes):
|
||||
if "size" in query_params:
|
||||
|
@ -308,8 +312,9 @@ class StorageBackend(object):
|
|||
time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1)
|
||||
return {"object_list": response_parsed, "took": time_took_rounded}
|
||||
|
||||
@abstractmethod
|
||||
def query_results(self, **kwargs):
|
||||
raise NotImplementedError
|
||||
pass
|
||||
|
||||
def process_results(self, response, **kwargs):
|
||||
if kwargs.get("annotate"):
|
||||
|
@ -321,5 +326,6 @@ class StorageBackend(object):
|
|||
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
|
||||
response = helpers.dedup_list(response, dedup_fields)
|
||||
|
||||
@abstractmethod
|
||||
def parse(self, response):
|
||||
raise NotImplementedError
|
||||
pass
|
||||
|
|
|
@ -10,7 +10,7 @@ from core.db import StorageBackend
|
|||
# from json import dumps
|
||||
# pp = lambda x: print(dumps(x, indent=2))
|
||||
from core.db.processing import annotate_results, parse_results
|
||||
from core.views.helpers import dedup_list
|
||||
from core.views import helpers
|
||||
|
||||
|
||||
class ElasticsearchBackend(StorageBackend):
|
||||
|
@ -133,16 +133,9 @@ class ElasticsearchBackend(StorageBackend):
|
|||
reverse=False,
|
||||
dedup=False,
|
||||
dedup_fields=None,
|
||||
lookup_hashes=True,
|
||||
tags=None,
|
||||
):
|
||||
"""
|
||||
API helper to alter the Elasticsearch return format into something
|
||||
a bit better to parse.
|
||||
Accept an HTTP request object. Run the query, and annotate the
|
||||
results with the other data we have.
|
||||
"""
|
||||
# is_anonymous = isinstance(request.user, AnonymousUser)
|
||||
|
||||
query = None
|
||||
message = None
|
||||
message_class = None
|
||||
|
@ -152,42 +145,95 @@ class ElasticsearchBackend(StorageBackend):
|
|||
sort = None
|
||||
query_created = False
|
||||
|
||||
# Lookup the hash values but don't disclose them to the user
|
||||
# denied = []
|
||||
# if lookup_hashes:
|
||||
# if settings.HASHING:
|
||||
# query_params = deepcopy(query_params)
|
||||
# denied_q = hash_lookup(request.user, query_params)
|
||||
# denied.extend(denied_q)
|
||||
# if tags:
|
||||
# denied_t = hash_lookup(request.user, tags, query_params)
|
||||
# denied.extend(denied_t)
|
||||
helpers.add_defaults(query_params)
|
||||
|
||||
# message = "Permission denied: "
|
||||
# for x in denied:
|
||||
# if isinstance(x, SearchDenied):
|
||||
# message += f"Search({x.key}: {x.value}) "
|
||||
# elif isinstance(x, LookupDenied):
|
||||
# message += f"Lookup({x.key}: {x.value}) "
|
||||
# if denied:
|
||||
# # message = [f"{i}" for i in message]
|
||||
# # message = "\n".join(message)
|
||||
# message_class = "danger"
|
||||
# return {"message": message, "class": message_class}
|
||||
# Now, run the helpers for SIQTSRSS/ADR
|
||||
# S - Size
|
||||
# I - Index
|
||||
# Q - Query
|
||||
# T - Tags
|
||||
# S - Source
|
||||
# R - Ranges
|
||||
# S - Sort
|
||||
# S - Sentiment
|
||||
# A - Annotate
|
||||
# D - Dedup
|
||||
# R - Reverse
|
||||
|
||||
# S - Size
|
||||
if request.user.is_anonymous:
|
||||
sizes = settings.MAIN_SIZES_ANON
|
||||
else:
|
||||
sizes = settings.MAIN_SIZES
|
||||
if not size:
|
||||
if "size" in query_params:
|
||||
size = query_params["size"]
|
||||
if size not in sizes:
|
||||
message = "Size is not permitted"
|
||||
message_class = "danger"
|
||||
return {"message": message, "class": message_class}
|
||||
size = self.parse_size(query_params, sizes)
|
||||
if isinstance(size, dict):
|
||||
return size
|
||||
|
||||
# I - Index
|
||||
if "index" in query_params:
|
||||
index = query_params["index"]
|
||||
if index == "main":
|
||||
index = settings.INDEX_MAIN
|
||||
else:
|
||||
size = 20
|
||||
if not request.user.has_perm(f"core.index_{index}"):
|
||||
message = "Not permitted to search by this index"
|
||||
message_class = "danger"
|
||||
return {
|
||||
"message": message,
|
||||
"class": message_class,
|
||||
}
|
||||
if index == "meta":
|
||||
index = settings.INDEX_META
|
||||
elif index == "internal":
|
||||
index = settings.INDEX_INT
|
||||
else:
|
||||
message = "Index is not valid."
|
||||
message_class = "danger"
|
||||
return {
|
||||
"message": message,
|
||||
"class": message_class,
|
||||
}
|
||||
|
||||
else:
|
||||
index = settings.INDEX_MAIN
|
||||
|
||||
# Q/T - Query/Tags
|
||||
# Only one of query or query_full can be active at once
|
||||
# We prefer query because it's simpler
|
||||
if "query" in query_params:
|
||||
query = query_params["query"]
|
||||
search_query = self.construct_query(query, size, tokens=True)
|
||||
query_created = True
|
||||
elif "query_full" in query_params:
|
||||
query_full = query_params["query_full"]
|
||||
# if request.user.has_perm("core.query_search"):
|
||||
search_query = self.construct_query(query_full, size)
|
||||
query_created = True
|
||||
# else:
|
||||
# message = "You cannot search by query string"
|
||||
# message_class = "danger"
|
||||
# return {"message": message, "class": message_class}
|
||||
else:
|
||||
if custom_query:
|
||||
search_query = custom_query
|
||||
|
||||
if tags:
|
||||
# Get a blank search query
|
||||
if not query_created:
|
||||
search_query = self.construct_query(None, size, use_query_string=False)
|
||||
query_created = True
|
||||
for tagname, tagvalue in tags.items():
|
||||
add_bool.append({tagname: tagvalue})
|
||||
|
||||
required_any = ["query_full", "query", "tags"]
|
||||
if not any([field in query_params.keys() for field in required_any]):
|
||||
if not custom_query:
|
||||
message = "Empty query!"
|
||||
message_class = "warning"
|
||||
return {"message": message, "class": message_class}
|
||||
|
||||
# S - Sources
|
||||
source = None
|
||||
if "source" in query_params:
|
||||
source = query_params["source"]
|
||||
|
@ -218,6 +264,7 @@ class ElasticsearchBackend(StorageBackend):
|
|||
add_top_tmp["bool"]["should"].append({"match_phrase": {"src": source_iter}})
|
||||
add_top.append(add_top_tmp)
|
||||
|
||||
# R - Ranges
|
||||
# date_query = False
|
||||
if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
|
||||
query_params.keys()
|
||||
|
@ -267,6 +314,7 @@ class ElasticsearchBackend(StorageBackend):
|
|||
# }
|
||||
# add_top.append(range_query)
|
||||
|
||||
# S - Sort
|
||||
if "sorting" in query_params:
|
||||
sorting = query_params["sorting"]
|
||||
if sorting not in ("asc", "desc", "none"):
|
||||
|
@ -282,6 +330,7 @@ class ElasticsearchBackend(StorageBackend):
|
|||
}
|
||||
]
|
||||
|
||||
# S - Sentiment
|
||||
if "check_sentiment" in query_params:
|
||||
if "sentiment_method" not in query_params:
|
||||
message = "No sentiment method"
|
||||
|
@ -315,40 +364,6 @@ class ElasticsearchBackend(StorageBackend):
|
|||
range_query_precise["match"]["sentiment"] = 0
|
||||
add_top_negative.append(range_query_precise)
|
||||
|
||||
# Only one of query or query_full can be active at once
|
||||
# We prefer query because it's simpler
|
||||
if "query" in query_params:
|
||||
query = query_params["query"]
|
||||
search_query = self.construct_query(query, size, tokens=True)
|
||||
query_created = True
|
||||
elif "query_full" in query_params:
|
||||
query_full = query_params["query_full"]
|
||||
# if request.user.has_perm("core.query_search"):
|
||||
search_query = self.construct_query(query_full, size)
|
||||
query_created = True
|
||||
# else:
|
||||
# message = "You cannot search by query string"
|
||||
# message_class = "danger"
|
||||
# return {"message": message, "class": message_class}
|
||||
else:
|
||||
if custom_query:
|
||||
search_query = custom_query
|
||||
|
||||
if tags:
|
||||
# Get a blank search query
|
||||
if not query_created:
|
||||
search_query = self.construct_query(None, size, use_query_string=False)
|
||||
query_created = True
|
||||
for tagname, tagvalue in tags.items():
|
||||
add_bool.append({tagname: tagvalue})
|
||||
|
||||
required_any = ["query_full", "query", "tags"]
|
||||
if not any([field in query_params.keys() for field in required_any]):
|
||||
if not custom_query:
|
||||
message = "Empty query!"
|
||||
message_class = "warning"
|
||||
return {"message": message, "class": message_class}
|
||||
|
||||
if add_bool:
|
||||
# if "bool" not in search_query["query"]:
|
||||
# search_query["query"]["bool"] = {}
|
||||
|
@ -369,33 +384,6 @@ class ElasticsearchBackend(StorageBackend):
|
|||
if sort:
|
||||
search_query["sort"] = sort
|
||||
|
||||
if "index" in query_params:
|
||||
index = query_params["index"]
|
||||
if index == "main":
|
||||
index = settings.INDEX_MAIN
|
||||
else:
|
||||
if not request.user.has_perm(f"core.index_{index}"):
|
||||
message = "Not permitted to search by this index"
|
||||
message_class = "danger"
|
||||
return {
|
||||
"message": message,
|
||||
"class": message_class,
|
||||
}
|
||||
if index == "meta":
|
||||
index = settings.INDEX_META
|
||||
elif index == "internal":
|
||||
index = settings.INDEX_INT
|
||||
else:
|
||||
message = "Index is not valid."
|
||||
message_class = "danger"
|
||||
return {
|
||||
"message": message,
|
||||
"class": message_class,
|
||||
}
|
||||
|
||||
else:
|
||||
index = settings.INDEX_MAIN
|
||||
|
||||
results = self.query(
|
||||
request.user, # passed through run_main_query to filter_blacklisted
|
||||
search_query,
|
||||
|
@ -416,6 +404,7 @@ class ElasticsearchBackend(StorageBackend):
|
|||
|
||||
results_parsed = parse_results(results)
|
||||
|
||||
# A/D/R - Annotate/Dedup/Reverse
|
||||
if annotate:
|
||||
annotate_results(results_parsed)
|
||||
if "dedup" in query_params:
|
||||
|
@ -432,7 +421,7 @@ class ElasticsearchBackend(StorageBackend):
|
|||
if dedup:
|
||||
if not dedup_fields:
|
||||
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
|
||||
results_parsed = dedup_list(results_parsed, dedup_fields)
|
||||
results_parsed = helpers.dedup_list(results_parsed, dedup_fields)
|
||||
|
||||
# if source not in settings.SAFE_SOURCES:
|
||||
# if settings.ENCRYPTION:
|
||||
|
|
Loading…
Reference in New Issue