Begin refactoring Elastic backend to use helper functions
parent 61f93390d9
commit 39ae1203be
@@ -58,8 +58,7 @@ DRILLDOWN_DEFAULT_PARAMS = {
 # # Delay results by this many days
 # DELAY_DURATION = 10
 
-ELASTICSEARCH_BLACKLISTED = {
-}
+ELASTICSEARCH_BLACKLISTED = {}
 
 
 # URLs\
@@ -1,6 +1,7 @@
 import random
 import string
 import time
+from abc import ABC, abstractmethod
 from datetime import datetime
 from math import floor, log10
 
@@ -14,7 +15,7 @@ from core.util import logs
 from core.views import helpers
 
 
-class StorageBackend(object):
+class StorageBackend(ABC):
     def __init__(self, name):
         self.log = logs.get_logger(name)
         self.log.info(f"Initialising storage backend {name}")
@@ -22,8 +23,9 @@ class StorageBackend(object):
         self.initialise_caching()
         self.initialise()
 
+    @abstractmethod
     def initialise(self, **kwargs):
-        raise NotImplementedError
+        pass
 
     def initialise_caching(self):
         hash_key = r.get("cache_hash_key")
@@ -37,11 +39,13 @@ class StorageBackend(object):
         self.log.debug(f"Decoded hash key: {hash_key}")
         self.hash_key = hash_key
 
+    @abstractmethod
     def construct_query(self, **kwargs):
-        raise NotImplementedError
+        pass
 
+    @abstractmethod
     def run_query(self, **kwargs):
-        raise NotImplementedError
+        pass
 
     def parse_size(self, query_params, sizes):
         if "size" in query_params:
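An aside on the object-to-ABC switch: with @abstractmethod, a backend that forgets to override a hook now fails loudly at instantiation time instead of raising NotImplementedError on first call, which is why the method bodies can shrink to pass. A minimal sketch of the pattern (class names here are illustrative, not from this codebase):

    from abc import ABC, abstractmethod

    class Base(ABC):
        @abstractmethod
        def run_query(self, **kwargs):
            pass

    class Impl(Base):
        def run_query(self, **kwargs):
            return {"hits": []}

    try:
        Base()  # TypeError: can't instantiate abstract class Base
    except TypeError as e:
        print(e)

    print(Impl().run_query())  # the hook is overridden, so this works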
@@ -308,8 +312,9 @@ class StorageBackend(object):
         time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1)
         return {"object_list": response_parsed, "took": time_took_rounded}
 
+    @abstractmethod
     def query_results(self, **kwargs):
-        raise NotImplementedError
+        pass
 
     def process_results(self, response, **kwargs):
         if kwargs.get("annotate"):
@@ -321,5 +326,6 @@ class StorageBackend(object):
             dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
             response = helpers.dedup_list(response, dedup_fields)
 
+    @abstractmethod
     def parse(self, response):
-        raise NotImplementedError
+        pass
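With all five hooks (initialise, construct_query, run_query, query_results, parse) marked abstract, StorageBackend now reads as a template: shared plumbing such as caching, parse_size, process_results and timing stays in the base class, and each backend only fills in the hooks. A hypothetical minimal backend, just to show the required surface (the class and its return values are made up for illustration):

    class DummyBackend(StorageBackend):
        def initialise(self, **kwargs):
            self.client = None  # a real backend would connect to its store here

        def construct_query(self, **kwargs):
            return {}  # build a backend-native query object

        def run_query(self, **kwargs):
            return []  # execute the query, return the raw response

        def query_results(self, **kwargs):
            return {"object_list": [], "took": 0}

        def parse(self, response):
            return response  # normalise the raw response for process_results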
@@ -10,7 +10,7 @@ from core.db import StorageBackend
 # from json import dumps
 # pp = lambda x: print(dumps(x, indent=2))
 from core.db.processing import annotate_results, parse_results
-from core.views.helpers import dedup_list
+from core.views import helpers
 
 
 class ElasticsearchBackend(StorageBackend):
@@ -133,16 +133,9 @@ class ElasticsearchBackend(StorageBackend):
         reverse=False,
         dedup=False,
         dedup_fields=None,
-        lookup_hashes=True,
         tags=None,
     ):
-        """
-        API helper to alter the Elasticsearch return format into something
-        a bit better to parse.
-        Accept a HTTP request object. Run the query, and annotate the
-        results with the other data we have.
-        """
-        # is_anonymous = isinstance(request.user, AnonymousUser)
         query = None
         message = None
         message_class = None
@@ -152,42 +145,95 @@ class ElasticsearchBackend(StorageBackend):
         sort = None
         query_created = False
 
-        # Lookup the hash values but don't disclose them to the user
-        # denied = []
-        # if lookup_hashes:
-        #     if settings.HASHING:
-        #         query_params = deepcopy(query_params)
-        #         denied_q = hash_lookup(request.user, query_params)
-        #         denied.extend(denied_q)
-        #         if tags:
-        #             denied_t = hash_lookup(request.user, tags, query_params)
-        #             denied.extend(denied_t)
-
-        # message = "Permission denied: "
-        # for x in denied:
-        #     if isinstance(x, SearchDenied):
-        #         message += f"Search({x.key}: {x.value}) "
-        #     elif isinstance(x, LookupDenied):
-        #         message += f"Lookup({x.key}: {x.value}) "
-        # if denied:
-        #     # message = [f"{i}" for i in message]
-        #     # message = "\n".join(message)
-        #     message_class = "danger"
-        #     return {"message": message, "class": message_class}
+        helpers.add_defaults(query_params)
+
+        # Now, run the helpers for SIQTSRSS/ADR
+        # S - Size
+        # I - Index
+        # Q - Query
+        # T - Tags
+        # S - Source
+        # R - Ranges
+        # S - Sort
+        # S - Sentiment
+        # A - Annotate
+        # D - Dedup
+        # R - Reverse
 
+        # S - Size
         if request.user.is_anonymous:
             sizes = settings.MAIN_SIZES_ANON
         else:
             sizes = settings.MAIN_SIZES
         if not size:
-            if "size" in query_params:
-                size = query_params["size"]
-                if size not in sizes:
-                    message = "Size is not permitted"
-                    message_class = "danger"
-                    return {"message": message, "class": message_class}
+            size = self.parse_size(query_params, sizes)
+            if isinstance(size, dict):
+                return size
+
+        # I - Index
+        if "index" in query_params:
+            index = query_params["index"]
+            if index == "main":
+                index = settings.INDEX_MAIN
             else:
-                size = 20
+                if not request.user.has_perm(f"core.index_{index}"):
+                    message = "Not permitted to search by this index"
+                    message_class = "danger"
+                    return {
+                        "message": message,
+                        "class": message_class,
+                    }
+                if index == "meta":
+                    index = settings.INDEX_META
+                elif index == "internal":
+                    index = settings.INDEX_INT
+                else:
+                    message = "Index is not valid."
+                    message_class = "danger"
+                    return {
+                        "message": message,
+                        "class": message_class,
+                    }
+
+        else:
+            index = settings.INDEX_MAIN
+
+        # Q/T - Query/Tags
+        # Only one of query or query_full can be active at once
+        # We prefer query because it's simpler
+        if "query" in query_params:
+            query = query_params["query"]
+            search_query = self.construct_query(query, size, tokens=True)
+            query_created = True
+        elif "query_full" in query_params:
+            query_full = query_params["query_full"]
+            # if request.user.has_perm("core.query_search"):
+            search_query = self.construct_query(query_full, size)
+            query_created = True
+            # else:
+            #     message = "You cannot search by query string"
+            #     message_class = "danger"
+            #     return {"message": message, "class": message_class}
+        else:
+            if custom_query:
+                search_query = custom_query
+
+        if tags:
+            # Get a blank search query
+            if not query_created:
+                search_query = self.construct_query(None, size, use_query_string=False)
+                query_created = True
+            for tagname, tagvalue in tags.items():
+                add_bool.append({tagname: tagvalue})
+
+        required_any = ["query_full", "query", "tags"]
+        if not any([field in query_params.keys() for field in required_any]):
+            if not custom_query:
+                message = "Empty query!"
+                message_class = "warning"
+                return {"message": message, "class": message_class}
+
+        # S - Sources
         source = None
         if "source" in query_params:
             source = query_params["source"]
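The size check above shows the contract the new helpers share: return the parsed value on success, or the same {"message": ..., "class": ...} dict the view would render, so the call site can hand it straight back. Reconstructing from the inline code this hunk deletes, parse_size presumably has roughly this shape (a sketch, not the committed body):

    def parse_size(self, query_params, sizes):
        if "size" in query_params:
            size = query_params["size"]
            if size not in sizes:
                # Same renderable error dict the old inline code returned
                return {"message": "Size is not permitted", "class": "danger"}
        else:
            size = 20  # default taken from the old inline code
        return size

Callers distinguish success from failure with isinstance(size, dict), exactly as the hunk above does.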
@@ -218,6 +264,7 @@ class ElasticsearchBackend(StorageBackend):
                 add_top_tmp["bool"]["should"].append({"match_phrase": {"src": source_iter}})
             add_top.append(add_top_tmp)
 
+        # R - Ranges
         # date_query = False
         if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
             query_params.keys()
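For reference, the entries appended to add_top above are ordinary Elasticsearch bool clauses. With two sources (the source names below are placeholders, not values from this codebase), the constructed filter would look like:

    # Shape of one add_top entry built from a "source" query parameter:
    {
        "bool": {
            "should": [
                {"match_phrase": {"src": "irc"}},
                {"match_phrase": {"src": "discord"}},
            ]
        }
    }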
@@ -267,6 +314,7 @@ class ElasticsearchBackend(StorageBackend):
         #     }
         # add_top.append(range_query)
 
+        # S - Sort
         if "sorting" in query_params:
             sorting = query_params["sorting"]
             if sorting not in ("asc", "desc", "none"):
@@ -282,6 +330,7 @@ class ElasticsearchBackend(StorageBackend):
                 }
             ]
 
+        # S - Sentiment
         if "check_sentiment" in query_params:
             if "sentiment_method" not in query_params:
                 message = "No sentiment method"
@@ -315,40 +364,6 @@ class ElasticsearchBackend(StorageBackend):
             range_query_precise["match"]["sentiment"] = 0
             add_top_negative.append(range_query_precise)
 
-        # Only one of query or query_full can be active at once
-        # We prefer query because it's simpler
-        if "query" in query_params:
-            query = query_params["query"]
-            search_query = self.construct_query(query, size, tokens=True)
-            query_created = True
-        elif "query_full" in query_params:
-            query_full = query_params["query_full"]
-            # if request.user.has_perm("core.query_search"):
-            search_query = self.construct_query(query_full, size)
-            query_created = True
-            # else:
-            #     message = "You cannot search by query string"
-            #     message_class = "danger"
-            #     return {"message": message, "class": message_class}
-        else:
-            if custom_query:
-                search_query = custom_query
-
-        if tags:
-            # Get a blank search query
-            if not query_created:
-                search_query = self.construct_query(None, size, use_query_string=False)
-                query_created = True
-            for tagname, tagvalue in tags.items():
-                add_bool.append({tagname: tagvalue})
-
-        required_any = ["query_full", "query", "tags"]
-        if not any([field in query_params.keys() for field in required_any]):
-            if not custom_query:
-                message = "Empty query!"
-                message_class = "warning"
-                return {"message": message, "class": message_class}
-
         if add_bool:
             # if "bool" not in search_query["query"]:
             #     search_query["query"]["bool"] = {}
@@ -369,33 +384,6 @@ class ElasticsearchBackend(StorageBackend):
         if sort:
             search_query["sort"] = sort
 
-        if "index" in query_params:
-            index = query_params["index"]
-            if index == "main":
-                index = settings.INDEX_MAIN
-            else:
-                if not request.user.has_perm(f"core.index_{index}"):
-                    message = "Not permitted to search by this index"
-                    message_class = "danger"
-                    return {
-                        "message": message,
-                        "class": message_class,
-                    }
-                if index == "meta":
-                    index = settings.INDEX_META
-                elif index == "internal":
-                    index = settings.INDEX_INT
-                else:
-                    message = "Index is not valid."
-                    message_class = "danger"
-                    return {
-                        "message": message,
-                        "class": message_class,
-                    }
-
-        else:
-            index = settings.INDEX_MAIN
-
         results = self.query(
             request.user,  # passed through run_main_query to filter_blacklisted
             search_query,
@@ -416,6 +404,7 @@ class ElasticsearchBackend(StorageBackend):
 
         results_parsed = parse_results(results)
 
+        # A/D/R - Annotate/Dedup/Reverse
         if annotate:
             annotate_results(results_parsed)
         if "dedup" in query_params:
@@ -432,7 +421,7 @@ class ElasticsearchBackend(StorageBackend):
         if dedup:
             if not dedup_fields:
                 dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
-            results_parsed = dedup_list(results_parsed, dedup_fields)
+            results_parsed = helpers.dedup_list(results_parsed, dedup_fields)
 
         # if source not in settings.SAFE_SOURCES:
         #     if settings.ENCRYPTION:
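helpers.dedup_list itself isn't part of this diff; judging from its call sites (a list of result dicts plus a list of field names) it presumably drops rows that repeat the same combination of those fields, along the lines of:

    def dedup_list(data, fields):
        # Sketch of the assumed contract, not the actual helper:
        # keep the first occurrence of each (msg, nick, ident, ...) combination.
        seen = set()
        deduped = []
        for item in data:
            key = tuple(item.get(field) for field in fields)
            if key not in seen:
                seen.add(key)
                deduped.append(item)
        return deduped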