Implement Druid DB fetching

commit bb00475029
parent 202a13cccb
2022-09-30 07:22:22 +01:00
5 changed files with 234 additions and 90 deletions
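Only the shared StorageBackend plumbing appears in the excerpt below; the Druid-specific run_query lives in one of the other changed files. For orientation, a minimal sketch of what a fetch against Druid's SQL endpoint can look like (the broker URL and timeout are assumptions, not this commit's code):

    import requests

    DRUID_SQL_URL = "http://localhost:8082/druid/v2/sql/"  # assumed broker address

    def run_druid_sql(sql):
        """POST a SQL query to the Druid broker.

        Returns a list of row dicts on success, or Druid's error object
        ({"error": ..., "errorMessage": ...}) on failure, which is the same
        shape the error handling further down in this diff checks for.
        """
        resp = requests.post(DRUID_SQL_URL, json={"query": sql}, timeout=10)
        return resp.json()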


@@ -1,6 +1,7 @@
import random
import string
import time
from datetime import datetime
from math import floor, log10
import orjson
@@ -10,6 +11,7 @@ from siphashc import siphash
from core import r
from core.db.processing import annotate_results
from core.util import logs
from core.views import helpers
class StorageBackend(object):
@@ -71,6 +73,15 @@ class StorageBackend(object):
index = settings.INDEX_META
elif index == "internal":
index = settings.INDEX_INT
elif index == "restricted":
if not user.has_perm("core.restricted_sources"):
message = "Not permitted to search by this index"
message_class = "danger"
return {
"message": message,
"class": message_class,
}
index = settings.INDEX_RESTRICTED
else:
message = "Index is not valid."
message_class = "danger"
@@ -83,6 +94,7 @@ class StorageBackend(object):
return index
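Each parse_* helper in this commit returns either a usable value or a ready-made message context, so callers only need a type check. A hedged sketch of the consuming side (the view and template names are hypothetical):

    from django.shortcuts import render

    def search(request):  # hypothetical view
        index = backend.parse_index(request.user, request.GET)
        if isinstance(index, dict):
            # Permission failure or invalid index: render the message as-is
            return render(request, "partials/notify.html", index)
        ...  # continue building the query with the resolved index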
def parse_query(self, query_params, tags, size, index, custom_query, add_bool):
query_created = False
if "query" in query_params:
query = query_params["query"]
search_query = self.construct_query(query, size, index)
@@ -90,6 +102,8 @@ class StorageBackend(object):
else:
if custom_query:
search_query = custom_query
else:
search_query = self.construct_query(None, size, index, blank=True)
if tags:
# Get a blank search query
@@ -99,6 +113,13 @@ class StorageBackend(object):
for tagname, tagvalue in tags.items():
add_bool.append({tagname: tagvalue})
valid = self.check_valid_query(query_params, custom_query)
if isinstance(valid, dict):
return valid
return search_query
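The tags branch above folds each tag into add_bool one pair at a time; for example (values illustrative, and the "must"-clause framing is a sketch rather than something shown in this diff):

    tags = {"type": "msg", "net": "libera"}
    add_bool = []
    for tagname, tagvalue in tags.items():
        add_bool.append({tagname: tagvalue})
    # add_bool == [{"type": "msg"}, {"net": "libera"}], which the backend
    # can later fold into the query as boolean "must"-style clauses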
def check_valid_query(self, query_params, custom_query):
required_any = ["query", "tags"]
if not any([field in query_params.keys() for field in required_any]):
if not custom_query:
@@ -106,8 +127,6 @@ class StorageBackend(object):
message_class = "warning"
return {"message": message, "class": message_class}
return search_query
def parse_source(self, user, query_params):
if "source" in query_params:
source = query_params["source"]
@@ -133,11 +152,59 @@ class StorageBackend(object):
for source_iter in settings.SOURCES_RESTRICTED:
sources.append(source_iter)
if "all" in sources:
sources.remove("all")
return sources
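Illustration of the expansion (settings.SOURCES is an assumed name; SOURCES_RESTRICTED appears in the loop above): requesting "all" swaps in every concrete source the user may read, then strips the placeholder itself:

    # settings.SOURCES == ["irc", "dis"]        (assumed)
    # settings.SOURCES_RESTRICTED == ["4ch"]    (assumed contents)
    # For a user holding core.restricted_sources who requests "all":
    sources = ["irc", "dis", "4ch"]             # "all" itself has been removed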
def parse_sort(self, query_params):
sort = None
if "sorting" in query_params:
sorting = query_params["sorting"]
if sorting not in ("asc", "desc", "none"):
message = "Invalid sort"
message_class = "danger"
return {"message": message, "class": message_class}
if sorting == "asc":
sort = "ascending"
elif sorting == "desc":
sort = "descending"
return sort
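The asc/desc values are mapped to ascending/descending, which matches the order values Druid's native scan queries accept; a sketch of where the mapped value could end up (query shape assumed from Druid's API, not shown in this diff):

    scan_query = {
        "queryType": "scan",
        "dataSource": "messages",               # hypothetical datasource
        "intervals": ["1000-01-01/3000-01-01"],
        "order": sort or "none",                # "ascending" / "descending" / "none"
        "limit": size,
    }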
def parse_date_time(self, query_params):
if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
query_params.keys()
):
from_ts = f"{query_params['from_date']}T{query_params['from_time']}Z"
to_ts = f"{query_params['to_date']}T{query_params['to_time']}Z"
from_ts = datetime.strptime(from_ts, "%Y-%m-%dT%H:%MZ")
to_ts = datetime.strptime(to_ts, "%Y-%m-%dT%H:%MZ")
return (from_ts, to_ts)
return (None, None)
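A worked round-trip through the %Y-%m-%dT%H:%MZ format used above (backend stands in for any StorageBackend instance; values illustrative):

    params = {
        "from_date": "2022-09-01", "from_time": "00:00",
        "to_date": "2022-09-30", "to_time": "23:59",
    }
    from_ts, to_ts = backend.parse_date_time(params)
    # from_ts == datetime(2022, 9, 1, 0, 0)
    # to_ts   == datetime(2022, 9, 30, 23, 59)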
def parse_sentiment(self, query_params):
sentiment = None
if "check_sentiment" in query_params:
if "sentiment_method" not in query_params:
message = "No sentiment method"
message_class = "danger"
return {"message": message, "class": message_class}
if "sentiment" in query_params:
sentiment = query_params["sentiment"]
try:
sentiment = float(sentiment)
except ValueError:
message = "Sentiment is not a float"
message_class = "danger"
return {"message": message, "class": message_class}
sentiment_method = query_params["sentiment_method"]
return (sentiment_method, sentiment)
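Example inputs and outputs, assuming the (stripped) indentation keeps the last two lines inside the check_sentiment branch ("below" is an illustrative method name):

    backend.parse_sentiment({
        "check_sentiment": "on",
        "sentiment_method": "below",
        "sentiment": "0.5",
    })
    # -> ("below", 0.5)

    backend.parse_sentiment({"check_sentiment": "on"})
    # -> {"message": "No sentiment method", "class": "danger"}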
def filter_blacklisted(self, user, response):
"""
-Low level filter to take the raw OpenSearch response and remove
+Low level filter to take the raw search response and remove
objects from it we want to keep secret.
Does not return, the object is mutated in place.
"""
@@ -197,11 +264,28 @@ class StorageBackend(object):
cache_hit = r.get(f"query_cache.{user.id}.{hash}")
if cache_hit:
response = orjson.loads(cache_hit)
response["cache"] = True
return response
print("CACHE HIT", response)
time_took = (time.process_time() - start) * 1000
# Round to 3 significant figures
time_took_rounded = round(
time_took, 3 - int(floor(log10(abs(time_took)))) - 1
)
return {
"object_list": response,
"took": time_took_rounded,
"cache": True,
}
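The round(...) expression keeps three significant figures whatever the magnitude; two worked cases:

    from math import floor, log10

    t = 123.456   # ms: log10 floors to 2, so round to 0 decimal places
    round(t, 3 - int(floor(log10(abs(t)))) - 1)   # -> 123.0
    t = 1.23456   # ms: log10 floors to 0, so round to 2 decimal places
    round(t, 3 - int(floor(log10(abs(t)))) - 1)   # -> 1.23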
response = self.run_query(user, search_query)
if "error" in response and len(response.keys()) == 1:
return response
if "error" in response:
if "errorMessage" in response:
context = {
"message": response["errorMessage"],
"class": "danger",
}
return context
else:
return response
# response = response.to_dict()
# print("RESP", response)
if "took" in response:
@@ -209,15 +293,15 @@ class StorageBackend(object):
return None
self.filter_blacklisted(user, response)
-# Write cache
-if settings.CACHE:
-    to_write_cache = orjson.dumps(response)
-    r.set(f"query_cache.{user.id}.{hash}", to_write_cache)
-    r.expire(f"query_cache.{user.id}.{hash}", settings.CACHE_TIMEOUT)
# Parse the response
response_parsed = self.parse(response)
+# Write cache
+if settings.CACHE:
+    to_write_cache = orjson.dumps(response_parsed)
+    r.set(f"query_cache.{user.id}.{hash}", to_write_cache)
+    r.expire(f"query_cache.{user.id}.{hash}", settings.CACHE_TIMEOUT)
time_took = (time.process_time() - start) * 1000
# Round to 3 significant figures
time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1)
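The hash in the cache key is computed outside these hunks; given the siphash import added at the top of the file, the derivation plausibly looks like the following (the 16-byte key and the OPT_SORT_KEYS normalisation are assumptions):

    import orjson
    from siphashc import siphash

    serialised = orjson.dumps(search_query, option=orjson.OPT_SORT_KEYS)
    hash = siphash("sixteen byte key", serialised)  # assumed hashing key
    cache_key = f"query_cache.{user.id}.{hash}"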
@@ -226,9 +310,15 @@ class StorageBackend(object):
def query_results(self, **kwargs):
raise NotImplementedError
-def process_results(self, **kwargs):
+def process_results(self, response, **kwargs):
    if kwargs.get("annotate"):
-        annotate_results(kwargs["results"])
+        annotate_results(response)
    if kwargs.get("dedup"):
        # Reverse first so deduplication keeps the newest occurrence
        response = response[::-1]
    if kwargs.get("dedup"):
        if not kwargs.get("dedup_fields"):
            dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
        else:
            dedup_fields = kwargs["dedup_fields"]  # avoid an unbound name
        response = helpers.dedup_list(response, dedup_fields)
    return response
def parse(self, response):
raise NotImplementedError
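helpers.dedup_list itself is not part of this diff; a plausible implementation consistent with the call site in process_results (an assumption, not the project's code):

    def dedup_list(results, dedup_fields):
        """Keep one row per unique combination of dedup_fields values."""
        seen = set()
        deduped = []
        for item in results:
            key = tuple(item.get(field) for field in dedup_fields)
            if key not in seen:
                seen.add(key)
                deduped.append(item)
        return deduped

Because process_results reverses the list before deduplicating, a first-occurrence-wins helper like this ends up keeping the newest copy of each duplicate.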