|
|
|
@ -1,6 +1,7 @@
|
|
|
|
|
import random
|
|
|
|
|
import string
|
|
|
|
|
import time
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from math import floor, log10
|
|
|
|
|
|
|
|
|
|
import orjson
|
|
|
|
@ -10,6 +11,7 @@ from siphashc import siphash
|
|
|
|
|
from core import r
|
|
|
|
|
from core.db.processing import annotate_results
|
|
|
|
|
from core.util import logs
|
|
|
|
|
from core.views import helpers
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class StorageBackend(object):
|
|
|
|
@ -71,6 +73,15 @@ class StorageBackend(object):
|
|
|
|
|
index = settings.INDEX_META
|
|
|
|
|
elif index == "internal":
|
|
|
|
|
index = settings.INDEX_INT
|
|
|
|
|
elif index == "restricted":
|
|
|
|
|
if not user.has_perm("core.restricted_sources"):
|
|
|
|
|
message = "Not permitted to search by this index"
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {
|
|
|
|
|
"message": message,
|
|
|
|
|
"class": message_class,
|
|
|
|
|
}
|
|
|
|
|
index = settings.INDEX_RESTRICTED
|
|
|
|
|
else:
|
|
|
|
|
message = "Index is not valid."
|
|
|
|
|
message_class = "danger"
|
|
|
|
@ -83,6 +94,7 @@ class StorageBackend(object):
|
|
|
|
|
return index
|
|
|
|
|
|
|
|
|
|
def parse_query(self, query_params, tags, size, index, custom_query, add_bool):
|
|
|
|
|
query_created = False
|
|
|
|
|
if "query" in query_params:
|
|
|
|
|
query = query_params["query"]
|
|
|
|
|
search_query = self.construct_query(query, size, index)
|
|
|
|
@ -90,6 +102,8 @@ class StorageBackend(object):
|
|
|
|
|
else:
|
|
|
|
|
if custom_query:
|
|
|
|
|
search_query = custom_query
|
|
|
|
|
else:
|
|
|
|
|
search_query = self.construct_query(None, size, index, blank=True)
|
|
|
|
|
|
|
|
|
|
if tags:
|
|
|
|
|
# Get a blank search query
|
|
|
|
@ -99,6 +113,13 @@ class StorageBackend(object):
|
|
|
|
|
for tagname, tagvalue in tags.items():
|
|
|
|
|
add_bool.append({tagname: tagvalue})
|
|
|
|
|
|
|
|
|
|
valid = self.check_valid_query(query_params, custom_query)
|
|
|
|
|
if isinstance(valid, dict):
|
|
|
|
|
return valid
|
|
|
|
|
|
|
|
|
|
return search_query
|
|
|
|
|
|
|
|
|
|
def check_valid_query(self, query_params, custom_query):
|
|
|
|
|
required_any = ["query", "tags"]
|
|
|
|
|
if not any([field in query_params.keys() for field in required_any]):
|
|
|
|
|
if not custom_query:
|
|
|
|
@ -106,8 +127,6 @@ class StorageBackend(object):
|
|
|
|
|
message_class = "warning"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
|
|
|
|
|
return search_query
|
|
|
|
|
|
|
|
|
|
def parse_source(self, user, query_params):
|
|
|
|
|
if "source" in query_params:
|
|
|
|
|
source = query_params["source"]
|
|
|
|
@ -133,11 +152,59 @@ class StorageBackend(object):
|
|
|
|
|
for source_iter in settings.SOURCES_RESTRICTED:
|
|
|
|
|
sources.append(source_iter)
|
|
|
|
|
|
|
|
|
|
if "all" in sources:
|
|
|
|
|
sources.remove("all")
|
|
|
|
|
|
|
|
|
|
return sources
|
|
|
|
|
|
|
|
|
|
def parse_sort(self, query_params):
    """
    Extract and normalise the requested sort direction.

    Returns "ascending"/"descending" for "asc"/"desc", None when no
    sorting was requested (or "none" was given), or an error context
    dict when the value is not recognised.
    """
    if "sorting" not in query_params:
        return None

    requested = query_params["sorting"]
    if requested not in ("asc", "desc", "none"):
        return {"message": "Invalid sort", "class": "danger"}

    # "none" is valid but maps to no explicit sort (None).
    direction_map = {"asc": "ascending", "desc": "descending"}
    return direction_map.get(requested)
|
|
|
|
|
|
|
|
|
|
def parse_date_time(self, query_params):
    """
    Parse the from/to date and time query parameters into datetimes.

    All four fields (from_date, from_time, to_date, to_time) must be
    present together; otherwise (None, None) is returned. Values are
    expected as "YYYY-MM-DD" dates and "HH:MM" times.
    """
    required = {"from_date", "to_date", "from_time", "to_time"}
    if not required.issubset(query_params.keys()):
        return (None, None)

    # Stitch date and time into one timestamp string, then parse both.
    fmt = "%Y-%m-%dT%H:%MZ"
    start = datetime.strptime(
        f"{query_params['from_date']}T{query_params['from_time']}Z", fmt
    )
    end = datetime.strptime(
        f"{query_params['to_date']}T{query_params['to_time']}Z", fmt
    )
    return (start, end)
|
|
|
|
|
|
|
|
|
|
def parse_sentiment(self, query_params):
    """
    Extract sentiment filtering options from the query parameters.

    Returns None when sentiment checking was not requested, an error
    context dict on invalid input, or a (sentiment_method, sentiment)
    tuple — sentiment is None when no threshold value was supplied.
    """
    if "check_sentiment" not in query_params:
        return None

    if "sentiment_method" not in query_params:
        return {"message": "No sentiment method", "class": "danger"}

    threshold = None
    if "sentiment" in query_params:
        try:
            threshold = float(query_params["sentiment"])
        except ValueError:
            return {"message": "Sentiment is not a float", "class": "danger"}

    return (query_params["sentiment_method"], threshold)
|
|
|
|
|
|
|
|
|
|
def filter_blacklisted(self, user, response):
|
|
|
|
|
"""
|
|
|
|
|
Low level filter to take the raw OpenSearch response and remove
|
|
|
|
|
Low level filter to take the raw search response and remove
|
|
|
|
|
objects from it we want to keep secret.
|
|
|
|
|
Does not return, the object is mutated in place.
|
|
|
|
|
"""
|
|
|
|
@ -197,11 +264,28 @@ class StorageBackend(object):
|
|
|
|
|
cache_hit = r.get(f"query_cache.{user.id}.{hash}")
|
|
|
|
|
if cache_hit:
|
|
|
|
|
response = orjson.loads(cache_hit)
|
|
|
|
|
response["cache"] = True
|
|
|
|
|
return response
|
|
|
|
|
print("CACHE HIT", response)
|
|
|
|
|
|
|
|
|
|
time_took = (time.process_time() - start) * 1000
|
|
|
|
|
# Round to 3 significant figures
|
|
|
|
|
time_took_rounded = round(
|
|
|
|
|
time_took, 3 - int(floor(log10(abs(time_took)))) - 1
|
|
|
|
|
)
|
|
|
|
|
return {
|
|
|
|
|
"object_list": response,
|
|
|
|
|
"took": time_took_rounded,
|
|
|
|
|
"cache": True,
|
|
|
|
|
}
|
|
|
|
|
response = self.run_query(user, search_query)
|
|
|
|
|
if "error" in response and len(response.keys()) == 1:
|
|
|
|
|
return response
|
|
|
|
|
if "error" in response:
|
|
|
|
|
if "errorMessage" in response:
|
|
|
|
|
context = {
|
|
|
|
|
"message": response["errorMessage"],
|
|
|
|
|
"class": "danger",
|
|
|
|
|
}
|
|
|
|
|
return context
|
|
|
|
|
else:
|
|
|
|
|
return response
|
|
|
|
|
# response = response.to_dict()
|
|
|
|
|
# print("RESP", response)
|
|
|
|
|
if "took" in response:
|
|
|
|
@ -209,15 +293,15 @@ class StorageBackend(object):
|
|
|
|
|
return None
|
|
|
|
|
self.filter_blacklisted(user, response)
|
|
|
|
|
|
|
|
|
|
# Parse the response
|
|
|
|
|
response_parsed = self.parse(response)
|
|
|
|
|
|
|
|
|
|
# Write cache
|
|
|
|
|
if settings.CACHE:
|
|
|
|
|
to_write_cache = orjson.dumps(response)
|
|
|
|
|
to_write_cache = orjson.dumps(response_parsed)
|
|
|
|
|
r.set(f"query_cache.{user.id}.{hash}", to_write_cache)
|
|
|
|
|
r.expire(f"query_cache.{user.id}.{hash}", settings.CACHE_TIMEOUT)
|
|
|
|
|
|
|
|
|
|
# Parse the response
|
|
|
|
|
response_parsed = self.parse(response)
|
|
|
|
|
|
|
|
|
|
time_took = (time.process_time() - start) * 1000
|
|
|
|
|
# Round to 3 significant figures
|
|
|
|
|
time_took_rounded = round(time_took, 3 - int(floor(log10(abs(time_took)))) - 1)
|
|
|
|
@ -226,9 +310,15 @@ class StorageBackend(object):
|
|
|
|
|
def query_results(self, **kwargs):
    """
    Run a search query and return processed results.

    Abstract hook: concrete storage backends must override this.
    """
    raise NotImplementedError
|
|
|
|
|
|
|
|
|
|
# NOTE(review): this span is unified-diff residue with the +/- gutter
# markers stripped, so old AND new versions of several lines both appear
# below. The pairs are marked; do not treat this as a single runnable body.
def process_results(self, **kwargs):
|
|
|
|
|
# NOTE(review): replacement signature for the line above — the results
# list is now passed positionally as `response` instead of kwargs["results"].
def process_results(self, response, **kwargs):
|
|
|
|
|
# Optionally annotate results in place (annotate_results mutates its arg).
if kwargs.get("annotate"):
|
|
|
|
|
annotate_results(kwargs["results"])
|
|
|
|
|
# NOTE(review): replacement for the call above, matching the new signature.
annotate_results(response)
|
|
|
|
|
if kwargs.get("dedup"):
|
|
|
|
|
# Reverse the result order before deduplicating.
response = response[::-1]
|
|
|
|
|
# NOTE(review): duplicated condition — one of the two
# `if kwargs.get("dedup"):` lines is the old version; cannot tell which
# from the stripped diff.
if kwargs.get("dedup"):
|
|
|
|
|
if not kwargs.get("dedup_fields"):
|
|
|
|
|
# Default fields used to consider two results duplicates.
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
|
|
|
|
|
# NOTE(review): when kwargs DOES supply dedup_fields, the local
# `dedup_fields` is never assigned before this call — looks like a
# NameError; verify against the full post-diff file.
response = helpers.dedup_list(response, dedup_fields)
|
|
|
|
|
|
|
|
|
|
def parse(self, response):
    """
    Convert a raw backend response into result dictionaries.

    Abstract hook: concrete storage backends must override this.
    """
    raise NotImplementedError
|
|
|
|
|