# Source: neptune/core/db/manticore.py
#
# (viewer metadata at time of capture: 331 lines, 9.8 KiB, Python)

import logging
from datetime import datetime
from pprint import pprint
import httpx
import orjson
import requests
from django.conf import settings
from core.db import StorageBackend, add_defaults, dedup_list
from core.db.processing import parse_results
from core.lib.parsing import (
QueryError,
parse_date_time,
parse_index,
parse_rule,
parse_sentiment,
parse_size,
parse_sort,
parse_source,
)
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
class ManticoreBackend(StorageBackend):
    """
    Storage backend that queries Manticore over HTTP.

    Requests are POSTed to ``settings.MANTICORE_URL``; the sync path uses
    ``requests`` and the async path uses ``httpx``. Methods whose body is
    only a ``TODO`` are not implemented yet and return ``None``.

    NOTE(review): ``parse_query``, ``add_bool``, ``add_top``, ``query`` and
    ``process_results`` are called on ``self`` but are not defined in this
    class; presumably they are inherited from ``StorageBackend`` - confirm.
    """

    def __init__(self):
        """Register this backend with the base class under "manticore"."""
        super().__init__("manticore")

    def initialise(self, **kwargs):
        """
        Initialise the Manticore client.

        Nothing to set up: the sync path issues plain ``requests`` calls.
        """

    async def async_initialise(self, **kwargs):
        """
        Initialise the Manticore client in async mode.

        Nothing to set up: the async path opens an ``httpx.AsyncClient``
        for each query.
        """

    def delete_rule_entries(self, rule_id):
        """
        Delete all entries for a given rule.

        Not implemented yet; currently does nothing and returns ``None``.

        :param rule_id: The rule ID to delete.
        """
        # TODO

    def construct_query(self, query, size=None, blank=False, **kwargs):
        """
        Accept some query parameters and construct a Manticore JSON query.

        :param query: The query string placed under ``query_string``.
        :param size: Result limit; falls back to 5 when falsy.
        :param blank: When true, the ``query_string`` clause is left out and
            the ``must`` list stays empty.
        :param kwargs: ``index`` is read from here (``None`` if absent).
        :return: The query dict with ``index``, ``limit`` and ``query`` keys.
        """
        if not size:
            size = 5
        must_clauses = []
        if not blank:
            must_clauses.append({"query_string": query})
        query_base = {
            "index": kwargs.get("index"),
            "limit": size,
            "query": {"bool": {"must": must_clauses}},
        }
        # Debug logging instead of printing to stdout.
        logger.debug("Constructed Manticore base query: %s", query_base)
        return query_base

    def parse(self, response, **kwargs):
        """
        Parse a raw response by delegating to ``parse_results``.

        :param response: The response to parse.
        :param kwargs: Forwarded unchanged to ``parse_results``.
        :return: Whatever ``parse_results`` returns.
        """
        return parse_results(response, **kwargs)

    def run_query(self, user, search_query, **kwargs):
        """
        Low level helper to run Manticore query.

        :param user: Unused by this method.
        :param search_query: The query to send. Unless ``raw`` is set, it is
            mutated in place to carry the ``index`` from ``kwargs``.
        :param kwargs: ``index`` (target index), ``raw`` (send
            ``search_query`` as the request body rather than as JSON) and
            ``path`` (endpoint path, default ``json/search``).
        :return: The response body decoded with ``orjson``.
        """
        index = kwargs.get("index")
        raw = kwargs.get("raw")
        if search_query and not raw:
            search_query["index"] = index
        logger.debug("Running Manticore query: %s", search_query)
        path = kwargs.get("path", "json/search")
        url = f"{settings.MANTICORE_URL}/{path}"
        if raw:
            # Raw mode: the query is sent as the request body, not as JSON.
            response = requests.post(url, data=search_query)
        else:
            response = requests.post(url, json=search_query)
        return orjson.loads(response.text)

    async def async_run_query(self, user, search_query, **kwargs):
        """
        Low level helper to run Manticore query asynchronously.

        :param user: Unused by this method.
        :param search_query: The query dict to send; mutated in place to
            carry the ``index`` from ``kwargs``.
        :param kwargs: ``index`` is read from here (``None`` if absent).
        :return: The response body decoded with ``orjson``.
        """
        search_query["index"] = kwargs.get("index")
        logger.debug("Running async Manticore query: %s", search_query)
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{settings.MANTICORE_URL}/json/search", json=search_query
            )
        return orjson.loads(response.text)

    async def async_store_matches(self, matches):
        """
        Store a list of matches in Manticore.

        Not implemented yet; currently does nothing and returns ``None``.

        :param matches: A list of matches to store.
        """
        # TODO

    def store_matches(self, matches):
        """
        Store a list of matches in Manticore.

        Not implemented yet; currently does nothing and returns ``None``.

        :param matches: A list of matches to store.
        """
        # TODO

    def prepare_schedule_query(self, rule_object):
        """
        Helper to run a scheduled query with reduced functionality.

        Not implemented yet; currently does nothing and returns ``None``.
        """
        # TODO

    def schedule_query_results_test_sync(self, rule_object):
        """
        Helper to run a scheduled query test with reduced functionality.
        Sync version for running from Django forms.
        Does not return results.

        Not implemented yet; currently does nothing.
        """
        # TODO

    async def schedule_query_results(self, rule_object):
        """
        Helper to run a scheduled query with reduced functionality and async.

        Not implemented yet; currently does nothing and returns ``None``.
        """
        # TODO

    def query_results(
        self,
        request,
        query_params,
        size=None,
        annotate=True,
        custom_query=False,
        reverse=False,
        dedup=False,
        dedup_fields=None,
        tags=None,
    ):
        """
        Build a search from ``query_params``, run it and post-process it.

        Each parsing helper may hand back a dict instead of its normal
        value; such a dict is returned to the caller unchanged (presumably a
        ``{"message": ..., "class": ...}`` payload - confirm against the
        helpers).

        :param request: Request whose ``user`` drives size limits, rule,
            index and source parsing.
        :param query_params: Search parameters; passed to the parsing
            helpers and to ``add_defaults``.
        :param size: Result size; parsed from ``query_params`` when falsy.
        :param annotate: Forwarded to ``self.process_results``.
        :param custom_query: Forwarded to ``self.parse_query``.
        :param reverse: Forwarded to ``self.process_results``.
        :param dedup: Forwarded to ``self.process_results``.
        :param dedup_fields: Forwarded to ``self.process_results``.
        :param tags: Forwarded to ``self.parse_query``.
        :return: A dict: either a message payload, or the query response
            with its ``object_list`` replaced by the processed results.
        """
        add_bool = []
        add_top = []
        add_top_negative = []

        add_defaults(query_params)

        # Now, run the helpers for SIQTSRSS/ADR
        # S - Size
        # I - Index
        # Q - Query
        # T - Tags
        # S - Source
        # R - Ranges
        # S - Sort
        # S - Sentiment
        # A - Annotate
        # D - Dedup
        # R - Reverse

        # S - Size
        if request.user.is_anonymous:
            sizes = settings.MAIN_SIZES_ANON
        else:
            sizes = settings.MAIN_SIZES
        if not size:
            size = parse_size(query_params, sizes)
            if isinstance(size, dict):
                return size

        rule_object = parse_rule(request.user, query_params)
        if isinstance(rule_object, dict):
            return rule_object

        if rule_object is not None:
            # Rule searches read from the rule storage index and are
            # restricted to entries for that rule.
            index = settings.INDEX_RULE_STORAGE
            add_bool.append({"rule_id": str(rule_object.id)})
        else:
            # I - Index
            index = parse_index(request.user, query_params)
            if isinstance(index, dict):
                return index

        # Q/T - Query/Tags
        search_query = self.parse_query(
            query_params, tags, size, custom_query, add_bool
        )
        # Query should be a dict, so check if it contains message here
        if "message" in search_query:
            return search_query

        # S - Sources
        sources = parse_source(request.user, query_params)
        if isinstance(sources, dict):
            return sources
        total_count = len(sources)
        # Total is -1 due to the "all" source
        total_sources = (
            len(settings.MAIN_SOURCES) - 1 + len(settings.SOURCES_RESTRICTED)
        )
        # If the sources the user has access to are equal to all
        # possible sources, then we don't need to add the source
        # filter to the query.
        if total_count != total_sources and query_params["source"] != "all":
            add_top.append(
                {
                    "bool": {
                        "should": [
                            {"match_phrase": {"src": source_iter}}
                            for source_iter in sources
                        ]
                    }
                }
            )

        # R - Ranges
        from_ts, to_ts = parse_date_time(query_params)
        if from_ts:
            add_top.append({"range": {"ts": {"gt": from_ts, "lt": to_ts}}})

        # S - Sort
        sort = parse_sort(query_params)
        if isinstance(sort, dict):
            return sort
        # Rule results are ordered by match time rather than message time.
        field = "match_ts" if rule_object is not None else "ts"
        if sort:
            # For Druid compatibility
            sort_map = {"ascending": "asc", "descending": "desc"}
            search_query["sort"] = [{field: {"order": sort_map[sort]}}]

        # S - Sentiment
        sentiment_r = parse_sentiment(query_params)
        if isinstance(sentiment_r, dict):
            return sentiment_r
        if sentiment_r:
            if rule_object is not None:
                sentiment_index = "meta.aggs.avg_sentiment.value"
            else:
                sentiment_index = "sentiment"
            sentiment_method, sentiment = sentiment_r
            if sentiment_method == "below":
                add_top.append({"range": {sentiment_index: {"lt": sentiment}}})
            elif sentiment_method == "above":
                add_top.append({"range": {sentiment_index: {"gt": sentiment}}})
            elif sentiment_method == "exact":
                add_top.append({"match": {sentiment_index: sentiment}})
            elif sentiment_method == "nonzero":
                # "nonzero" is expressed as a negated match on zero.
                add_top_negative.append({"match": {sentiment_index: 0}})

        # Add in the additional information we already populated
        self.add_bool(search_query, add_bool)
        self.add_top(search_query, add_top)
        self.add_top(search_query, add_top_negative, negative=True)

        response = self.query(
            request.user,
            search_query,
            index=index,
        )
        if not response:
            return {"message": "Error running query", "class": "danger"}
        if "error" in response:
            return {"message": response["error"], "class": "danger"}
        if "message" in response:
            return response

        # A/D/R - Annotate/Dedup/Reverse
        response["object_list"] = self.process_results(
            response["object_list"],
            annotate=annotate,
            dedup=dedup,
            dedup_fields=dedup_fields,
            reverse=reverse,
        )
        return response