import logging
from datetime import datetime
from pprint import pprint

import httpx
import orjson
import requests
from django.conf import settings

from core.db import StorageBackend, add_defaults, dedup_list
from core.db.processing import parse_results
from core.lib.parsing import (
    QueryError,
    parse_date_time,
    parse_index,
    parse_rule,
    parse_sentiment,
    parse_size,
    parse_sort,
    parse_source,
)

logger = logging.getLogger(__name__)


class ManticoreBackend(StorageBackend):
    """
    Storage backend that talks to Manticore over its HTTP JSON API.

    Queries are POSTed to ``settings.MANTICORE_URL`` with ``requests``
    (sync) or ``httpx`` (async); no persistent client object is kept.
    """

    def __init__(self):
        super().__init__("manticore")

    def initialise(self, **kwargs):
        """
        Initialise the Manticore client
        """
        pass  # we use requests

    async def async_initialise(self, **kwargs):
        """
        Initialise the Manticore client in async mode
        """
        pass  # we use requests

    def delete_rule_entries(self, rule_id):
        """
        Delete all entries for a given rule. Not implemented yet.
        :param rule_id: The rule ID to delete.
        """
        # TODO

    def construct_query(self, query, size=None, blank=False, **kwargs):
        """
        Accept some query parameters and construct a Manticore JSON query.
        :param query: The query string, placed under ``query_string``.
        :param size: Result limit; falls back to 5 when falsy.
        :param blank: When true, no ``query_string`` clause is added.
        :param kwargs: ``index`` is read from here and set on the query.
        :return: The query body as a dict.
        """
        if not size:
            size = 5
        query_base = {
            "index": kwargs.get("index"),
            "limit": size,
            "query": {"bool": {"must": []}},
        }
        # Debug output goes through the module logger rather than stdout.
        logger.debug("Manticore base query: %s", query_base)
        if not blank:
            query_base["query"]["bool"]["must"].append({"query_string": query})
        return query_base

    def parse(self, response, **kwargs):
        """
        Parse a raw backend response by delegating to ``parse_results``.
        :param response: The response to parse.
        :param kwargs: Passed through to ``parse_results`` unchanged.
        :return: Whatever ``parse_results`` returns.
        """
        return parse_results(response, **kwargs)

    def run_query(self, user, search_query, **kwargs):
        """
        Low level helper to run Manticore query.
        :param user: Not used by this method.
        :param search_query: The query body. Sent as JSON, or as the raw
            request body when ``raw`` is set.
        :param kwargs: ``index`` (set on the query when it is non-empty
            and not raw), ``raw`` (send ``search_query`` as the request
            body instead of JSON) and ``path`` (endpoint path, defaults
            to ``json/search``).
        :return: The decoded JSON response.
        """
        index = kwargs.get("index")
        raw = kwargs.get("raw")
        if search_query and not raw:
            search_query["index"] = index
        logger.debug("Running Manticore query: %s", search_query)

        path = kwargs.get("path", "json/search")
        url = f"{settings.MANTICORE_URL}/{path}"
        # NOTE(review): no timeout is set on these requests, so a stalled
        # server blocks the caller indefinitely - consider adding one.
        if raw:
            response = requests.post(url, data=search_query)
        else:
            response = requests.post(url, json=search_query)

        return orjson.loads(response.text)

    async def async_run_query(self, user, search_query, **kwargs):
        """
        Low level helper to run Manticore query asynchronously.
        :param user: Not used by this method.
        :param search_query: The query body; its ``index`` key is always
            overwritten with the ``index`` keyword argument.
        :param kwargs: ``index`` is read from here.
        :return: The decoded JSON response.
        """
        search_query["index"] = kwargs.get("index")
        logger.debug("Running async Manticore query: %s", search_query)
        async with httpx.AsyncClient() as client:
            response = await client.post(
                f"{settings.MANTICORE_URL}/json/search", json=search_query
            )
        return orjson.loads(response.text)

    async def async_store_matches(self, matches):
        """
        Store a list of matches in Manticore. Not implemented yet.
        :param matches: A list of matches to store.
        """
        # TODO

    def store_matches(self, matches):
        """
        Store a list of matches in Manticore. Not implemented yet.
        :param matches: A list of matches to store.
        """
        # TODO

    def prepare_schedule_query(self, rule_object):
        """
        Helper to run a scheduled query with reduced functionality.
        Not implemented yet.
        """
        # TODO

    def schedule_query_results_test_sync(self, rule_object):
        """
        Helper to run a scheduled query test with reduced functionality.
        Sync version for running from Django forms.
        Does not return results. Not implemented yet.
        """
        # TODO

    async def schedule_query_results(self, rule_object):
        """
        Helper to run a scheduled query with reduced functionality and async.
        Not implemented yet.
        """
        # TODO

    def query_results(
        self,
        request,
        query_params,
        size=None,
        annotate=True,
        custom_query=False,
        reverse=False,
        dedup=False,
        dedup_fields=None,
        tags=None,
    ):
        """
        Build a search query from the request parameters, run it and
        post-process the results.

        Each ``parse_*`` helper may return a dict instead of a value; when
        it does, that dict is returned to the caller unchanged (presumably
        a message payload describing the problem - confirm against
        core.lib.parsing).

        NOTE(review): ``parse_query``, ``add_bool``, ``add_top``, ``query``
        and ``process_results`` are not defined in this class; presumably
        they are inherited from ``StorageBackend``.

        :param request: Request whose ``user`` drives size limits, index
            and source access.
        :param query_params: The query parameters; ``add_defaults`` is
            applied to them before parsing.
        :param size: Result size; parsed from ``query_params`` when falsy.
        :param annotate: Passed to ``process_results``.
        :param custom_query: Passed to ``parse_query``.
        :param reverse: Passed to ``process_results``.
        :param dedup: Passed to ``process_results``.
        :param dedup_fields: Passed to ``process_results``.
        :param tags: Passed to ``parse_query``.
        :return: The response dict with a processed ``object_list``, or a
            dict with ``message`` (and usually ``class``) on failure.
        """
        add_bool = []
        add_top = []
        add_top_negative = []

        add_defaults(query_params)

        # Now, run the helpers for SIQTSRSS/ADR
        # S - Size
        # I - Index
        # Q - Query
        # T - Tags
        # S - Source
        # R - Ranges
        # S - Sort
        # S - Sentiment
        # A - Annotate
        # D - Dedup
        # R - Reverse

        # S - Size
        if request.user.is_anonymous:
            sizes = settings.MAIN_SIZES_ANON
        else:
            sizes = settings.MAIN_SIZES
        if not size:
            size = parse_size(query_params, sizes)
            if isinstance(size, dict):
                return size

        rule_object = parse_rule(request.user, query_params)
        if isinstance(rule_object, dict):
            return rule_object

        if rule_object is not None:
            # Rule results live in their own index and are filtered by ID
            index = settings.INDEX_RULE_STORAGE
            add_bool.append({"rule_id": str(rule_object.id)})
        else:
            # I - Index
            index = parse_index(request.user, query_params)
            if isinstance(index, dict):
                return index

        # Q/T - Query/Tags
        search_query = self.parse_query(
            query_params, tags, size, custom_query, add_bool
        )
        # Query should be a dict, so check if it contains message here
        if "message" in search_query:
            return search_query

        # S - Sources
        sources = parse_source(request.user, query_params)
        if isinstance(sources, dict):
            return sources
        total_count = len(sources)
        # Total is -1 due to the "all" source
        total_sources = (
            len(settings.MAIN_SOURCES) - 1 + len(settings.SOURCES_RESTRICTED)
        )

        # If the sources the user has access to are equal to all
        # possible sources, then we don't need to add the source
        # filter to the query.
        if total_count != total_sources:
            source_filter = {
                "bool": {
                    "should": [
                        {"match_phrase": {"src": source_iter}}
                        for source_iter in sources
                    ]
                }
            }
            if query_params["source"] != "all":
                add_top.append(source_filter)

        # R - Ranges
        from_ts, to_ts = parse_date_time(query_params)
        if from_ts:
            range_query = {
                "range": {
                    "ts": {
                        "gt": from_ts,
                        "lt": to_ts,
                    }
                }
            }
            add_top.append(range_query)

        # S - Sort
        sort = parse_sort(query_params)
        if isinstance(sort, dict):
            return sort

        # Rule results are sorted on a different timestamp field
        field = "match_ts" if rule_object is not None else "ts"
        if sort:
            # For Druid compatibility
            sort_map = {"ascending": "asc", "descending": "desc"}
            search_query["sort"] = [
                {
                    field: {
                        "order": sort_map[sort],
                    }
                }
            ]

        # S - Sentiment
        sentiment_r = parse_sentiment(query_params)
        if isinstance(sentiment_r, dict):
            return sentiment_r
        if sentiment_r:
            if rule_object is not None:
                sentiment_index = "meta.aggs.avg_sentiment.value"
            else:
                sentiment_index = "sentiment"
            sentiment_method, sentiment = sentiment_r
            range_query_compare = {"range": {sentiment_index: {}}}
            range_query_precise = {
                "match": {
                    sentiment_index: None,
                }
            }
            if sentiment_method == "below":
                range_query_compare["range"][sentiment_index]["lt"] = sentiment
                add_top.append(range_query_compare)
            elif sentiment_method == "above":
                range_query_compare["range"][sentiment_index]["gt"] = sentiment
                add_top.append(range_query_compare)
            elif sentiment_method == "exact":
                range_query_precise["match"][sentiment_index] = sentiment
                add_top.append(range_query_precise)
            elif sentiment_method == "nonzero":
                # Expressed as a negated match on zero
                range_query_precise["match"][sentiment_index] = 0
                add_top_negative.append(range_query_precise)

        # Add in the additional information we already populated
        self.add_bool(search_query, add_bool)
        self.add_top(search_query, add_top)
        self.add_top(search_query, add_top_negative, negative=True)

        response = self.query(
            request.user,
            search_query,
            index=index,
        )
        if not response:
            return {"message": "Error running query", "class": "danger"}
        if "error" in response:
            return {"message": response["error"], "class": "danger"}
        if "message" in response:
            return response

        # A/D/R - Annotate/Dedup/Reverse
        response["object_list"] = self.process_results(
            response["object_list"],
            annotate=annotate,
            dedup=dedup,
            dedup_fields=dedup_fields,
            reverse=reverse,
        )

        return response