2022-09-27 14:15:08 +00:00
|
|
|
import logging
|
|
|
|
from datetime import datetime
|
|
|
|
from pprint import pprint
|
2024-12-29 17:37:23 +00:00
|
|
|
import httpx
|
|
|
|
import orjson
|
2022-09-27 14:15:08 +00:00
|
|
|
|
|
|
|
import requests
|
|
|
|
from django.conf import settings
|
|
|
|
|
2023-01-12 07:20:43 +00:00
|
|
|
from core.db import StorageBackend, add_defaults, dedup_list
|
2024-12-29 17:37:23 +00:00
|
|
|
from core.db.processing import parse_results
|
|
|
|
from core.lib.parsing import (
|
|
|
|
QueryError,
|
|
|
|
parse_date_time,
|
|
|
|
parse_index,
|
|
|
|
parse_rule,
|
|
|
|
parse_sentiment,
|
|
|
|
parse_size,
|
|
|
|
parse_sort,
|
|
|
|
parse_source,
|
|
|
|
)
|
2022-09-27 14:15:08 +00:00
|
|
|
|
|
|
|
# Module-level logger; used instead of print() for diagnostics.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
class ManticoreBackend(StorageBackend):
    """Storage backend that talks to a Manticore search server over HTTP."""

    def __init__(self):
        # Register this backend under the "manticore" name with the base class.
        super().__init__("manticore")
|
2022-09-27 14:15:08 +00:00
|
|
|
|
|
|
|
    def initialise(self, **kwargs):
        """
        Initialise the Manticore client.

        Intentionally a no-op: requests are made per-call over HTTP,
        so there is no persistent client object to set up.
        """
        pass  # we use requests
|
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
    async def async_initialise(self, **kwargs):
        """
        Initialise the Manticore client in async mode.

        Intentionally a no-op: requests are made per-call over HTTP,
        so there is no persistent client object to set up.
        """
        pass  # we use requests
|
|
|
|
|
|
|
|
    def delete_rule_entries(self, rule_id):
        """
        Delete all entries for a given rule.

        :param rule_id: The rule ID to delete.
        """
        # TODO: not implemented for the Manticore backend yet
|
|
|
|
|
|
|
|
def construct_query(self, query, size=None, blank=False, **kwargs):
|
2022-09-27 14:15:08 +00:00
|
|
|
"""
|
|
|
|
Accept some query parameters and construct an OpenSearch query.
|
|
|
|
"""
|
|
|
|
if not size:
|
|
|
|
size = 5
|
|
|
|
query_base = {
|
2024-12-29 17:37:23 +00:00
|
|
|
"index": kwargs.get("index"),
|
2022-09-27 14:15:08 +00:00
|
|
|
"limit": size,
|
|
|
|
"query": {"bool": {"must": []}},
|
|
|
|
}
|
2024-12-29 17:37:23 +00:00
|
|
|
print("BASE", query_base)
|
2022-09-27 14:15:08 +00:00
|
|
|
query_string = {
|
|
|
|
"query_string": query,
|
|
|
|
}
|
|
|
|
if not blank:
|
|
|
|
query_base["query"]["bool"]["must"].append(query_string)
|
|
|
|
return query_base
|
2024-12-29 17:37:23 +00:00
|
|
|
|
|
|
|
def parse(self, response, **kwargs):
|
|
|
|
parsed = parse_results(response, **kwargs)
|
|
|
|
return parsed
|
2022-09-27 14:15:08 +00:00
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
def run_query(self, user, search_query, **kwargs):
|
|
|
|
"""
|
|
|
|
Low level helper to run Manticore query.
|
|
|
|
"""
|
|
|
|
index = kwargs.get("index")
|
|
|
|
raw = kwargs.get("raw")
|
|
|
|
if search_query and not raw:
|
|
|
|
search_query["index"] = index
|
|
|
|
pprint(search_query)
|
|
|
|
|
|
|
|
|
|
|
|
path = kwargs.get("path", "json/search")
|
|
|
|
if raw:
|
|
|
|
response = requests.post(
|
|
|
|
f"{settings.MANTICORE_URL}/{path}", search_query
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
response = requests.post(
|
|
|
|
f"{settings.MANTICORE_URL}/{path}", json=search_query
|
|
|
|
)
|
|
|
|
|
|
|
|
return orjson.loads(response.text)
|
|
|
|
|
|
|
|
async def async_run_query(self, user, search_query, **kwargs):
|
|
|
|
"""
|
|
|
|
Low level helper to run Manticore query asynchronously.
|
|
|
|
"""
|
|
|
|
index = kwargs.get("index")
|
|
|
|
search_query["index"] = index
|
|
|
|
pprint(search_query)
|
|
|
|
async with httpx.AsyncClient() as client:
|
|
|
|
response = await client.post(
|
|
|
|
f"{settings.MANTICORE_URL}/json/search", json=search_query
|
|
|
|
)
|
|
|
|
return orjson.loads(response.text)
|
|
|
|
|
|
|
|
    async def async_store_matches(self, matches):
        """
        Store a list of matches in Manticore (async variant).

        :param matches: A list of matches to store.
        """
        # TODO: not implemented for the Manticore backend yet
|
|
|
|
|
|
|
|
    def store_matches(self, matches):
        """
        Store a list of matches in Manticore.

        :param matches: A list of matches to store.
        """
        # TODO: not implemented for the Manticore backend yet
|
|
|
|
|
|
|
|
    def prepare_schedule_query(self, rule_object):
        """
        Helper to run a scheduled query with reduced functionality.

        :param rule_object: the rule whose query should be prepared.
        """
        # TODO: not implemented for the Manticore backend yet
|
|
|
|
|
|
|
|
    def schedule_query_results_test_sync(self, rule_object):
        """
        Helper to run a scheduled query test with reduced functionality.

        Sync version for running from Django forms.

        Does not return results.

        :param rule_object: the rule under test.
        """
        # TODO: not implemented for the Manticore backend yet
|
|
|
|
|
|
|
|
    async def schedule_query_results(self, rule_object):
        """
        Helper to run a scheduled query with reduced functionality and async.

        :param rule_object: the rule to evaluate.
        """
        # TODO: not implemented for the Manticore backend yet
|
2022-09-27 14:15:08 +00:00
|
|
|
|
|
|
|
def query_results(
|
|
|
|
self,
|
|
|
|
request,
|
|
|
|
query_params,
|
|
|
|
size=None,
|
|
|
|
annotate=True,
|
|
|
|
custom_query=False,
|
|
|
|
reverse=False,
|
|
|
|
dedup=False,
|
|
|
|
dedup_fields=None,
|
|
|
|
tags=None,
|
|
|
|
):
|
|
|
|
query = None
|
|
|
|
message = None
|
|
|
|
message_class = None
|
|
|
|
add_bool = []
|
|
|
|
add_top = []
|
|
|
|
add_top_negative = []
|
|
|
|
sort = None
|
|
|
|
query_created = False
|
|
|
|
source = None
|
2023-01-12 07:20:43 +00:00
|
|
|
add_defaults(query_params)
|
2024-12-29 17:37:23 +00:00
|
|
|
|
|
|
|
# Now, run the helpers for SIQTSRSS/ADR
|
|
|
|
# S - Size
|
|
|
|
# I - Index
|
|
|
|
# Q - Query
|
|
|
|
# T - Tags
|
|
|
|
# S - Source
|
|
|
|
# R - Ranges
|
|
|
|
# S - Sort
|
|
|
|
# S - Sentiment
|
|
|
|
# A - Annotate
|
|
|
|
# D - Dedup
|
|
|
|
# R - Reverse
|
|
|
|
|
|
|
|
# S - Size
|
2022-09-27 14:15:08 +00:00
|
|
|
if request.user.is_anonymous:
|
2024-12-29 17:37:23 +00:00
|
|
|
sizes = settings.MAIN_SIZES_ANON
|
2022-09-27 14:15:08 +00:00
|
|
|
else:
|
2024-12-29 17:37:23 +00:00
|
|
|
sizes = settings.MAIN_SIZES
|
2022-09-27 14:15:08 +00:00
|
|
|
if not size:
|
2024-12-29 17:37:23 +00:00
|
|
|
size = parse_size(query_params, sizes)
|
|
|
|
if isinstance(size, dict):
|
|
|
|
return size
|
|
|
|
|
|
|
|
rule_object = parse_rule(request.user, query_params)
|
|
|
|
if isinstance(rule_object, dict):
|
|
|
|
return rule_object
|
2022-09-27 14:15:08 +00:00
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
if rule_object is not None:
|
|
|
|
index = settings.INDEX_RULE_STORAGE
|
|
|
|
add_bool.append({"rule_id": str(rule_object.id)})
|
2022-09-27 14:15:08 +00:00
|
|
|
else:
|
2024-12-29 17:37:23 +00:00
|
|
|
# I - Index
|
|
|
|
index = parse_index(request.user, query_params)
|
|
|
|
if isinstance(index, dict):
|
|
|
|
return index
|
2022-09-27 14:15:08 +00:00
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
# Q/T - Query/Tags
|
|
|
|
search_query = self.parse_query(
|
|
|
|
query_params, tags, size, custom_query, add_bool
|
|
|
|
)
|
|
|
|
# Query should be a dict, so check if it contains message here
|
|
|
|
if "message" in search_query:
|
|
|
|
return search_query
|
|
|
|
|
|
|
|
# S - Sources
|
|
|
|
sources = parse_source(request.user, query_params)
|
|
|
|
if isinstance(sources, dict):
|
|
|
|
return sources
|
|
|
|
total_count = len(sources)
|
|
|
|
# Total is -1 due to the "all" source
|
|
|
|
total_sources = (
|
|
|
|
len(settings.MAIN_SOURCES) - 1 + len(settings.SOURCES_RESTRICTED)
|
|
|
|
)
|
|
|
|
|
|
|
|
# If the sources the user has access to are equal to all
|
|
|
|
# possible sources, then we don't need to add the source
|
|
|
|
# filter to the query.
|
|
|
|
if total_count != total_sources:
|
|
|
|
add_top_tmp = {"bool": {"should": []}}
|
|
|
|
for source_iter in sources:
|
|
|
|
add_top_tmp["bool"]["should"].append(
|
|
|
|
{"match_phrase": {"src": source_iter}}
|
|
|
|
)
|
|
|
|
if query_params["source"] != "all":
|
|
|
|
add_top.append(add_top_tmp)
|
|
|
|
|
|
|
|
# R - Ranges
|
|
|
|
# date_query = False
|
|
|
|
from_ts, to_ts = parse_date_time(query_params)
|
|
|
|
if from_ts:
|
2022-09-27 14:15:08 +00:00
|
|
|
range_query = {
|
|
|
|
"range": {
|
|
|
|
"ts": {
|
|
|
|
"gt": from_ts,
|
|
|
|
"lt": to_ts,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
add_top.append(range_query)
|
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
# S - Sort
|
|
|
|
sort = parse_sort(query_params)
|
|
|
|
if isinstance(sort, dict):
|
|
|
|
return sort
|
|
|
|
|
|
|
|
if rule_object is not None:
|
|
|
|
field = "match_ts"
|
|
|
|
else:
|
|
|
|
field = "ts"
|
|
|
|
if sort:
|
|
|
|
# For Druid compatibility
|
|
|
|
sort_map = {"ascending": "asc", "descending": "desc"}
|
|
|
|
sorting = [
|
|
|
|
{
|
|
|
|
field: {
|
|
|
|
"order": sort_map[sort],
|
2022-09-27 14:15:08 +00:00
|
|
|
}
|
2024-12-29 17:37:23 +00:00
|
|
|
}
|
|
|
|
]
|
|
|
|
search_query["sort"] = sorting
|
|
|
|
|
|
|
|
# S - Sentiment
|
|
|
|
sentiment_r = parse_sentiment(query_params)
|
|
|
|
if isinstance(sentiment_r, dict):
|
|
|
|
return sentiment_r
|
|
|
|
if sentiment_r:
|
|
|
|
if rule_object is not None:
|
|
|
|
sentiment_index = "meta.aggs.avg_sentiment.value"
|
|
|
|
else:
|
|
|
|
sentiment_index = "sentiment"
|
|
|
|
sentiment_method, sentiment = sentiment_r
|
|
|
|
range_query_compare = {"range": {sentiment_index: {}}}
|
2022-09-27 14:15:08 +00:00
|
|
|
range_query_precise = {
|
|
|
|
"match": {
|
2024-12-29 17:37:23 +00:00
|
|
|
sentiment_index: None,
|
2022-09-27 14:15:08 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if sentiment_method == "below":
|
2024-12-29 17:37:23 +00:00
|
|
|
range_query_compare["range"][sentiment_index]["lt"] = sentiment
|
2022-09-27 14:15:08 +00:00
|
|
|
add_top.append(range_query_compare)
|
|
|
|
elif sentiment_method == "above":
|
2024-12-29 17:37:23 +00:00
|
|
|
range_query_compare["range"][sentiment_index]["gt"] = sentiment
|
2022-09-27 14:15:08 +00:00
|
|
|
add_top.append(range_query_compare)
|
|
|
|
elif sentiment_method == "exact":
|
2024-12-29 17:37:23 +00:00
|
|
|
range_query_precise["match"][sentiment_index] = sentiment
|
2022-09-27 14:15:08 +00:00
|
|
|
add_top.append(range_query_precise)
|
|
|
|
elif sentiment_method == "nonzero":
|
2024-12-29 17:37:23 +00:00
|
|
|
range_query_precise["match"][sentiment_index] = 0
|
2022-09-27 14:15:08 +00:00
|
|
|
add_top_negative.append(range_query_precise)
|
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
# Add in the additional information we already populated
|
|
|
|
self.add_bool(search_query, add_bool)
|
|
|
|
self.add_top(search_query, add_top)
|
|
|
|
self.add_top(search_query, add_top_negative, negative=True)
|
2022-09-27 14:15:08 +00:00
|
|
|
|
2024-12-29 17:37:23 +00:00
|
|
|
response = self.query(
|
|
|
|
request.user,
|
2022-09-27 14:15:08 +00:00
|
|
|
search_query,
|
2024-12-29 17:37:23 +00:00
|
|
|
index=index,
|
2022-09-27 14:15:08 +00:00
|
|
|
)
|
2024-12-29 17:37:23 +00:00
|
|
|
if not response:
|
2022-09-27 14:15:08 +00:00
|
|
|
message = "Error running query"
|
|
|
|
message_class = "danger"
|
|
|
|
return {"message": message, "class": message_class}
|
2024-12-29 17:37:23 +00:00
|
|
|
|
2022-09-27 14:15:08 +00:00
|
|
|
# results = results.to_dict()
|
2024-12-29 17:37:23 +00:00
|
|
|
if "error" in response:
|
|
|
|
message = response["error"]
|
2022-09-27 14:15:08 +00:00
|
|
|
message_class = "danger"
|
|
|
|
return {"message": message, "class": message_class}
|
2024-12-29 17:37:23 +00:00
|
|
|
if "message" in response:
|
|
|
|
return response
|
|
|
|
|
|
|
|
# A/D/R - Annotate/Dedup/Reverse
|
|
|
|
response["object_list"] = self.process_results(
|
|
|
|
response["object_list"],
|
|
|
|
annotate=annotate,
|
|
|
|
dedup=dedup,
|
|
|
|
dedup_fields=dedup_fields,
|
|
|
|
reverse=reverse,
|
|
|
|
)
|
|
|
|
|
|
|
|
context = response
|
2022-09-27 14:15:08 +00:00
|
|
|
return context
|