Implement running scheduled rules and check aggregations
@@ -10,6 +10,15 @@ from core.db import StorageBackend, add_defaults
 # from json import dumps
 # pp = lambda x: print(dumps(x, indent=2))
 from core.db.processing import parse_results
+from core.lib.parsing import (
+    QueryError,
+    parse_date_time,
+    parse_index,
+    parse_sentiment,
+    parse_size,
+    parse_sort,
+    parse_source,
+)
 
 
 class ElasticsearchBackend(StorageBackend):
@@ -126,14 +135,16 @@ class ElasticsearchBackend(StorageBackend):
         )
         return query
 
-    def construct_query(self, query, size, blank=False):
+    def construct_query(self, query, size=None, blank=False):
         """
         Accept some query parameters and construct an Elasticsearch query.
         """
         query_base = {
-            "size": size,
+            # "size": size,
             "query": {"bool": {"must": []}},
         }
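+        # Only include "size" when the caller supplies one; leaving it unset
+        # lets Elasticsearch fall back to its default result count.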
+        if size:
+            query_base["size"] = size
         query_string = {
             "query_string": {
                 "query": query,
@@ -163,8 +174,8 @@ class ElasticsearchBackend(StorageBackend):
         query_base["query"]["bool"]["must"].append(query_string)
         return query_base
 
-    def parse(self, response):
-        parsed = parse_results(response)
+    def parse(self, response, **kwargs):
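+        # Extra keyword arguments (e.g. aggs=True) are forwarded to
+        # parse_results, which then also returns the parsed aggregations.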
+        parsed = parse_results(response, **kwargs)
         return parsed
 
     def run_query(self, user, search_query, **kwargs):
@@ -186,6 +197,127 @@ class ElasticsearchBackend(StorageBackend):
             return err
         return response
 
+    async def async_run_query(self, user, search_query, **kwargs):
+        """
+        Low level helper to run an ES query.
+        Accept a user to pass it to the filter, so we can
+        avoid filtering for superusers.
+        Accept fields and size, for the fields we want to match and the
+        number of results to return.
+        """
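+        # Note: self.client.search() is the synchronous client call, so this
+        # coroutine still blocks the event loop while the query runs.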
+        index = kwargs.get("index")
+        try:
+            response = self.client.search(body=search_query, index=index)
+        except RequestError as err:
+            print("Elasticsearch error", err)
+            return err
+        except NotFoundError as err:
+            print("Elasticsearch error", err)
+            return err
+        return response
+
+    async def schedule_query_results(self, rule_object):
+        """
+        Helper to run a scheduled query with reduced functionality and async.
+        """
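+        # Rough flow: rebuild a search query from the rule's parsed fields,
+        # run it against each of the rule's indices, then check the rule's
+        # aggregation thresholds against the returned aggregations.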
+
+        data = rule_object.parsed
+
+        if "tags" in data:
+            tags = data["tags"]
+        else:
+            tags = []
+
+        if "query" in data:
+            query = data["query"][0]
+            data["query"] = query
+
+        result_map = {}
+
+        add_bool = []
+        add_top = []
+        if "source" in data:
+            total_count = len(data["source"])
+            total_sources = len(settings.MAIN_SOURCES) + len(
+                settings.SOURCES_RESTRICTED
+            )
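+            # Only add a source filter when the rule covers a subset of the
+            # available sources; matching every source needs no filter.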
+            if total_count != total_sources:
+                add_top_tmp = {"bool": {"should": []}}
+                for source_iter in data["source"]:
+                    add_top_tmp["bool"]["should"].append(
+                        {"match_phrase": {"src": source_iter}}
+                    )
+                add_top.append(add_top_tmp)
+        for field, values in data.items():
+            if field not in ["source", "index", "tags", "query", "sentiment"]:
+                for value in values:
+                    add_top.append({"match": {field: value}})
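+        # Build the base query, then attach the collected bool and top-level
+        # clauses onto it.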
+        search_query = self.parse_query(data, tags, None, False, add_bool)
+        self.add_bool(search_query, add_bool)
+        self.add_top(search_query, add_top)
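+        # Sentiment rules compare against the average sentiment of the
+        # matching documents, so request it as an aggregation.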
if "sentiment" in data:
|
||||
search_query["aggs"] = {
|
||||
"avg_sentiment": {
|
||||
"avg": {"field": "sentiment"},
|
||||
}
|
||||
}
|
||||
for index in data["index"]:
|
||||
|
||||
if "message" in search_query:
|
||||
self.log.error(f"Error parsing query: {search_query['message']}")
|
||||
continue
|
||||
+            response = await self.async_run_query(
+                rule_object.user,
+                search_query,
+                index=index,
+            )
+            if isinstance(response, Exception):
+                error = response.info["error"]["root_cause"][0]["reason"]
+                self.log.error(f"Error running scheduled search: {error}")
+                raise QueryError(error)
+            if len(response["hits"]["hits"]) == 0:
+                # No results, skip
+                continue
+            aggs, response = self.parse(response, aggs=True)
+            if "message" in response:
+                self.log.error(f"Error running scheduled search: {response['message']}")
+                continue
+            result_map[index] = (aggs, response)
+
+        # Average aggregation check
+        # Could probably do this in elasticsearch
+        for index, (aggs, result) in result_map.items():
+            # Default to true, if no aggs are found, we still want to match
+            match = True
+            for agg_name, (operator, number) in rule_object.aggs.items():
+                if agg_name in aggs:
+                    agg_value = aggs[agg_name]["value"]
+
+                    # TODO: simplify this, match is default to True
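+                    # A possible simplification, per the TODO above (sketch;
+                    # assumes "import operator as op" and renaming the loop
+                    # variable so it does not shadow the stdlib module):
+                    #   ops = {">": op.gt, "<": op.lt, "=": op.eq}
+                    #   match = ops.get(symbol, lambda *_: False)(agg_value, number)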
if operator == ">":
|
||||
if agg_value > number:
|
||||
match = True
|
||||
else:
|
||||
match = False
|
||||
elif operator == "<":
|
||||
if agg_value < number:
|
||||
match = True
|
||||
else:
|
||||
match = False
|
||||
elif operator == "=":
|
||||
if agg_value == number:
|
||||
match = True
|
||||
else:
|
||||
match = False
|
||||
else:
|
||||
match = False
|
||||
else:
|
||||
# No aggregation found, but it is required
|
||||
match = False
|
||||
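+                # Record the verdict on the aggregation entry. Note this
+                # assumes the aggregation is present in the response; when a
+                # required aggregation was missing above, there is no entry
+                # here to flag.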
+                result_map[index][0][agg_name]["match"] = match
+
+        return result_map
+
     def query_results(
         self,
         request,
@@ -224,12 +356,12 @@ class ElasticsearchBackend(StorageBackend):
         else:
             sizes = settings.MAIN_SIZES
         if not size:
-            size = self.parse_size(query_params, sizes)
+            size = parse_size(query_params, sizes)
             if isinstance(size, dict):
                 return size
 
         # I - Index
-        index = self.parse_index(request.user, query_params)
+        index = parse_index(request.user, query_params)
         if isinstance(index, dict):
             return index
 
@@ -242,7 +374,7 @@ class ElasticsearchBackend(StorageBackend):
             return search_query
 
         # S - Sources
-        sources = self.parse_source(request.user, query_params)
+        sources = parse_source(request.user, query_params)
         if isinstance(sources, dict):
             return sources
         total_count = len(sources)
@@ -257,7 +389,7 @@ class ElasticsearchBackend(StorageBackend):
 
         # R - Ranges
         # date_query = False
-        from_ts, to_ts = self.parse_date_time(query_params)
+        from_ts, to_ts = parse_date_time(query_params)
         if from_ts:
             range_query = {
                 "range": {
@@ -270,7 +402,7 @@ class ElasticsearchBackend(StorageBackend):
             add_top.append(range_query)
 
         # S - Sort
-        sort = self.parse_sort(query_params)
+        sort = parse_sort(query_params)
         if isinstance(sort, dict):
             return sort
         if sort:
@@ -286,7 +418,7 @@ class ElasticsearchBackend(StorageBackend):
             search_query["sort"] = sorting
 
         # S - Sentiment
-        sentiment_r = self.parse_sentiment(query_params)
+        sentiment_r = parse_sentiment(query_params)
         if isinstance(sentiment_r, dict):
             return sentiment_r
         if sentiment_r: