Implement running scheduled rules and check aggregations

2023-01-15 17:59:12 +00:00
parent 435d9b5571
commit 6bfa0aa73b
15 changed files with 600 additions and 258 deletions

core/lib/parsing.py (new file, 149 lines)

@@ -0,0 +1,149 @@
from datetime import datetime

from django.conf import settings


class QueryError(Exception):
    pass


def parse_size(query_params, sizes):
    if "size" in query_params:
        size = query_params["size"]
        if size not in sizes:
            message = "Size is not permitted"
            message_class = "danger"
            return {"message": message, "class": message_class}
        size = int(size)
    else:
        size = 15
    return size


def parse_index(user, query_params, raise_error=False):
    if "index" in query_params:
        index = query_params["index"]
        if index == "main":
            index = settings.INDEX_MAIN
        else:
            if not user.has_perm(f"core.index_{index}"):
                message = f"Not permitted to search by this index: {index}"
                if raise_error:
                    raise QueryError(message)
                message_class = "danger"
                return {
                    "message": message,
                    "class": message_class,
                }
            if index == "meta":
                index = settings.INDEX_META
            elif index == "internal":
                index = settings.INDEX_INT
            elif index == "restricted":
                if not user.has_perm("core.restricted_sources"):
                    message = f"Not permitted to search by this index: {index}"
                    if raise_error:
                        raise QueryError(message)
                    message_class = "danger"
                    return {
                        "message": message,
                        "class": message_class,
                    }
                index = settings.INDEX_RESTRICTED
            else:
                message = f"Index is not valid: {index}"
                if raise_error:
                    raise QueryError(message)
                message_class = "danger"
                return {
                    "message": message,
                    "class": message_class,
                }
    else:
        index = settings.INDEX_MAIN
    return index


def parse_source(user, query_params, raise_error=False):
    source = None
    if "source" in query_params:
        source = query_params["source"]
        if source in settings.SOURCES_RESTRICTED:
            if not user.has_perm("core.restricted_sources"):
                message = f"Access denied: {source}"
                if raise_error:
                    raise QueryError(message)
                message_class = "danger"
                return {"message": message, "class": message_class}
        elif source not in settings.MAIN_SOURCES:
            message = f"Invalid source: {source}"
            if raise_error:
                raise QueryError(message)
            message_class = "danger"
            return {"message": message, "class": message_class}
    if source == "all":
        source = None  # the next block will populate it
    if source:
        sources = [source]
    else:
        sources = list(settings.MAIN_SOURCES)
        if user.has_perm("core.restricted_sources"):
            for source_iter in settings.SOURCES_RESTRICTED:
                sources.append(source_iter)
    if "all" in sources:
        sources.remove("all")
    return sources


def parse_sort(query_params):
    sort = None
    if "sorting" in query_params:
        sorting = query_params["sorting"]
        if sorting not in ("asc", "desc", "none"):
            message = "Invalid sort"
            message_class = "danger"
            return {"message": message, "class": message_class}
        if sorting == "asc":
            sort = "ascending"
        elif sorting == "desc":
            sort = "descending"
    return sort


def parse_date_time(query_params):
    if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
        query_params.keys()
    ):
        from_ts = f"{query_params['from_date']}T{query_params['from_time']}Z"
        to_ts = f"{query_params['to_date']}T{query_params['to_time']}Z"
        from_ts = datetime.strptime(from_ts, "%Y-%m-%dT%H:%MZ")
        to_ts = datetime.strptime(to_ts, "%Y-%m-%dT%H:%MZ")
        return (from_ts, to_ts)
    return (None, None)


def parse_sentiment(query_params):
    sentiment = None
    if "check_sentiment" in query_params:
        if "sentiment_method" not in query_params:
            message = "No sentiment method"
            message_class = "danger"
            return {"message": message, "class": message_class}
        if "sentiment" in query_params:
            sentiment = query_params["sentiment"]
            try:
                sentiment = float(sentiment)
            except ValueError:
                message = "Sentiment is not a float"
                message_class = "danger"
                return {"message": message, "class": message_class}
        sentiment_method = query_params["sentiment_method"]
        return (sentiment_method, sentiment)
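
Taken together, these helpers either normalise a query parameter or return an error dict for the template layer. A minimal usage sketch of how they compose (not part of this commit; the sizes tuple and the wiring are assumptions):

# Hypothetical wiring, assuming sizes arrive as strings from the query string
def build_search_context(user, query_params):
    size = parse_size(query_params, sizes=("15", "30", "50"))
    if isinstance(size, dict):  # error dict: {"message": ..., "class": "danger"}
        return size
    index = parse_index(user, query_params)
    if isinstance(index, dict):
        return index
    sources = parse_source(user, query_params)
    if isinstance(sources, dict):
        return sources
    from_ts, to_ts = parse_date_time(query_params)
    return {"size": size, "index": index, "sources": sources,
            "from": from_ts, "to": to_ts}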


@@ -2,16 +2,16 @@ from yaml import dump, load
from yaml.parser import ParserError
from yaml.scanner import ScannerError

from core.db.storage import db
from core.models import NotificationRule

try:
    from yaml import CDumper as Dumper
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader, Dumper

from asgiref.sync import async_to_sync

from core.lib.notify import sendmsg
from core.lib.parsing import parse_index, parse_source
from core.util import logs

log = logs.get_logger("rules")
@@ -46,81 +46,150 @@ def rule_matched(rule, message, matched):
    sendmsg(rule.user, notify_message, **cast)


def process_rules(data):
    all_rules = NotificationRule.objects.filter(enabled=True)

    for index, index_messages in data.items():
        for message in index_messages:
            for rule in all_rules:
                parsed_rule = rule.parse()
                matched = {}
                if "index" not in parsed_rule:
                    continue
                if "source" not in parsed_rule:
                    continue
                rule_index = parsed_rule["index"]
                rule_source = parsed_rule["source"]
                if not type(rule_index) == list:
                    rule_index = [rule_index]
                if not type(rule_source) == list:
                    rule_source = [rule_source]
                if index not in rule_index:
                    continue
                if message["src"] not in rule_source:
                    continue
                matched["index"] = index
                matched["source"] = message["src"]

                rule_field_length = len(parsed_rule.keys())
                matched_field_number = 0
                for field, value in parsed_rule.items():
                    if not type(value) == list:
                        value = [value]
                    if field == "src":
                        continue
                    if field == "tokens":
                        for token in value:
                            if "tokens" in message:
                                if token in message["tokens"]:
                                    matched_field_number += 1
                                    matched[field] = token
                                    # Break out of the token matching loop
                                    break
                        # Continue to next field
                        continue

                    # Allow partial matches for msg
                    if field == "msg":
                        for msg in value:
                            if "msg" in message:
                                if msg.lower() in message["msg"].lower():
                                    matched_field_number += 1
                                    matched[field] = msg
                                    # Break out of the msg matching loop
                                    break
                        # Continue to next field
                        continue
                    if field in message and message[field] in value:
                        matched_field_number += 1
                        matched[field] = message[field]

                # Subtract 2 to account for index and source, which are
                # checked above and are not message fields
                if matched_field_number == rule_field_length - 2:
                    rule_matched(rule, message, matched)
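
For reference, a rule and message that would satisfy the matching loop above (values invented, not taken from this commit):

# Illustrative data only: field names mirror the loop above
parsed_rule = {
    "index": "main",                # normalised to ["main"]
    "source": ["irc", "discord"],
    "msg": ["error", "failure"],    # partial, case-insensitive match
}
message = {"src": "irc", "msg": "Disk FAILURE on /dev/sda"}
# index and src match, and "failure" appears in message["msg"].lower(),
# so matched_field_number (1) == len(parsed_rule) - 2 and rule_matched fires.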
class NotificationRuleData(object):
-    def __init__(self, user, cleaned_data):
+    def __init__(self, user, cleaned_data, db):
        self.user = user
        self.object = None

        # We are running live
        if not isinstance(cleaned_data, dict):
            self.object = cleaned_data
            cleaned_data = cleaned_data.__dict__

        self.cleaned_data = cleaned_data
        self.db = db
        self.data = self.cleaned_data.get("data")
        self.parsed = None
        self.aggs = {}

        self.validate_user_permissions()
        self.parse_data()
        self.ensure_list()
        self.validate_permissions()
        self.validate_schedule_fields()
        self.validate_time_fields()

    def store_match(self, index, match):
        """
        Store a match result.
        """
        if self.object.match is None:
            self.object.match = {}
        if not isinstance(self.object.match, dict):
            self.object.match = {}
        self.object.match[index] = match
        self.object.save()
        log.debug(f"Stored match: {index} - {match}")
    async def run_schedule(self):
        """
        Run the schedule query.
        """
        if self.db:
            response = await self.db.schedule_query_results(self)
            for index, (aggs, results) in response.items():
                if not results:
                    # No results for this index, nothing to aggregate
                    self.store_match(index, False)
                    continue
                aggs_for_index = []
                for agg_name in self.aggs.keys():
                    if agg_name in aggs:
                        if "match" in aggs[agg_name]:
                            aggs_for_index.append(aggs[agg_name]["match"])

                # All required aggs are present
                if len(aggs_for_index) == len(self.aggs.keys()):
                    if all(aggs_for_index):
                        self.store_match(index, True)
                        continue
                self.store_match(index, False)
    def test_schedule(self):
        """
        Test the schedule query to ensure it is valid.
        """
        if self.db:
            sync_schedule = async_to_sync(self.db.schedule_query_results)
            sync_schedule(self)
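
The unpacking in run_schedule implies a response shape from schedule_query_results: a mapping of index name to an (aggs, results) pair. A sketch of what one entry presumably looks like (field values invented, not taken from this commit):

# Assumed shape, inferred from the loop in run_schedule above
response = {
    "main": (
        {"avg_sentiment": {"value": 0.13, "match": True}},  # aggs
        [{"src": "irc", "msg": "..."}],                     # results
    ),
}
# "main" has results, every agg named in self.aggs carries a "match" key,
# and all matches are truthy, so store_match("main", True) is called.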
    def validate_schedule_fields(self):
        """
        Ensure schedule fields are valid.
        index: can be a list, it will schedule one search per index.
        source: can be a list, it will be the filter for each search.
        tokens: can be a list, it will ensure the message matches any token.
        msg: can be a list, it will ensure the message contains any msg.
        No other fields can be lists containing more than one item.
        """
        is_schedule = self.is_schedule
        if is_schedule:
            allowed_list_fields = ["index", "source", "tokens", "msg"]
            for field, value in self.parsed.items():
                if field not in allowed_list_fields:
                    if len(value) > 1:
                        raise RuleParseError(
                            (
                                f"For scheduled rules, field {field} cannot contain "
                                "more than one item"
                            ),
                            "data",
                        )
                    if len(str(value[0])) == 0:
                        raise RuleParseError(f"Field {field} cannot be empty", "data")
            if "sentiment" in self.parsed:
                sentiment = str(self.parsed["sentiment"][0])
                sentiment = sentiment.strip()
                if sentiment[0] not in [">", "<", "="]:
                    raise RuleParseError(
                        (
                            "Sentiment field must be a comparison operator and then a "
                            "float: >0.02"
                        ),
                        "data",
                    )
                operator = sentiment[0]
                number = sentiment[1:]
                try:
                    number = float(number)
                except ValueError:
                    raise RuleParseError(
                        (
                            "Sentiment field must be a comparison operator and then a "
                            "float: >0.02"
                        ),
                        "data",
                    )
                self.aggs["avg_sentiment"] = (operator, number)
        else:
            if "query" in self.parsed:
                raise RuleParseError(
                    "Field query cannot be used with on-demand rules", "data"
                )
            if "tags" in self.parsed:
                raise RuleParseError(
                    "Field tags cannot be used with on-demand rules", "data"
                )
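
An example of rule data that passes these checks for a scheduled rule (values invented):

# After parse_data() and ensure_list(), a passing scheduled rule might be:
parsed = {
    "index": ["main", "internal"],  # list fields are permitted here
    "source": ["irc"],
    "msg": ["error", "failure"],
    "sentiment": [">0.02"],         # comparison operator, then a float
}
# validate_schedule_fields() then records the aggregation to check:
# self.aggs == {"avg_sentiment": (">", 0.02)}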
    @property
    def is_schedule(self):
        if "interval" in self.cleaned_data:
            if self.cleaned_data["interval"] != 0:
                return True
        return False
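
A quick check of the property's behaviour (interval values invented):

# Minimal demonstration; borrows the property onto a stub class
class _Stub:
    is_schedule = NotificationRuleData.is_schedule

    def __init__(self, interval):
        self.cleaned_data = {"interval": interval}

assert _Stub(0).is_schedule is False   # on-demand rule
assert _Stub(900).is_schedule is True  # scheduled rule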
    def ensure_list(self):
        """
        Ensure all values are lists.
        """
        for field, value in self.parsed.items():
            if not isinstance(value, list):
                self.parsed[field] = [value]

    def validate_user_permissions(self):
        """
        Ensure the user can use notification rules.
@@ -161,7 +230,6 @@ class NotificationRuleData(object):
"window",
)
window_seconds = window_number * SECONDS_PER_UNIT[window_unit]
print("Window seconds", window_seconds)
if window_seconds > MAX_WINDOW:
raise RuleParseError(
f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
@@ -176,24 +244,24 @@ class NotificationRuleData(object):
            index = self.parsed["index"]
            if type(index) == list:
                for i in index:
-                    db.parse_index(self.user, {"index": i}, raise_error=True)
-            else:
-                db.parse_index(self.user, {"index": index}, raise_error=True)
+                    parse_index(self.user, {"index": i}, raise_error=True)
+            # else:
+            #     db.parse_index(self.user, {"index": index}, raise_error=True)
        else:
            # Get the default value for the user if not present
-            index = db.parse_index(self.user, {}, raise_error=True)
+            index = parse_index(self.user, {}, raise_error=True)
            self.parsed["index"] = index
        if "source" in self.parsed:
            source = self.parsed["source"]
            if type(source) == list:
                for i in source:
-                    db.parse_source(self.user, {"source": i}, raise_error=True)
-            else:
-                db.parse_source(self.user, {"source": source}, raise_error=True)
+                    parse_source(self.user, {"source": i}, raise_error=True)
+            # else:
+            #     parse_source(self.user, {"source": source}, raise_error=True)
        else:
            # Get the default value for the user if not present
-            source = db.parse_source(self.user, {}, raise_error=True)
+            source = parse_source(self.user, {}, raise_error=True)
            self.parsed["source"] = source

    def parse_data(self):