neptune/core/lib/rules.py

443 lines
15 KiB
Python

from yaml import dump, load
from yaml.parser import ParserError
from yaml.scanner import ScannerError
try:
from yaml import CDumper as Dumper
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader, Dumper
import orjson
from asgiref.sync import async_to_sync
from core.lib.notify import sendmsg
from core.lib.parsing import parse_index, parse_source
from core.util import logs
log = logs.get_logger("rules")
SECONDS_PER_UNIT = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
MAX_WINDOW = 2592000
class RuleParseError(Exception):
def __init__(self, message, field):
super().__init__(message)
self.field = field
def format_ntfy(**kwargs):
"""
Format a message for ntfy.
"""
rule = kwargs.get("rule")
index = kwargs.get("index")
message = kwargs.get("message")
matched = kwargs.get("matched")
if message:
# Dump the message in YAML for readability
messages_formatted = ""
if isinstance(message, list):
for message in message:
messages_formatted += dump(
message, Dumper=Dumper, default_flow_style=False
)
messages_formatted += "\n"
else:
messages_formatted = dump(message, Dumper=Dumper, default_flow_style=False)
else:
messages_formatted = ""
if matched:
matched = ", ".join([f"{k}: {v}" for k, v in matched.items()])
else:
matched = ""
notify_message = f"{rule.name} on {index}: {matched}\n{messages_formatted}"
notify_message = notify_message.encode("utf-8", "replace")
return notify_message
def format_webhook(**kwargs):
"""
Format a message for a webhook.
"""
rule = kwargs.get("rule")
index = kwargs.get("index")
message = kwargs.get("message")
matched = kwargs.get("matched")
notification_settings = kwargs.get("notification_settings")
notify_message = {
"rule_id": rule.id,
"rule_name": rule.name,
"match": matched,
"index": index,
"data": message,
}
if "priority" in notification_settings:
notify_message["priority"] = notification_settings["priority"]
if "topic" in notification_settings:
notify_message["topic"] = notification_settings["topic"]
notify_message = orjson.dumps(notify_message)
return notify_message
def rule_notify(rule, index, message, matched):
if message:
word = "match"
else:
word = "no match"
title = f"Rule {rule.name} {word} on {index}"
notification_settings = rule.get_notification_settings()
if not notification_settings:
return
cast = {
"title": title,
"user": rule.user,
"rule": rule,
"index": index,
"message": message,
"matched": matched,
"notification_settings": notification_settings,
}
if rule.service == "ntfy":
cast["msg"] = format_ntfy(**cast)
elif rule.service == "webhook":
cast["msg"] = format_webhook(**cast)
sendmsg(**cast)
class NotificationRuleData(object):
def __init__(self, user, cleaned_data, db):
self.user = user
self.object = None
# We are running live
if not isinstance(cleaned_data, dict):
self.object = cleaned_data
cleaned_data = cleaned_data.__dict__
self.cleaned_data = cleaned_data
self.db = db
self.data = self.cleaned_data.get("data")
self.window = self.cleaned_data.get("window")
self.parsed = None
self.aggs = {}
self.validate_user_permissions()
self.parse_data()
self.ensure_list()
self.validate_permissions()
self.validate_schedule_fields()
self.validate_time_fields()
if self.object is not None:
self.populate_matched()
def populate_matched(self):
if self.object.match is None:
self.object.match = {}
for index in self.parsed["index"]:
if index not in self.object.match:
self.object.match[index] = False
self.object.save()
def store_match(self, index, match):
"""
Store a match result.
"""
if self.object.match is None:
self.object.match = {}
if not isinstance(self.object.match, dict):
self.object.match = {}
if index is None:
for index_iter in self.parsed["index"]:
self.object.match[index_iter] = match
else:
self.object.match[index] = match
self.object.save()
log.debug(f"Stored match: {index} - {match}")
def get_match(self, index=None):
"""
Get a match result for an index.
"""
if self.object.match is None:
self.object.match = {}
self.object.save()
return None
if not isinstance(self.object.match, dict):
return None
if index is None:
# Check if we have any matches on all indices
return any(self.object.match.values())
return self.object.match.get(index)
def format_aggs(self, aggs):
"""
Format aggregations for the query.
We have self.aggs, which contains:
{"avg_sentiment": (">", 0.5)}
and aggs, which contains:
{"avg_sentiment": {"value": 0.6}}
It's matched already, we just need to format it like so:
{"avg_sentiment": "0.06>0.5"}
"""
new_aggs = {}
for agg_name, agg in aggs.items():
# Already checked membership below
op, value = self.aggs[agg_name]
new_aggs[agg_name] = f"{agg['value']}{op}{value}"
return new_aggs
def rule_matched(self, index, message, aggs):
"""
A rule has matched.
"""
current_match = self.get_match(index)
if current_match is False:
# Matched now, but not before
formatted_aggs = self.format_aggs(aggs)
rule_notify(self.object, index, message, formatted_aggs)
self.store_match(index, True)
def rule_no_match(self, index=None):
"""
A rule has not matched.
"""
current_match = self.get_match(index)
if current_match is True:
# Matched before, but not now
if self.object.send_empty:
rule_notify(self.object, index, "no_match", None)
self.store_match(index, False)
async def run_schedule(self):
"""
Run the schedule query.
"""
response = await self.db.schedule_query_results(self)
if not response:
self.rule_no_match()
for index, (aggs, results) in response.items():
if not results:
self.rule_not_matched(index)
aggs_for_index = []
for agg_name in self.aggs.keys():
if agg_name in aggs:
if "match" in aggs[agg_name]:
aggs_for_index.append(aggs[agg_name]["match"])
# All required aggs are present
if len(aggs_for_index) == len(self.aggs.keys()):
if all(aggs_for_index):
# Ensure we only send notifications when the previous run
# did not return any matches
self.rule_matched(index, results[: self.object.amount], aggs)
continue
self.rule_not_matched(index)
def test_schedule(self):
"""
Test the schedule query to ensure it is valid.
"""
if self.db:
sync_schedule = async_to_sync(self.db.schedule_query_results)
sync_schedule(self)
def validate_schedule_fields(self):
"""
Ensure schedule fields are valid.
index: can be a list, it will schedule one search per index.
source: can be a list, it will be the filter for each search.
tokens: can be list, it will ensure the message matches any token.
msg: can be a list, it will ensure the message contains any msg.
No other fields can be lists containing more than one item.
"""
is_schedule = self.is_schedule
if is_schedule:
allowed_list_fields = ["index", "source", "tokens", "msg"]
for field, value in self.parsed.items():
if field not in allowed_list_fields:
if len(value) > 1:
raise RuleParseError(
(
f"For scheduled rules, field {field} cannot contain "
"more than one item"
),
"data",
)
if len(str(value[0])) == 0:
raise RuleParseError(f"Field {field} cannot be empty", "data")
if "sentiment" in self.parsed:
sentiment = str(self.parsed["sentiment"][0])
sentiment = sentiment.strip()
if sentiment[0] not in [">", "<", "="]:
raise RuleParseError(
(
"Sentiment field must be a comparison operator and then a "
"float: >0.02"
),
"data",
)
operator = sentiment[0]
number = sentiment[1:]
try:
number = float(number)
except ValueError:
raise RuleParseError(
(
"Sentiment field must be a comparison operator and then a "
"float: >0.02"
),
"data",
)
self.aggs["avg_sentiment"] = (operator, number)
else:
if "query" in self.parsed:
raise RuleParseError(
"Field query cannot be used with on-demand rules", "data"
)
if "tags" in self.parsed:
raise RuleParseError(
"Field tags cannot be used with on-demand rules", "data"
)
@property
def is_schedule(self):
if "interval" in self.cleaned_data:
if self.cleaned_data["interval"] != 0:
return True
return False
def ensure_list(self):
"""
Ensure all values are lists.
"""
for field, value in self.parsed.items():
if not isinstance(value, list):
self.parsed[field] = [value]
def validate_user_permissions(self):
"""
Ensure the user can use notification rules.
"""
if not self.user.has_perm("core.use_rules"):
raise RuleParseError("User does not have permission to use rules", "data")
def validate_time_fields(self):
"""
Validate the interval and window fields.
Prohibit window being specified with an ondemand interval.
"""
interval = self.cleaned_data.get("interval")
window = self.cleaned_data.get("window")
amount = self.cleaned_data.get("amount")
on_demand = interval == 0
if on_demand and window is not None:
# Interval is on demand and window is specified
# We can't have a window with on-demand rules
raise RuleParseError(
"Window cannot be specified with on-demand interval", "window"
)
if not on_demand and window is None:
# Interval is not on demand and window is not specified
# We can't have a non-on-demand interval without a window
raise RuleParseError(
"Window must be specified with non-on-demand interval", "window"
)
if not on_demand and amount is None:
# Interval is not on demand and amount is not specified
# We can't have a non-on-demand interval without an amount
raise RuleParseError(
"Amount must be specified with non-on-demand interval", "amount"
)
if on_demand and amount is not None:
# Interval is on demand and amount is specified
# We can't have an amount with on-demand rules
raise RuleParseError(
"Amount cannot be specified with on-demand interval", "amount"
)
if window is not None:
window_number = window[:-1]
if not window_number.isdigit():
raise RuleParseError("Window prefix must be a number", "window")
window_number = int(window_number)
window_unit = window[-1]
if window_unit not in SECONDS_PER_UNIT:
raise RuleParseError(
(
"Window unit must be one of "
f"{', '.join(SECONDS_PER_UNIT.keys())},"
f" not '{window_unit}'"
),
"window",
)
window_seconds = window_number * SECONDS_PER_UNIT[window_unit]
if window_seconds > MAX_WINDOW:
raise RuleParseError(
f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
"window",
)
def validate_permissions(self):
"""
Validate permissions for the source and index variables.
"""
if "index" in self.parsed:
index = self.parsed["index"]
if type(index) == list:
for i in index:
parse_index(self.user, {"index": i}, raise_error=True)
# else:
# db.parse_index(self.user, {"index": index}, raise_error=True)
else:
# Get the default value for the user if not present
index = parse_index(self.user, {}, raise_error=True)
self.parsed["index"] = [index]
if "source" in self.parsed:
source = self.parsed["source"]
if type(source) == list:
for i in source:
parse_source(self.user, {"source": i}, raise_error=True)
# else:
# parse_source(self.user, {"source": source}, raise_error=True)
else:
# Get the default value for the user if not present
source = parse_source(self.user, {}, raise_error=True)
self.parsed["source"] = [source]
def parse_data(self):
"""
Parse the data in the text field to YAML.
"""
try:
self.parsed = load(self.data, Loader=Loader)
except (ScannerError, ParserError) as e:
raise RuleParseError("data", f"Invalid YAML: {e}")
def __str__(self):
return dump(self.parsed, Dumper=Dumper)
def get_data(self):
return self.parsed