You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
neptune/core/lib/rules.py

611 lines
22 KiB
Python

from yaml import dump, load
from yaml.parser import ParserError
from yaml.scanner import ScannerError
try:
from yaml import CDumper as Dumper
from yaml import CLoader as Loader
except ImportError:
from yaml import Loader, Dumper
from datetime import datetime
import orjson
from asgiref.sync import async_to_sync
from siphashc import siphash
from core.lib.notify import sendmsg
from core.lib.parsing import parse_index, parse_source
from core.util import logs
# Module-level logger shared by the rule helpers below.
log = logs.get_logger("rules")
# Seconds represented by each single-letter window unit suffix; used by
# validate_time_fields to convert a window such as "5m" into seconds.
SECONDS_PER_UNIT = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
# Largest permitted window, in seconds (30 days).
MAX_WINDOW = 2592000
# Upper bound on a rule's `amount` when its service is "ntfy".
MAX_AMOUNT_NTFY = 10
# Upper bound on a rule's `amount` for every other service.
MAX_AMOUNT_WEBHOOK = 1000
# Scheduled intervals at or below this many seconds require the
# "core.rules_high_frequency" permission.
HIGH_FREQUENCY_MIN_SEC = 60
class RuleParseError(Exception):
    """
    Raised when a rule cannot be parsed or fails validation.
    The `field` attribute names the form field the problem relates to; the
    callers in this file pass "data", "window" or "amount".
    """

    def __init__(self, message, field):
        Exception.__init__(self, message)
        self.field = field
def format_ntfy(**kwargs):
    """
    Build the UTF-8 encoded notification body for ntfy.
    A list message is dumped item by item as block-style YAML, each item
    followed by a newline; any other truthy message is dumped once.
    A falsy message produces an empty body.
    A truthy `matched` mapping is rendered as "key: value" pairs on the
    first line, after the rule name and index.
    kwargs:
        rule: The rule object, must be specified (its `name` is read)
        index: The index the rule matched on, can be None
        message: The message to send, can be None
        matched: The matched fields, can be None
        total_hits: The total number of matches, optional (defaults to 0)
    :return: the formatted message as bytes
    """
    rule = kwargs.get("rule")
    index = kwargs.get("index")
    payload = kwargs.get("message")
    matched_fields = kwargs.get("matched")
    hit_count = kwargs.get("total_hits", 0)

    # Dump the message in YAML for readability
    if not payload:
        body = ""
    elif isinstance(payload, list):
        body = "".join(
            dump(item, Dumper=Dumper, default_flow_style=False) + "\n"
            for item in payload
        )
    else:
        body = dump(payload, Dumper=Dumper, default_flow_style=False)

    if matched_fields:
        summary = ", ".join(f"{k}: {v}" for k, v in matched_fields.items())
    else:
        summary = ""

    text = f"{rule.name} on {index}: {summary}\n{body}"
    text += f"\nTotal hits: {hit_count}"
    return text.encode("utf-8", "replace")
def format_webhook(**kwargs):
    """
    Build the JSON payload for a webhook notification.
    Combines the rule identity, the match details and, when present, the
    `priority` and `topic` entries of the notification settings into one
    object and serialises it with orjson.
    kwargs:
        rule: The rule object, must be specified (`id` and `name` are read)
        index: The index the rule matched on, can be None
        message: The message to send, can be None, but will be sent as None
        matched: The matched fields, can be None, but will be sent as None
        total_hits: The total number of matches, optional (defaults to 0)
        notification_settings: The notification settings, must be specified;
            `priority` and `topic` are copied from it when it contains them
    :return: the serialised payload as returned by orjson.dumps
    """
    rule = kwargs.get("rule")
    settings = kwargs.get("notification_settings")

    payload = {
        "rule_id": rule.id,
        "rule_name": rule.name,
        "match": kwargs.get("matched"),
        "total_hits": kwargs.get("total_hits", 0),
        "index": kwargs.get("index"),
        "data": kwargs.get("message"),
    }
    # Optional metadata that otherwise only lives in the settings
    for optional_key in ("priority", "topic"):
        if optional_key in settings:
            payload[optional_key] = settings[optional_key]
    return orjson.dumps(payload)
def rule_notify(rule, index, message, meta=None):
    """
    Send a notification for a rule.
    Fetches the rule's notification settings, formats the message with the
    helper for the rule's service ("ntfy" or "webhook") and hands everything
    to sendmsg. Nothing is sent when the settings are falsy.
    :param rule: The rule object, must be specified
    :param index: The index the rule matched on, can be None
    :param message: The message to send, can be None
    :param meta: dict of metadata; its `matched` and `total_hits` keys are
                 forwarded when present
    """
    # A falsy message means there is nothing to report as matched
    outcome = "match" if message else "no match"
    title = f"Rule {rule.name} {outcome} on {index}"

    # The user notification settings are merged in with this
    settings = rule.get_notification_settings()
    if not settings:
        # No/invalid notification settings, don't send anything
        return

    # One set of keyword arguments shared by the formatters and sendmsg
    cast = dict(
        title=title,
        user=rule.user,
        rule=rule,
        index=index,
        message=message,
        notification_settings=settings,
    )
    if meta:
        for meta_key in ("matched", "total_hits"):
            if meta_key in meta:
                cast[meta_key] = meta[meta_key]

    if rule.service == "ntfy":
        cast["msg"] = format_ntfy(**cast)
    elif rule.service == "webhook":
        cast["msg"] = format_webhook(**cast)
    sendmsg(**cast)
class NotificationRuleData(object):
def __init__(self, user, cleaned_data, db):
    """
    Parse and validate a rule.
    :param user: the user the rule belongs to; used for permission checks
    :param cleaned_data: either a dict of rule fields, or an object whose
                         `__dict__` supplies the fields (kept as
                         `self.object`)
    :param db: database handle used by the matching and scheduling methods
    :raises RuleParseError: if any validation step fails
    """
    self.user = user

    # Anything that is not a dict is treated as a live database object
    given_mapping = isinstance(cleaned_data, dict)
    self.object = None if given_mapping else cleaned_data
    self.cleaned_data = cleaned_data if given_mapping else cleaned_data.__dict__

    self.db = db
    self.data = self.cleaned_data.get("data")
    self.window = self.cleaned_data.get("window")
    self.parsed = None
    self.aggs = {}

    # The steps run in this order; later steps read what earlier ones set
    validation_steps = (
        self.validate_user_permissions,
        self.parse_data,
        self.ensure_list,
        self.validate_permissions,
        self.validate_schedule_fields,
        self.validate_time_fields,
    )
    for step in validation_steps:
        step()

    if self.object is not None:
        self.populate_matched()
def populate_matched(self):
    """
    Make sure the stored match field has an entry for every index.
    On first creation the match field is None; it is replaced with a
    dictionary. Any other non-dict value is also reset to a dictionary, the
    same way store_match treats it. Every index in the parsed rule that has
    no entry yet is given the value False, and the object is saved.
    """
    if not isinstance(self.object.match, dict):
        # Covers None (first creation) and any unusable stored value
        self.object.match = {}
    for index in self.parsed["index"]:
        # Existing results are kept; only missing indices are initialised
        self.object.match.setdefault(index, False)
    self.object.save()
def store_match(self, index, match):
    """
    Record the outcome of a run on the rule object and save it.
    Anything other than False is serialised to JSON with sorted keys and
    replaced by its siphash (keyed with `self.db.hash_key`); False is
    stored as-is.
    Accepts None for the index to set all indices of the parsed rule.
    :param index: the index to store the match for, can be None
    :param match: the object that matched, or False
    """
    if match is not False:
        # Sorted keys keep the hash stable across key ordering
        serialised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
        match = siphash(self.db.hash_key, serialised)

    # None is not a dict either, so a single check covers both resets
    if not isinstance(self.object.match, dict):
        self.object.match = {}

    targets = self.parsed["index"] if index is None else [index]
    for target in targets:
        self.object.match[target] = match

    self.object.save()
    log.debug(f"Stored match: {index} - {match}")
def get_match(self, index=None, match=None):
    """
    Look up the stored match result.
    A match field of None is initialised to an empty dictionary, saved,
    and None is returned; a non-dict match field also yields None.
    If the index is None, returns True if any index has a truthy entry.
    If `match` is given, it is hashed the same way store_match hashes it
    and the result says whether it equals the stored value for the index.
    Otherwise the stored value for the index is returned (None if absent).
    :param index: the index to get the match for, can be None
    :param match: an object to compare against the stored hash, can be None
    """
    stored = self.object.match
    if stored is None:
        self.object.match = {}
        self.object.save()
        return None
    if not isinstance(stored, dict):
        return None

    if index is None:
        # Check if we have any matches on all indices
        return any(stored.values())

    previous = stored.get(index)
    if match is None:
        return previous

    # Check if it's the same hash
    digest = siphash(
        self.db.hash_key, orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
    )
    return previous == digest
def format_aggs(self, aggs):
    """
    Format aggregation results for display.
    We have self.aggs, which contains:
    {"avg_sentiment": (">", 0.5)}
    and aggs, which contains:
    {"avg_sentiment": {"value": 0.6}}
    Each aggregation the rule defines is rendered like so:
    {"avg_sentiment": "0.6>0.5"}
    Aggregations in `aggs` that the rule does not define are skipped.
    :param aggs: the aggregations to format
    :return: the formatted aggregations
    """
    formatted = {}
    for agg_name, agg in aggs.items():
        if agg_name not in self.aggs:
            # The results may carry aggregations this rule never asked for
            continue
        op, value = self.aggs[agg_name]
        formatted[agg_name] = f"{agg['value']}{op}{value}"
    return formatted
async def ingest_matches(self, index, matches, meta):
    """
    Store all matches for an index.
    Each match is copied and annotated with the index, the rule id, the
    metadata and a shared timestamp before being handed to the database.
    The objects passed in are left unmodified.
    :param index: the index to store the matches for
    :param matches: the matches to store, a single match or a list of them
    :param meta: metadata stored alongside every match
    """
    if not isinstance(matches, list):
        matches = [matches]
    # One naive UTC timestamp shared by every match of this batch
    match_ts = datetime.utcnow().isoformat()
    records = [
        {
            **match,
            "index": index,
            "rule_uuid": self.object.id,
            "meta": meta,
            "match_ts": match_ts,
        }
        for match in matches
    ]
    await self.db.async_store_matches(records)
async def rule_matched(self, index, message, meta):
    """
    A rule has matched.
    If the stored result for the index is False or differs from this
    message (get_match returns False), format the aggregations into
    `meta["matched"]`, send a notification, store the new match and
    ingest the matches.
    :param index: the index the rule matched on
    :param message: the message object that matched
    :param meta: metadata for the match; its `aggs` key is formatted
    """
    previous = self.get_match(index, message)
    log.debug(f"Rule matched: {index} - current match: {previous}")
    if previous is not False:
        # Same match as last time (or no usable match state): nothing to do
        return

    # Matched now, but not before
    meta["matched"] = self.format_aggs(meta["aggs"])
    rule_notify(self.object, index, message, meta)
    self.store_match(index, message)
    await self.ingest_matches(index, message, meta)
async def rule_no_match(self, index=None):
    """
    A rule has not matched.
    If the previous run did match, send a notification if configured to notify
    for empty matches, then reset the stored match to False.
    For a specific index the stored value is the hash written by
    store_match rather than True, so any stored value other than None or
    False counts as a previous match.
    :param index: the index the rule did not match on, can be None
    """
    current_match = self.get_match(index)
    log.debug(f"Rule not matched: {index} - current match: {current_match}")
    # Identity checks so that a hash of 0 still counts as a previous match
    matched_before = current_match is not None and current_match is not False
    if matched_before:
        # Matched before, but not now
        if self.object.send_empty:
            # NOTE(review): the "no_match" string is truthy, so rule_notify
            # titles this notification "match" — confirm that is intended.
            rule_notify(self.object, index, "no_match", None)
        self.store_match(index, False)
async def run_schedule(self):
    """
    Run the schedule query.
    Get the results from the database, and check if the rule has matched.
    An empty response counts as no match on any index. For each index, a
    falsy result list counts as no match; otherwise the rule matches only
    when every aggregation in self.aggs is present in the response
    metadata with a truthy `match` value. On a match, at most
    `self.object.amount` results are passed on.
    """
    response = await self.db.schedule_query_results(self)
    if not response:
        # No results in the result_map; there is nothing to iterate
        await self.rule_no_match()
        return

    for index, (meta, results) in response.items():
        if not results:
            # Falsy results, no matches; skip the aggregation checks so the
            # index is not reported a second time
            await self.rule_no_match(index)
            continue

        # Collect the match flag of every required aggregation that reported
        agg_matches = [
            meta["aggs"][agg_name]["match"]
            for agg_name in self.aggs
            if agg_name in meta["aggs"] and "match" in meta["aggs"][agg_name]
        ]
        all_present = len(agg_matches) == len(self.aggs)
        if all_present and all(agg_matches):
            # All aggs have matched
            await self.rule_matched(index, results[: self.object.amount], meta)
        else:
            await self.rule_no_match(index)
def test_schedule(self):
    """
    Run the schedule query once to check that it is valid.
    The async query is wrapped with the async_to_sync helper so it can be
    called from synchronous code such as a form.
    Does nothing when no database handle is set.
    Any exception raised by the query propagates to the caller.
    """
    if not self.db:
        return
    run_query = async_to_sync(self.db.schedule_query_results)
    run_query(self)
def validate_schedule_fields(self):
    """
    Ensure schedule fields are valid.
    index: can be a list, it will schedule one search per index.
    source: can be a list, it will be the filter for each search.
    tokens: can be list, it will ensure the message matches any token.
    msg: can be a list, it will ensure the message contains any msg.
    No other fields can be lists containing more than one item, and they
    cannot be empty.
    For scheduled rules a `sentiment` field must be a comparison operator
    (>, < or =) followed by a float; it is stored in self.aggs as
    "avg_sentiment".
    On-demand rules cannot use the `query` or `tags` fields.
    :raises RuleParseError: if the fields are invalid
    """
    if not self.is_schedule:
        for forbidden in ("query", "tags"):
            if forbidden in self.parsed:
                raise RuleParseError(
                    f"Field {forbidden} cannot be used with on-demand rules",
                    "data",
                )
        return

    allowed_list_fields = ("index", "source", "tokens", "msg")
    for field, value in self.parsed.items():
        if field in allowed_list_fields:
            continue
        if len(value) > 1:
            raise RuleParseError(
                (
                    f"For scheduled rules, field {field} cannot contain "
                    "more than one item"
                ),
                "data",
            )
        # An empty list has no first item to inspect, treat it as empty too
        if not value or len(str(value[0])) == 0:
            raise RuleParseError(f"Field {field} cannot be empty", "data")

    if "sentiment" in self.parsed:
        sentiment_error = (
            "Sentiment field must be a comparison operator and then a "
            "float: >0.02"
        )
        sentiment = str(self.parsed["sentiment"][0]).strip()
        # Slicing instead of indexing: a whitespace-only value strips to ""
        operator, number = sentiment[:1], sentiment[1:]
        if operator not in (">", "<", "="):
            raise RuleParseError(sentiment_error, "data")
        try:
            number = float(number)
        except ValueError as err:
            raise RuleParseError(sentiment_error, "data") from err
        self.aggs["avg_sentiment"] = (operator, number)
@property
def is_schedule(self):
    """
    Tell whether the rule runs on a schedule.
    A rule is scheduled when the cleaned data has an `interval` entry
    whose value is not 0.
    :return: True if the rule is a schedule rule, False otherwise
    """
    fields = self.cleaned_data
    return bool("interval" in fields and fields["interval"] != 0)
def ensure_list(self):
    """
    Normalise every value of the parsed data to a list.
    Values that are already lists are left alone; anything else is wrapped
    in a single-item list. The parsed dictionary is updated in place.
    """
    for field in list(self.parsed):
        current = self.parsed[field]
        if isinstance(current, list):
            continue
        self.parsed[field] = [current]
def validate_user_permissions(self):
    """
    Check that the user holds the "core.use_rules" permission.
    :raises RuleParseError: if the user does not have permission
    """
    if self.user.has_perm("core.use_rules"):
        return
    raise RuleParseError("User does not have permission to use rules", "data")
def validate_time_fields(self):
    """
    Validate the interval, window and amount fields.
    An interval of 0 means the rule is on-demand.
    On-demand rules: window and amount must not be specified.
    Scheduled rules: the user needs the scheduled-rules permission (and
    the high-frequency permission for short intervals), and both window
    and amount must be specified.
    A window is a number followed by a unit from SECONDS_PER_UNIT and may
    not exceed MAX_WINDOW seconds.
    An amount may not exceed the maximum for the rule's service.
    :raises RuleParseError: if the fields are invalid
    """
    fields = self.cleaned_data
    interval = fields.get("interval")
    window = fields.get("window")
    amount = fields.get("amount")
    service = fields.get("service")

    if interval == 0:
        # On-demand rules have neither a window nor an amount
        if window is not None:
            raise RuleParseError(
                "Window cannot be specified with on-demand interval", "window"
            )
        if amount is not None:
            raise RuleParseError(
                "Amount cannot be specified with on-demand interval", "amount"
            )
    else:
        # Short intervals need an extra permission on top of scheduling
        if interval <= HIGH_FREQUENCY_MIN_SEC and not self.user.has_perm(
            "core.rules_high_frequency"
        ):
            raise RuleParseError(
                "User does not have permission to use high frequency rules", "data"
            )
        if not self.user.has_perm("core.rules_scheduled"):
            raise RuleParseError(
                "User does not have permission to use scheduled rules", "data"
            )
        # Scheduled rules need both a window and an amount
        if window is None:
            raise RuleParseError(
                "Window must be specified with non-on-demand interval", "window"
            )
        if amount is None:
            raise RuleParseError(
                "Amount must be specified with non-on-demand interval", "amount"
            )

    if window is not None:
        # Everything but the last character is the count, the last the unit
        digits = window[:-1]
        if not digits.isdigit():
            raise RuleParseError("Window prefix must be a number", "window")
        count = int(digits)
        unit = window[-1]
        if unit not in SECONDS_PER_UNIT:
            known_units = ", ".join(SECONDS_PER_UNIT.keys())
            raise RuleParseError(
                f"Window unit must be one of {known_units}, not '{unit}'",
                "window",
            )
        if count * SECONDS_PER_UNIT[unit] > MAX_WINDOW:
            raise RuleParseError(
                f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
                "window",
            )

    if amount is not None:
        # ntfy has its own limit; every other service shares the webhook one
        limit = MAX_AMOUNT_NTFY if service == "ntfy" else MAX_AMOUNT_WEBHOOK
        if amount > limit:
            raise RuleParseError(
                f"Amount cannot be larger than {limit} for {service}",
                "amount",
            )
def validate_permissions(self):
    """
    Validate permissions for the source and index variables.
    Every listed index and source is passed to parse_index / parse_source
    with raise_error=True.
    When a field is absent, the helper is called without a value and its
    result is stored in the parsed data: wrapped in a list for `index`,
    as returned for `source`.
    Errors raised by parse_index / parse_source propagate to the caller.
    """
    if "index" not in self.parsed:
        # Get the default value for the user if not present
        default_index = parse_index(self.user, {}, raise_error=True)
        self.parsed["index"] = [default_index]
    else:
        indices = self.parsed["index"]
        # Exact type check: only plain lists are validated item by item
        if type(indices) is list:
            for single_index in indices:
                parse_index(self.user, {"index": single_index}, raise_error=True)

    if "source" not in self.parsed:
        # Get the default value for the user if not present
        self.parsed["source"] = parse_source(self.user, {}, raise_error=True)
    else:
        sources = self.parsed["source"]
        if type(sources) is list:
            for single_source in sources:
                parse_source(self.user, {"source": single_source}, raise_error=True)
def parse_data(self):
    """
    Parse the YAML text of the data field into `self.parsed`.
    The result must be a mapping of field names to values, because the
    validation steps iterate over it as a dictionary. Empty data and
    non-mapping documents are rejected.
    :raises RuleParseError: if the data is invalid YAML or not a mapping
    """
    # NOTE(security): Loader is PyYAML's unrestricted loader, which can
    # construct arbitrary Python objects from tagged input. If the rule text
    # is user-supplied (it appears to come from a form), the safe loader
    # (SafeLoader / CSafeLoader) would be the safer choice — TODO confirm.
    raw = "" if self.data is None else self.data
    try:
        parsed = load(raw, Loader=Loader)
    except (ScannerError, ParserError) as e:
        raise RuleParseError(f"Invalid YAML: {e}", "data") from e
    if not isinstance(parsed, dict):
        # Empty documents load as None; scalars and lists are unusable too
        raise RuleParseError(
            "Rule data must be a YAML mapping of fields to values", "data"
        )
    self.parsed = parsed
def __str__(self):
    """
    Render the parsed data field of the rule as YAML text.
    """
    rendered = dump(self.parsed, Dumper=Dumper)
    return rendered
def get_data(self):
    """
    Give access to the parsed data field.
    :return: the object stored in `self.parsed`
    """
    parsed_fields = self.parsed
    return parsed_fields