646 lines
24 KiB
Python
646 lines
24 KiB
Python
from yaml import dump, load
|
|
from yaml.parser import ParserError
|
|
from yaml.scanner import ScannerError
|
|
|
|
try:
|
|
from yaml import CDumper as Dumper
|
|
from yaml import CLoader as Loader
|
|
except ImportError:
|
|
from yaml import Loader, Dumper
|
|
|
|
from datetime import datetime
|
|
|
|
import orjson
|
|
from asgiref.sync import async_to_sync
|
|
from siphashc import siphash
|
|
|
|
from core.lib.notify import sendmsg
|
|
from core.lib.parsing import parse_index, parse_source
|
|
from core.util import logs
|
|
|
|
# Module-level logger, obtained from the project's logs helper under the name "rules".
log = logs.get_logger("rules")
|
|
|
|
# Seconds represented by each window unit suffix (the last character of a
# window string such as "30m"). Key order is used verbatim in error messages.
SECONDS_PER_UNIT = dict(s=1, m=60, h=3600, d=86400, w=604800)

# Largest permitted window, in seconds (30 days).
MAX_WINDOW = 30 * 24 * 60 * 60

# Upper bounds on a rule's `amount`: one for the ntfy service, one for every
# other service.
MAX_AMOUNT_NTFY = 10
MAX_AMOUNT_WEBHOOK = 1_000

# Scheduled intervals at or below this many seconds need the
# "core.rules_high_frequency" permission.
HIGH_FREQUENCY_MIN_SEC = 60
|
|
|
|
|
|
class RuleParseError(Exception):
    """
    Raised when a rule definition cannot be parsed or fails validation.

    :param message: human-readable description of the problem
    :param field: name of the rule field the error is attributed to
        (this module uses "data", "window" and "amount")
    """

    def __init__(self, message, field):
        Exception.__init__(self, message)
        # Kept on the instance so callers can tell which field was at fault.
        self.field = field
|
|
|
|
|
|
def format_ntfy(**kwargs):
    """
    Build the notification body for the ntfy service.

    The first line is "<rule name> on <index>: <matched>", where `matched` is
    rendered as comma-separated "key: value" pairs. It is followed by the
    message dumped as YAML (each item of a list is dumped separately and
    followed by a newline) and a trailing "Total hits" line.
    A falsy message or falsy `matched` is rendered as an empty string.

    kwargs:
        rule: The rule object, must be specified (its `name` is read)
        index: The index the rule matched on, can be None
        message: The message to send, can be None
        matched: The matched fields (a dict), can be None
        total_hits: The total number of matches, optional (defaults to 0)

    :return: the notification text encoded as UTF-8 bytes
    """
    rule = kwargs.get("rule")
    index = kwargs.get("index")
    message = kwargs.get("message")
    matched = kwargs.get("matched")
    total_hits = kwargs.get("total_hits", 0)

    # YAML is used for the body purely for readability.
    if not message:
        body = ""
    elif isinstance(message, list):
        body = "".join(
            dump(entry, Dumper=Dumper, default_flow_style=False) + "\n"
            for entry in message
        )
    else:
        body = dump(message, Dumper=Dumper, default_flow_style=False)

    if matched:
        matched_text = ", ".join(f"{key}: {value}" for key, value in matched.items())
    else:
        matched_text = ""

    text = f"{rule.name} on {index}: {matched_text}\n{body}\nTotal hits: {total_hits}"
    return text.encode("utf-8", "replace")
|
|
|
|
|
|
def format_webhook(**kwargs):
    """
    Build the JSON payload for a webhook notification.

    The payload carries the rule id and name, the matched fields, the total
    hit count, the index and the message itself. `priority` and `topic` are
    copied from the notification settings when present there.

    kwargs:
        rule: The rule object, must be specified (`id` and `name` are read)
        index: The index the rule matched on, can be None
        message: The message to send, can be None, but will be sent as None
        matched: The matched fields, can be None, but will be sent as None
        total_hits: The total number of matches, optional (defaults to 0)
        notification_settings: The notification settings, must be specified

    :return: the payload serialised with orjson
    """
    rule = kwargs.get("rule")
    settings = kwargs.get("notification_settings")

    payload = {
        "rule_id": rule.id,
        "rule_name": rule.name,
        "match": kwargs.get("matched"),
        "total_hits": kwargs.get("total_hits", 0),
        "index": kwargs.get("index"),
        "data": kwargs.get("message"),
    }

    # Optional metadata that otherwise only lives in the notification settings.
    for optional_key in ("priority", "topic"):
        if optional_key in settings:
            payload[optional_key] = settings[optional_key]

    return orjson.dumps(payload)
|
|
|
|
|
|
def rule_notify(rule, index, message, meta=None):
    """
    Send a notification for a rule.

    Fetches the rule's notification settings and returns without sending
    anything when they are falsy. Otherwise assembles the keyword arguments
    shared by the formatting helpers and `sendmsg`, formats the body for the
    "ntfy" and "webhook" services, and sends it.

    :param rule: The rule object, must be specified
    :param index: The index the rule matched on, can be None
    :param message: The message to send, can be None
    :param meta: optional dict; its `matched` and `total_hits` keys are
        forwarded when present
    """
    # A falsy message is reported as "no match" in the title.
    word = "match" if message else "no match"
    title = f"Rule {rule.name} {word} on {index}"

    # The user notification settings are merged in with this
    notification_settings = rule.get_notification_settings()
    if not notification_settings:
        # No/invalid notification settings, don't send anything
        return

    # One set of keyword arguments serves both the formatters and sendmsg.
    cast = dict(
        title=title,
        user=rule.user,
        rule=rule,
        index=index,
        message=message,
        notification_settings=notification_settings,
    )
    if meta:
        for meta_key in ("matched", "total_hits"):
            if meta_key in meta:
                cast[meta_key] = meta[meta_key]

    # Services without a formatter are sent without a pre-formatted "msg".
    formatters = {"ntfy": format_ntfy, "webhook": format_webhook}
    formatter = formatters.get(rule.service)
    if formatter is not None:
        cast["msg"] = formatter(**cast)

    sendmsg(**cast)
|
|
|
|
|
|
class NotificationRuleData(object):
    """
    Parse and validate a notification rule and track its match state.

    The rule's `data` text is parsed as YAML into `self.parsed`, every value is
    normalised to a list, and permissions, schedule fields and time fields are
    validated. When constructed from a stored rule object (rather than a dict
    of cleaned form data) the object's per-index `match` map is also
    initialised.
    """

    def __init__(self, user, cleaned_data, db):
        """
        :param user: the user owning the rule; its permissions are checked
        :param cleaned_data: either a dict of rule fields, or a rule object
            whose `__dict__` supplies those fields
        :param db: the database backend used for queries and match storage
        :raises RuleParseError: if any validation step fails
        """
        self.user = user
        self.object = None

        # We are running live and have been passed a database object
        if not isinstance(cleaned_data, dict):
            self.object = cleaned_data
            cleaned_data = cleaned_data.__dict__

        self.cleaned_data = cleaned_data
        self.db = db
        self.data = self.cleaned_data.get("data")
        self.window = self.cleaned_data.get("window")
        self.parsed = None
        # Maps aggregation name -> (operator, threshold), filled by validation.
        self.aggs = {}

        self.validate_user_permissions()

        self.parse_data()
        self.ensure_list()
        self.validate_permissions()
        self.validate_schedule_fields()
        self.validate_time_fields()
        if self.object is not None:
            self.populate_matched()

    def populate_matched(self):
        """
        On first creation, the match field is None. We need to populate it with
        a dictionary containing the index names as keys and False as values.
        Indices already present in the map are left untouched.
        """
        if self.object.match is None:
            self.object.match = {}
        for index in self.parsed["index"]:
            if index not in self.object.match:
                self.object.match[index] = False
        self.object.save()

    def store_match(self, index, match):
        """
        Store a match result.
        Accepts None for the index to set all indices.
        Anything other than False is stored as a keyed hash of its
        key-sorted JSON serialisation; False is stored as-is.
        :param index: the index to store the match for, can be None
        :param match: the object that matched, or False for "no match"
        """
        if match is not False:
            # Dump match to JSON while sorting the keys
            match_normalised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
            match = siphash(self.db.hash_key, match_normalised)

        # Covers both an unset (None) and a malformed match field.
        if not isinstance(self.object.match, dict):
            self.object.match = {}

        if index is None:
            for index_iter in self.parsed["index"]:
                self.object.match[index_iter] = match
        else:
            self.object.match[index] = match
        self.object.save()
        log.debug(f"Stored match: {index} - {match}")

    def get_match(self, index=None, match=None):
        """
        Get a match result for an index.
        If the index is None, it will return True if any index has a match.
        If `match` is given, it is hashed the same way as in `store_match` and
        the result is whether the stored value for the index equals that hash.
        :param index: the index to get the match for, can be None
        :param match: optional object to compare against the stored hash
        :return: None if no usable match map exists, otherwise a bool or the
            stored value for the index
        """
        if self.object.match is None:
            self.object.match = {}
            self.object.save()
            return None
        if not isinstance(self.object.match, dict):
            return None

        if index is None:
            # Check if we have any matches on all indices
            return any(self.object.match.values())

        # Check if it's the same hash
        if match is not None:
            match_normalised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
            match = siphash(self.db.hash_key, match_normalised)
            hash_matches = self.object.match.get(index) == match
            return hash_matches

        return self.object.match.get(index)

    def format_aggs(self, aggs):
        """
        Format aggregations for the query.
        We have self.aggs, which contains:
        {"avg_sentiment": (">", 0.5)}
        and aggs, which contains:
        {"avg_sentiment": {"value": 0.6}}
        It's matched already, we just need to format it like so:
        {"avg_sentiment": "0.6>0.5"}
        :param aggs: the aggregations to format
        :return: the formatted aggregations
        :raises KeyError: if an aggregation is not defined in self.aggs
        """
        new_aggs = {}
        for agg_name, agg in aggs.items():
            op, value = self.aggs[agg_name]
            new_aggs[agg_name] = f"{agg['value']}{op}{value}"

        return new_aggs

    def reform_matches(self, index, matches, meta, mode):
        """
        Annotate matches with rule metadata before they are stored.
        A single match is wrapped in a list. Each match is copied, so the
        caller's objects are not modified.
        :param index: the index the matches belong to
        :param matches: one match mapping, or a list of them
        :param meta: metadata to attach to every match
        :param mode: the mode string to attach to every match
        :return: a new list of annotated match dicts
        """
        if not isinstance(matches, list):
            matches = [matches]
        # One timestamp is shared by every match of this batch.
        match_ts = datetime.utcnow().isoformat()
        return [
            {
                **match,
                "index": index,
                "rule_uuid": self.object.id,
                "meta": meta,
                "match_ts": match_ts,
                "mode": mode,
            }
            for match in matches
        ]

    async def ingest_matches(self, index, matches, meta, mode):
        """
        Store all matches for an index.
        :param index: the index to store the matches for
        :param matches: the matches to store
        :param meta: metadata attached to every stored match
        :param mode: the mode string attached to every stored match
        """
        new_matches = self.reform_matches(index, matches, meta, mode)
        await self.db.async_store_matches(new_matches)

    def ingest_matches_sync(self, index, matches, meta, mode):
        """
        Store all matches for an index (synchronous variant).
        :param index: the index to store the matches for
        :param matches: the matches to store
        :param meta: metadata attached to every stored match
        :param mode: the mode string attached to every stored match
        """
        new_matches = self.reform_matches(index, matches, meta, mode)
        self.db.store_matches(new_matches)

    async def rule_matched(self, index, message, meta, mode):
        """
        A rule has matched.
        If the stored hash for the index differs from this message, format the
        aggregations (unless `meta` already has `matched`), send a
        notification, store the new match and ingest the matches.
        :param index: the index the rule matched on
        :param message: the message object that matched
        :param meta: metadata dict; `aggs` is read when `matched` is absent
        :param mode: the mode string passed on to match ingestion
        """
        current_match = self.get_match(index, message)
        log.debug(f"Rule matched: {index} - current match: {current_match}")
        if current_match is False:
            # Matched now, but not before
            if "matched" not in meta:
                meta["matched"] = self.format_aggs(meta["aggs"])
            rule_notify(self.object, index, message, meta)
            self.store_match(index, message)
            await self.ingest_matches(index, message, meta, mode)

    def rule_matched_sync(self, index, message, meta, mode):
        """
        A rule has matched (synchronous variant of `rule_matched`).
        If the stored hash for the index differs from this message, format the
        aggregations (unless `meta` already has `matched`), send a
        notification, store the new match and ingest the matches.
        :param index: the index the rule matched on
        :param message: the message object that matched
        :param meta: metadata dict; `aggs` is read when `matched` is absent
        :param mode: the mode string passed on to match ingestion
        """
        current_match = self.get_match(index, message)
        log.debug(f"Rule matched: {index} - current match: {current_match}")
        if current_match is False:
            # Matched now, but not before
            if "matched" not in meta:
                meta["matched"] = self.format_aggs(meta["aggs"])
            rule_notify(self.object, index, message, meta)
            self.store_match(index, message)
            self.ingest_matches_sync(index, message, meta, mode)

    async def rule_no_match(self, index=None):
        """
        A rule has not matched.
        If the previous run did match, send a notification if configured to notify
        for empty matches, then record the index as not matched.
        :param index: the index the rule did not match on, can be None
        """
        current_match = self.get_match(index)
        log.debug(f"Rule not matched: {index} - current match: {current_match}")
        # NOTE(review): for a specific index get_match returns the stored value,
        # which store_match writes as a hash or False, so `is True` looks
        # reachable only when index is None - confirm this is intended.
        if current_match is True:
            # Matched before, but not now
            if self.object.send_empty:
                rule_notify(self.object, index, "no_match", None)
            self.store_match(index, False)

    async def run_schedule(self):
        """
        Run the schedule query.
        Get the results from the database, and check if the rule has matched.
        Check if all of the required aggregations have matched.
        """
        response = await self.db.schedule_query_results(self)
        if not response:
            # No results in the result_map; there is nothing to iterate over
            await self.rule_no_match()
            return
        for index, (meta, results) in response.items():
            if not results:
                # Falsy results, no matches; skip the aggregation check so the
                # index is not handled a second time below
                await self.rule_no_match(index)
                continue

            # Add the match values of all aggregations to a list
            aggs_for_index = []
            for agg_name in self.aggs.keys():
                if agg_name in meta["aggs"]:
                    if "match" in meta["aggs"][agg_name]:
                        aggs_for_index.append(meta["aggs"][agg_name]["match"])

            # All required aggs are present
            if len(aggs_for_index) == len(self.aggs.keys()):
                if all(aggs_for_index):
                    # All aggs have matched
                    await self.rule_matched(
                        index, results[: self.object.amount], meta, mode="schedule"
                    )
                    continue
            # Default branch, since the happy path has a continue keyword
            await self.rule_no_match(index)

    def test_schedule(self):
        """
        Test the schedule query to ensure it is valid.
        Run the query with the async_to_sync helper so we can call it from
        a form.
        Raises an exception if the query is invalid.
        """
        if self.db:
            sync_schedule = async_to_sync(self.db.schedule_query_results)
            sync_schedule(self)

    def validate_schedule_fields(self):
        """
        Ensure schedule fields are valid.
        index: can be a list, it will schedule one search per index.
        source: can be a list, it will be the filter for each search.
        tokens: can be list, it will ensure the message matches any token.
        msg: can be a list, it will ensure the message contains any msg.
        No other fields can be lists containing more than one item.
        For scheduled rules a `sentiment` field ("<operator><float>") is parsed
        into self.aggs["avg_sentiment"]. On-demand rules may not use the
        `query` or `tags` fields.
        :raises RuleParseError: if the fields are invalid
        """
        is_schedule = self.is_schedule

        if is_schedule:
            allowed_list_fields = ["index", "source", "tokens", "msg"]
            for field, value in self.parsed.items():
                if field not in allowed_list_fields:
                    if len(value) > 1:
                        raise RuleParseError(
                            (
                                f"For scheduled rules, field {field} cannot contain "
                                "more than one item"
                            ),
                            "data",
                        )
                    # An empty list is as empty as an empty first item.
                    if not value or len(str(value[0])) == 0:
                        raise RuleParseError(f"Field {field} cannot be empty", "data")
            if "sentiment" in self.parsed:
                sentiment = str(self.parsed["sentiment"][0]).strip()
                # A whitespace-only value is empty after stripping.
                if not sentiment or sentiment[0] not in [">", "<", "="]:
                    raise RuleParseError(
                        (
                            "Sentiment field must be a comparison operator and then a "
                            "float: >0.02"
                        ),
                        "data",
                    )
                operator = sentiment[0]
                number = sentiment[1:]

                try:
                    number = float(number)
                except ValueError:
                    raise RuleParseError(
                        (
                            "Sentiment field must be a comparison operator and then a "
                            "float: >0.02"
                        ),
                        "data",
                    )
                self.aggs["avg_sentiment"] = (operator, number)

        else:
            if "query" in self.parsed:
                raise RuleParseError(
                    "Field query cannot be used with on-demand rules", "data"
                )
            if "tags" in self.parsed:
                raise RuleParseError(
                    "Field tags cannot be used with on-demand rules", "data"
                )

    @property
    def is_schedule(self):
        """
        Check if the rule is a schedule rule.
        A rule is scheduled when it has an `interval` that is not 0.
        :return: True if the rule is a schedule rule, False otherwise
        """
        if "interval" in self.cleaned_data:
            if self.cleaned_data["interval"] != 0:
                return True
        return False

    def ensure_list(self):
        """
        Ensure all values in the data field are lists.
        Convert all non-list values to lists with one item.
        """
        for field, value in self.parsed.items():
            if not isinstance(value, list):
                # Replacing the value of an existing key is safe mid-iteration.
                self.parsed[field] = [value]

    def validate_user_permissions(self):
        """
        Ensure the user can use notification rules.
        :raises RuleParseError: if the user does not have permission
        """
        if not self.user.has_perm("core.use_rules"):
            raise RuleParseError("User does not have permission to use rules", "data")

    def validate_time_fields(self):
        """
        Validate the interval, window and amount fields.
        Require extra permissions for scheduled and high frequency rules.
        Prohibit window being specified with an on-demand interval.
        Prohibit window not being specified with a non-on-demand interval.
        Prohibit amount being specified with an on-demand interval.
        Prohibit amount not being specified with a non-on-demand interval.
        Validate window number and unit, and enforce the maximum window.
        Enforce the per-service maximum amount.
        :raises RuleParseError: if the fields are invalid
        """
        interval = self.cleaned_data.get("interval")
        window = self.cleaned_data.get("window")
        amount = self.cleaned_data.get("amount")
        service = self.cleaned_data.get("service")

        # An interval of exactly 0 means the rule is on-demand.
        on_demand = interval == 0

        # Not on demand and interval is too low
        if not on_demand and interval <= HIGH_FREQUENCY_MIN_SEC:
            if not self.user.has_perm("core.rules_high_frequency"):
                raise RuleParseError(
                    "User does not have permission to use high frequency rules", "data"
                )

        if not on_demand:
            if not self.user.has_perm("core.rules_scheduled"):
                raise RuleParseError(
                    "User does not have permission to use scheduled rules", "data"
                )

        if on_demand and window is not None:
            # Interval is on demand and window is specified
            # We can't have a window with on-demand rules
            raise RuleParseError(
                "Window cannot be specified with on-demand interval", "window"
            )

        if not on_demand and window is None:
            # Interval is not on demand and window is not specified
            # We can't have a non-on-demand interval without a window
            raise RuleParseError(
                "Window must be specified with non-on-demand interval", "window"
            )

        if not on_demand and amount is None:
            # Interval is not on demand and amount is not specified
            # We can't have a non-on-demand interval without an amount
            raise RuleParseError(
                "Amount must be specified with non-on-demand interval", "amount"
            )
        if on_demand and amount is not None:
            # Interval is on demand and amount is specified
            # We can't have an amount with on-demand rules
            raise RuleParseError(
                "Amount cannot be specified with on-demand interval", "amount"
            )

        if window is not None:
            # A window is "<digits><unit>", e.g. "30m"
            window_number = window[:-1]
            if not window_number.isdigit():
                raise RuleParseError("Window prefix must be a number", "window")
            window_number = int(window_number)
            window_unit = window[-1]
            if window_unit not in SECONDS_PER_UNIT:
                raise RuleParseError(
                    (
                        "Window unit must be one of "
                        f"{', '.join(SECONDS_PER_UNIT.keys())},"
                        f" not '{window_unit}'"
                    ),
                    "window",
                )
            window_seconds = window_number * SECONDS_PER_UNIT[window_unit]
            if window_seconds > MAX_WINDOW:
                raise RuleParseError(
                    f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
                    "window",
                )

        if amount is not None:
            if service == "ntfy":
                if amount > MAX_AMOUNT_NTFY:
                    raise RuleParseError(
                        f"Amount cannot be larger than {MAX_AMOUNT_NTFY} for ntfy",
                        "amount",
                    )
            else:
                if amount > MAX_AMOUNT_WEBHOOK:
                    raise RuleParseError(
                        (
                            f"Amount cannot be larger than {MAX_AMOUNT_WEBHOOK} for "
                            f"{service}"
                        ),
                        "amount",
                    )

    def validate_permissions(self):
        """
        Validate permissions for the source and index variables.
        Also set the default values for the user if not present.
        Stores the default values in the parsed field.
        parse_index and parse_source are called with raise_error=True, so they
        presumably raise when the user lacks access - the exception type is
        not visible here.
        """
        if "index" in self.parsed:
            index = self.parsed["index"]
            if isinstance(index, list):
                for i in index:
                    parse_index(self.user, {"index": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            index = parse_index(self.user, {}, raise_error=True)
            self.parsed["index"] = [index]

        if "source" in self.parsed:
            source = self.parsed["source"]
            if isinstance(source, list):
                for i in source:
                    parse_source(self.user, {"source": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            source = parse_source(self.user, {}, raise_error=True)
            self.parsed["source"] = source

    def parse_data(self):
        """
        Parse the data in the text field to YAML.
        The result must be a mapping of field names to values.
        :raises RuleParseError: if the data is invalid
        """
        # SECURITY NOTE(review): Loader/CLoader is PyYAML's full loader, which can
        # construct arbitrary Python objects from tagged YAML. If self.data can
        # come from untrusted users, consider SafeLoader/CSafeLoader instead.
        try:
            self.parsed = load(self.data, Loader=Loader)
        except (ScannerError, ParserError) as e:
            raise RuleParseError(f"Invalid YAML: {e}", "data") from e
        # Empty text parses to None and bare scalars to str/int; later steps
        # need a mapping, so reject anything else here with a clear error.
        if not isinstance(self.parsed, dict):
            raise RuleParseError(
                "Invalid YAML: rule data must be a mapping of fields to values",
                "data",
            )

    def __str__(self):
        """
        Get a YAML representation of the data field of the rule.
        """
        return dump(self.parsed, Dumper=Dumper)

    def get_data(self):
        """
        Return the data field as a dictionary.
        """
        return self.parsed
|