from yaml import dump, load
from yaml.parser import ParserError
from yaml.scanner import ScannerError

try:
    from yaml import CDumper as Dumper
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader, Dumper

import uuid
from copy import deepcopy
from datetime import datetime, timezone

import orjson
from siphashc import siphash

from core.lib.notify import sendmsg
from core.lib.parsing import parse_index, parse_source
from core.util import logs

log = logs.get_logger("rules")

# Multipliers used to convert a window such as "5m" into seconds.
SECONDS_PER_UNIT = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}

# Largest permitted window, in seconds (30 days).
MAX_WINDOW = 2592000
# Largest permitted `amount` per notification service.
MAX_AMOUNT_NTFY = 10
MAX_AMOUNT_WEBHOOK = 1000

# Intervals at or below this many seconds require the high frequency permission.
HIGH_FREQUENCY_MIN_SEC = 60


class RuleParseError(Exception):
    """
    Raised when a rule fails validation.

    :param message: human readable description of the problem
    :param field: name of the input field the problem relates to
    """

    def __init__(self, message, field):
        super().__init__(message)
        self.field = field


def format_ntfy(**kwargs):
    """
    Format a message for ntfy.
    If the message is a list, every item is dumped as YAML and followed by a
    newline.
    If the message is None, it will be replaced with an empty string.
    If specified, `matched` will be pretty-printed in the first line.

    kwargs:
        rule: The rule object, must be specified
        index: The index the rule matched on, can be None
        message: The message to send, can be None
        meta:
            matched: The matched fields, can be None
            total_hits: The total number of matches, optional

    :return: the UTF-8 encoded notification body
    """
    rule = kwargs.get("rule")
    index = kwargs.get("index")
    message = kwargs.get("message")
    # Tolerate both a missing and an explicitly empty meta
    meta = kwargs.get("meta") or {}
    total_hits = meta.get("total_hits", 0)
    matched = meta.get("matched")

    if not message:
        messages_formatted = ""
    elif isinstance(message, list):
        # Dump the messages in YAML for readability, one document per item
        messages_formatted = "".join(
            dump(message_iter, Dumper=Dumper, default_flow_style=False) + "\n"
            for message_iter in message
        )
    else:
        messages_formatted = dump(message, Dumper=Dumper, default_flow_style=False)

    if matched:
        matched = ", ".join(f"{k}: {v}" for k, v in matched.items())
    else:
        matched = ""

    notify_message = f"{rule.name} on {index}: {matched}\n{messages_formatted}"
    notify_message += f"\nTotal hits: {total_hits}"
    return notify_message.encode("utf-8", "replace")


def format_webhook(**kwargs):
    """
    Format a message for a webhook.
    Adds some metadata to the message that would normally be only in
    notification_settings.
    Dumps the message in JSON.

    kwargs:
        message: The message to send, can be None, but will be sent as None
        meta: The metadata to send, can be None, but will be sent as None
        notification_settings: The notification settings, must be specified
            priority: The priority of the message, optional
            topic: The topic of the message, optional

    :return: the JSON document as bytes, as produced by orjson
    """
    notification_settings = kwargs.get("notification_settings")
    notify_message = {
        "data": kwargs.get("message"),
        "meta": kwargs.get("meta"),
    }
    # Only forward the optional settings that were actually configured
    for key in ("priority", "topic"):
        if key in notification_settings:
            notify_message[key] = notification_settings[key]
    return orjson.dumps(notify_message)


def rule_notify(rule, index, message, meta=None):
    """
    Send a notification for a matching rule.
    Gets the notification settings for the rule.
    Runs the formatting helpers for the service.

    :param rule: The rule object, must be specified
    :param index: The index the rule matched on, can be None
    :param message: The message to send, can be None
    :param meta: dict of metadata, only forwarded when it is not empty
    """
    # If there is no message, don't say anything matched
    word = "match" if message else "no match"
    title = f"Rule {rule.name} {word} on {index}"

    # The user notification settings are merged in with this
    notification_settings = rule.get_notification_settings()
    if not notification_settings:
        # No/invalid notification settings, don't send anything
        return
    if notification_settings.get("service") == "none":
        # Don't send anything
        return

    # Strip the per-message meta from a copy, so the caller's objects (which
    # may still be ingested afterwards) are left untouched.
    # The loop variable must not be called `index`: that would overwrite the
    # index name that is passed on to the formatters and to sendmsg.
    message_copy = deepcopy(message)
    if isinstance(message_copy, dict):
        message_copy.pop("meta", None)
    elif isinstance(message_copy, list):
        for entry in message_copy:
            if isinstance(entry, dict):
                entry.pop("meta", None)

    # Create a cast we can reuse for the formatting helpers and sendmsg
    cast = {
        "title": title,
        "user": rule.user,
        "rule": rule,
        "index": index,
        "message": message_copy,
        "notification_settings": notification_settings,
    }
    if meta:
        cast["meta"] = meta

    if rule.service == "ntfy":
        cast["msg"] = format_ntfy(**cast)
    elif rule.service == "webhook":
        cast["msg"] = format_webhook(**cast)

    sendmsg(**cast)


class NotificationRuleData(object):
    """
    Parsed and validated representation of a notification rule.

    Construction parses the YAML in the `data` field and runs every
    validator, so a RuleParseError is raised for an invalid rule.
    """

    def __init__(self, user, cleaned_data, db):
        """
        :param user: the user owning the rule, used for permission checks
        :param cleaned_data: either a dict of rule fields, or a database
                             object whose attributes hold the rule fields
        :param db: the database backend used for queries and match storage
        :raises RuleParseError: if the rule is invalid
        """
        self.user = user
        self.object = None

        # We are running live and have been passed a database object
        if not isinstance(cleaned_data, dict):
            self.object = cleaned_data
            cleaned_data = cleaned_data.__dict__

        self.cleaned_data = cleaned_data
        self.db = db
        self.data = self.cleaned_data.get("data")
        self.window = self.cleaned_data.get("window")
        self.policy = self.cleaned_data.get("policy")
        self.parsed = None
        # Maps aggregation name to an (operator, number) tuple
        self.aggs = {}

        self.validate_user_permissions()

        self.parse_data()
        self.ensure_list()
        self.validate_permissions()
        self.validate_schedule_fields()
        self.validate_time_fields()
        if self.object is not None:
            self.populate_matched()

    def clear_database_matches(self):
        """
        Delete all matches for this rule.
        """
        rule_id = str(self.object.id)
        self.db.delete_rule_entries(rule_id)

    def populate_matched(self):
        """
        On first creation, the match field is None. We need to populate it with
        a dictionary containing the index names as keys and False as values.
        Indices that already have an entry are left alone.
        """
        if self.object.match is None:
            self.object.match = {}
        for index in self.parsed["index"]:
            if index not in self.object.match:
                self.object.match[index] = False
        self.object.save()

    def format_matched(self, messages):
        """
        Work out which rule fields were matched by the given messages.

        :param messages: iterable of message dicts
        :return: dict of field name to the rule value (for msg and tokens) or
                 message value (for all other fields) that matched
        """
        matched = {}
        for message in messages:
            for field, value in self.parsed.items():
                if field == "msg":
                    # Allow partial matches for msg
                    for msg in value:
                        if "msg" in message and (
                            msg.lower() in message["msg"].lower()
                        ):
                            matched[field] = msg
                            # Break out of the msg matching loop
                            break
                    # Continue to next field
                    continue
                if field == "tokens":
                    # Tokens are compared case-insensitively
                    for token in value:
                        if "tokens" in message and token.lower() in [
                            x.lower() for x in message["tokens"]
                        ]:
                            matched[field] = token
                            # Break out of the token matching loop
                            break
                    # Continue to next field
                    continue
                if field in message and message[field] in value:
                    # Do exact matches for all other fields
                    matched[field] = message[field]
        return matched

    def store_match(self, index, match):
        """
        Store a match result.
        Accepts None for the index to set all indices.
        Anything other than False is stored as a hash of its key-sorted JSON
        form.

        :param index: the index to store the match for, can be None
        :param match: the object that matched, or False for no match
        """
        if match is not False:
            # Dump match to JSON while sorting the keys
            match_normalised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
            match = siphash(self.db.hash_key, match_normalised)

        # Covers both None and any other unexpected stored value
        if not isinstance(self.object.match, dict):
            self.object.match = {}

        if index is None:
            for index_iter in self.parsed["index"]:
                self.object.match[index_iter] = match
        else:
            self.object.match[index] = match
        self.object.save()
        log.debug(f"Stored match: {index} - {match}")

    def get_match(self, index=None, match=None):
        """
        Get a match result for an index.
        If the index is None, it will return True if any index has a match.
        If a match is given, it will return whether its hash equals the one
        stored for the index.

        :param index: the index to get the match for, can be None
        :param match: the object to compare against the stored hash, can be None
        :return: True, False, or None when nothing is stored
        """
        if self.object.match is None:
            self.object.match = {}
            self.object.save()
            return None
        if not isinstance(self.object.match, dict):
            return None

        if index is None:
            # Check if we have any matches on all indices
            values = self.object.match.values()
            if not values:
                return None
            return any(values)

        # Check if it's the same hash
        if match is not None:
            match_normalised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
            match = siphash(self.db.hash_key, match_normalised)
            return self.object.match.get(index) == match

        returned_match = self.object.match.get(index, None)
        # Exact type check on purpose: bool is a subclass of int, and a
        # stored False must not be mistaken for a hash.
        if type(returned_match) is int:
            # We are getting a hash from the database,
            # but we have nothing to check it against.
            # In this instance, we are checking if we got a match
            # at all last time. We can confidently say that since
            # we have a hash, we did.
            returned_match = True
        return returned_match

    def format_aggs(self, aggs):
        """
        Format aggregations for the query.
        We have self.aggs, which contains:
            {"avg_sentiment": (">", 0.5)}
        and aggs, which contains:
            {"avg_sentiment": {"value": 0.6}}
        It's matched already, we just need to format it like so:
            {"avg_sentiment": "0.6>0.5"}
        Aggregations that are not part of the rule are left out.

        :param aggs: the aggregations to format
        :return: the formatted aggregations
        """
        new_aggs = {}
        for agg_name, agg in aggs.items():
            if agg_name in self.aggs:
                op, value = self.aggs[agg_name]
                new_aggs[agg_name] = f"{agg['value']}{op}{value}"
        return new_aggs

    def reform_matches(self, index, matches, meta, mode):
        """
        Annotate matches with the details of this run.

        Only the list is copied: the match dicts themselves are updated in
        place, so the objects passed in carry the new keys afterwards.

        :param index: the index the matches belong to, can be None
        :param matches: a match dict, or a list of them
        :param meta: metadata for the run, empty values are dropped
        :param mode: the mode the rule ran in
        :return: list of the annotated matches
        """
        if not isinstance(matches, list):
            matches = [matches]
        matches_copy = matches.copy()
        # An aware datetime is required here: calling timestamp() on the naive
        # result of utcnow() would interpret it as local time.
        match_ts = int(datetime.now(timezone.utc).timestamp())
        batch_id = str(uuid.uuid4())
        rule_id = str(self.object.id)

        # Filter empty fields in meta
        meta = {k: v for k, v in meta.items() if v}

        for match in matches_copy:
            match["index"] = index
            match["rule_id"] = rule_id
            match["meta"] = meta
            match["match_ts"] = match_ts
            match["mode"] = mode
            match["batch_id"] = batch_id
        return matches_copy

    async def ingest_matches(self, index, matches, meta, mode):
        """
        Store all matches for an index, if the rule has ingest enabled.
        The matches are stored as given; index, meta and mode are not used.

        :param index: the index to store the matches for
        :param matches: the matches to store
        """
        if self.object.ingest:
            await self.db.async_store_matches(matches)

    def ingest_matches_sync(self, index, matches, meta, mode):
        """
        Store all matches for an index, if the rule has ingest enabled.
        The matches are stored as given; index, meta and mode are not used.

        :param index: the index to store the matches for
        :param matches: the matches to store
        """
        if self.object.ingest:
            self.db.store_matches(matches)

    def _notify_match(self, index, message, meta, mode):
        """
        Shared part of rule_matched and rule_matched_sync: decide whether to
        notify, fill in meta, store the match and send the notification.

        :return: the reformed matches to ingest, or None if the notification
                 was suppressed by the policy
        """
        current_match = self.get_match(index, message)
        log.debug(f"Rule matched: {index} - current match: {current_match}")

        last_run_had_matches = current_match is True
        if self.policy in ["change", "default"] and last_run_had_matches:
            # Change or Default policy, notifying only on new results.
            # Last run had matches, and this one did too.
            # We don't need to notify
            return None
        # Any other policy (that is, "always") notifies every time

        if "matched" not in meta:
            meta["matched"] = self.format_matched(message)
        if "aggs" in meta:
            aggs_formatted = self.format_aggs(meta["aggs"])
            if aggs_formatted:
                meta["matched_aggs"] = aggs_formatted
        meta["is_match"] = True
        self.store_match(index, message)

        message = self.reform_matches(index, message, meta, mode)
        rule_notify(self.object, index, message, meta)
        return message

    async def rule_matched(self, index, message, meta, mode):
        """
        A rule has matched.
        With the change and default policies, nothing is sent or stored if the
        stored hash for the index equals the hash of this message.
        Otherwise, send a notification after formatting the aggregations, and
        ingest the matches.

        :param index: the index the rule matched on
        :param message: the message object that matched
        :param meta: metadata for the run, updated in place
        :param mode: the mode the rule ran in
        """
        reformed = self._notify_match(index, message, meta, mode)
        if reformed is not None:
            await self.ingest_matches(index, reformed, meta, mode)

    def rule_matched_sync(self, index, message, meta, mode):
        """
        A rule has matched.
        Synchronous counterpart of rule_matched.

        :param index: the index the rule matched on
        :param message: the message object that matched
        :param meta: metadata for the run, updated in place
        :param mode: the mode the rule ran in
        """
        reformed = self._notify_match(index, message, meta, mode)
        if reformed is not None:
            self.ingest_matches_sync(index, reformed, meta, mode)

    # No sync helper for this one as we only need it for schedules
    async def rule_no_match(self, index=None, message=None, mode=None):
        """
        A rule has not matched.
        If the previous run did match, send a notification if configured to
        notify for empty matches.

        :param index: the index the rule did not match on, can be None
        :param message: the reason there was no match, sent as meta["msg"]
        :param mode: the mode the rule ran in
        """
        current_match = self.get_match(index)
        log.debug(
            f"Rule not matched: {index} - current match: {current_match}: {message}"
        )

        last_run_had_matches = current_match is True
        initial = current_match is None

        self.store_match(index, False)

        if self.policy in ["change", "default"]:
            if not last_run_had_matches and not initial:
                # We don't need to notify if the last run didn't have matches
                return

        if self.policy in ["always", "change"]:
            # Never notify for empty matches on default policy
            meta = {"msg": message, "is_match": False}
            matches = self.reform_matches(index, [{"msg": None}], meta, mode)
            rule_notify(self.object, index, matches, meta)
            await self.ingest_matches(
                index=index,
                matches=matches,
                meta=meta,
                mode="schedule",
            )

    async def run_schedule(self):
        """
        Run the schedule query.
        Get the results from the database, and check if the rule has matched.
        Check if all of the required aggregations have matched.
        """
        response = await self.db.schedule_query_results(self)
        if not response:
            # No results in the result_map
            await self.rule_no_match(
                message="No response from database", mode="schedule"
            )
            return
        for index, (meta, results) in response.items():
            if not results:
                # Falsy results, no matches
                await self.rule_no_match(
                    index, message="No results for index", mode="schedule"
                )
                continue

            # Add the match values of all aggregations to a list.
            # A meta without "aggs" is treated as having no aggregations, and
            # meta is only inspected when the rule has aggregations at all.
            meta_aggs = meta.get("aggs", {}) if self.aggs else {}
            aggs_for_index = [
                meta_aggs[agg_name]["match"]
                for agg_name in self.aggs
                if agg_name in meta_aggs and "match" in meta_aggs[agg_name]
            ]

            # All required aggs are present
            if len(aggs_for_index) == len(self.aggs):
                if all(aggs_for_index):
                    # All aggs have matched
                    await self.rule_matched(
                        index, results[: self.object.amount], meta, mode="schedule"
                    )
                    continue
            # Default branch, since the happy path has a continue keyword
            await self.rule_no_match(
                index, message="Aggregation did not match", mode="schedule"
            )

    def test_schedule(self):
        """
        Test the schedule query to ensure it is valid.
        Does nothing when no database was given.
        Raises an exception if the query is invalid.
        """
        if self.db:
            self.db.schedule_query_results_test_sync(self)

    def validate_schedule_fields(self):
        """
        Ensure schedule fields are valid.
        index: can be a list, it will schedule one search per index.
        source: can be a list, it will be the filter for each search.
        tokens: can be list, it will ensure the message matches any token.
        msg: can be a list, it will ensure the message contains any msg.
        No other fields can be lists containing more than one item.
        For on-demand rules, the query and tags fields and any policy other
        than default are rejected.

        :raises RuleParseError: if the fields are invalid
        """
        sentiment_error = (
            "Sentiment field must be a comparison operator and then a "
            "float: >0.02"
        )
        if self.is_schedule:
            allowed_list_fields = ["index", "source", "tokens", "msg"]
            for field, value in self.parsed.items():
                if field not in allowed_list_fields:
                    if len(value) > 1:
                        raise RuleParseError(
                            (
                                f"For scheduled rules, field {field} cannot contain "
                                "more than one item"
                            ),
                            "data",
                        )
                    # An empty list is as empty as an empty first item
                    if not value or len(str(value[0])) == 0:
                        raise RuleParseError(f"Field {field} cannot be empty", "data")
            if "sentiment" in self.parsed:
                sentiment = str(self.parsed["sentiment"][0]).strip()
                # Whitespace-only input is empty once stripped
                if not sentiment or sentiment[0] not in [">", "<", "="]:
                    raise RuleParseError(sentiment_error, "data")
                operator = sentiment[0]
                try:
                    number = float(sentiment[1:])
                except ValueError as e:
                    raise RuleParseError(sentiment_error, "data") from e
                self.aggs["avg_sentiment"] = (operator, number)

        else:
            if "query" in self.parsed:
                raise RuleParseError(
                    "Field query cannot be used with on-demand rules", "data"
                )
            if "tags" in self.parsed:
                raise RuleParseError(
                    "Field tags cannot be used with on-demand rules", "data"
                )
            if self.policy != "default":
                raise RuleParseError(
                    (
                        f"Cannot use {self.cleaned_data['policy']} policy with "
                        "on-demand rules"
                    ),
                    "policy",
                )

    @property
    def is_schedule(self):
        """
        Check if the rule is a schedule rule, which is the case when an
        interval other than 0 is present.

        :return: True if the rule is a schedule rule, False otherwise
        """
        if "interval" in self.cleaned_data:
            if self.cleaned_data["interval"] != 0:
                return True
        return False

    def ensure_list(self):
        """
        Ensure all values in the data field are lists.
        Wrap every value that is not a list in a list with one item.
        """
        for field, value in self.parsed.items():
            if not isinstance(value, list):
                self.parsed[field] = [value]

    def validate_user_permissions(self):
        """
        Ensure the user can use notification rules.

        :raises RuleParseError: if the user does not have permission
        """
        if not self.user.has_perm("core.use_rules"):
            raise RuleParseError("User does not have permission to use rules", "data")

    def validate_time_fields(self):
        """
        Validate the interval and window fields.
        Prohibit window being specified with an on-demand interval.
        Prohibit window not being specified with a non-on-demand interval.
        Prohibit amount being specified with an on-demand interval.
        Prohibit amount not being specified with a non-on-demand interval.
        Validate window field.
        Validate window unit and enforce maximum.
        Enforce the maximum amount for the service.

        :raises RuleParseError: if the fields are invalid
        """
        interval = self.cleaned_data.get("interval")
        window = self.cleaned_data.get("window")
        amount = self.cleaned_data.get("amount")
        service = self.cleaned_data.get("service")

        if interval is None:
            # Without this, the comparison below fails with a TypeError
            raise RuleParseError("Interval must be specified", "interval")

        on_demand = interval == 0

        # Not on demand and interval is too low
        if not on_demand and interval <= HIGH_FREQUENCY_MIN_SEC:
            if not self.user.has_perm("core.rules_high_frequency"):
                raise RuleParseError(
                    "User does not have permission to use high frequency rules", "data"
                )
        if not on_demand:
            if not self.user.has_perm("core.rules_scheduled"):
                raise RuleParseError(
                    "User does not have permission to use scheduled rules", "data"
                )

        if on_demand and window is not None:
            # Interval is on demand and window is specified
            # We can't have a window with on-demand rules
            raise RuleParseError(
                "Window cannot be specified with on-demand interval", "window"
            )

        if not on_demand and window is None:
            # Interval is not on demand and window is not specified
            # We can't have a non-on-demand interval without a window
            raise RuleParseError(
                "Window must be specified with non-on-demand interval", "window"
            )

        if not on_demand and amount is None:
            # Interval is not on demand and amount is not specified
            # We can't have a non-on-demand interval without an amount
            raise RuleParseError(
                "Amount must be specified with non-on-demand interval", "amount"
            )
        if on_demand and amount is not None:
            # Interval is on demand and amount is specified
            # We can't have an amount with on-demand rules
            raise RuleParseError(
                "Amount cannot be specified with on-demand interval", "amount"
            )

        if window is not None:
            # A window is a number followed by a one character unit, e.g. 5m
            window_number = window[:-1]
            if not window_number.isdigit():
                raise RuleParseError("Window prefix must be a number", "window")
            window_number = int(window_number)
            window_unit = window[-1]
            if window_unit not in SECONDS_PER_UNIT:
                raise RuleParseError(
                    (
                        "Window unit must be one of "
                        f"{', '.join(SECONDS_PER_UNIT.keys())},"
                        f" not '{window_unit}'"
                    ),
                    "window",
                )
            window_seconds = window_number * SECONDS_PER_UNIT[window_unit]
            if window_seconds > MAX_WINDOW:
                raise RuleParseError(
                    f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
                    "window",
                )

        if amount is not None:
            if service == "ntfy":
                if amount > MAX_AMOUNT_NTFY:
                    raise RuleParseError(
                        f"Amount cannot be larger than {MAX_AMOUNT_NTFY} for ntfy",
                        "amount",
                    )
            else:
                # Every other service shares the webhook limit
                if amount > MAX_AMOUNT_WEBHOOK:
                    raise RuleParseError(
                        (
                            f"Amount cannot be larger than {MAX_AMOUNT_WEBHOOK} for "
                            f"{service}"
                        ),
                        "amount",
                    )

    def validate_permissions(self):
        """
        Validate permissions for the source and index variables.
        Also set the default values for the user if not present.
        Stores the default values in the parsed field.

        Errors are raised by parse_index and parse_source, which are called
        with raise_error=True.
        NOTE(review): the original docstring names QueryError as the exception
        type - confirm against core.lib.parsing.
        """
        if "index" in self.parsed:
            index = self.parsed["index"]
            if isinstance(index, list):
                for i in index:
                    parse_index(self.user, {"index": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            index = parse_index(self.user, {}, raise_error=True)
            self.parsed["index"] = [index]

        if "source" in self.parsed:
            source = self.parsed["source"]
            if isinstance(source, list):
                for i in source:
                    parse_source(self.user, {"source": i}, raise_error=True)
        else:
            # Get the default value for the user if not present.
            # Unlike the index default, this is stored without being wrapped
            # in a list - presumably parse_source already returns a list;
            # TODO confirm.
            source = parse_source(self.user, {}, raise_error=True)
            self.parsed["source"] = source

    def parse_data(self):
        """
        Parse the data in the text field to YAML.

        :raises RuleParseError: if the data is invalid YAML, or is not a
                                mapping of fields to values
        """
        try:
            # SECURITY(review): Loader/CLoader is PyYAML's full loader, which
            # can construct arbitrary Python objects, and self.data looks to
            # be user supplied. Consider SafeLoader/CSafeLoader if rules never
            # need Python-specific tags.
            self.parsed = load(self.data, Loader=Loader)
        except (ScannerError, ParserError) as e:
            raise RuleParseError(f"Invalid YAML: {e}", "data") from e
        if not isinstance(self.parsed, dict):
            # Empty text, scalars and lists parse fine, but every later step
            # needs a mapping and would fail with an AttributeError
            raise RuleParseError(
                "Rule data must be a YAML mapping of fields to values", "data"
            )

    def __str__(self):
        """
        Get a YAML representation of the data field of the rule.
        """
        return dump(self.parsed, Dumper=Dumper)

    def get_data(self):
        """
        Return the data field as a dictionary.
        """
        return self.parsed