from yaml import YAMLError, dump, load
from yaml.parser import ParserError
from yaml.scanner import ScannerError

try:
    from yaml import CDumper as Dumper
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader, Dumper

from datetime import datetime

import orjson
from asgiref.sync import async_to_sync
from siphashc import siphash

from core.lib.notify import sendmsg
from core.lib.parsing import parse_index, parse_source
from core.util import logs

log = logs.get_logger("rules")

# Multipliers used to convert a window such as "5m" or "2d" to seconds
SECONDS_PER_UNIT = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}

# Largest permitted window, in seconds (30 days)
MAX_WINDOW = 2592000
# Largest permitted `amount` per service
MAX_AMOUNT_NTFY = 10
MAX_AMOUNT_WEBHOOK = 1000
# Intervals at or below this many seconds need the high frequency permission
HIGH_FREQUENCY_MIN_SEC = 60


class RuleParseError(Exception):
    """
    Raised when a rule fails validation.

    :param message: human-readable description of the problem
    :param field: name of the form field the error relates to
    """

    def __init__(self, message, field):
        super().__init__(message)
        self.field = field


def format_ntfy(**kwargs):
    """
    Format a message for ntfy.
    If the message is a list, every item is dumped as YAML and followed by
    a newline.
    If the message is None (or otherwise falsy), it will be replaced with an
    empty string.
    If specified, `matched` will be pretty-printed in the first line.

    kwargs:
        rule: The rule object, must be specified
        index: The index the rule matched on, can be None
        message: The message to send, can be None
        matched: The matched fields, can be None
        total_hits: The total number of matches, optional

    :return: the UTF-8 encoded notification body
    """
    rule = kwargs.get("rule")
    index = kwargs.get("index")
    message = kwargs.get("message")
    matched = kwargs.get("matched")
    total_hits = kwargs.get("total_hits", 0)

    if not message:
        messages_formatted = ""
    elif isinstance(message, list):
        # Dump each message in YAML for readability, newline-terminated
        messages_formatted = "".join(
            dump(message_iter, Dumper=Dumper, default_flow_style=False) + "\n"
            for message_iter in message
        )
    else:
        messages_formatted = dump(message, Dumper=Dumper, default_flow_style=False)

    if matched:
        matched = ", ".join(f"{k}: {v}" for k, v in matched.items())
    else:
        matched = ""

    notify_message = f"{rule.name} on {index}: {matched}\n{messages_formatted}"
    notify_message += f"\nTotal hits: {total_hits}"
    return notify_message.encode("utf-8", "replace")


def format_webhook(**kwargs):
    """
    Format a message for a webhook.
    Adds some metadata to the message that would normally be only in
    notification_settings.
    Dumps the message in JSON.

    kwargs:
        rule: The rule object, must be specified
        index: The index the rule matched on, can be None
        message: The message to send, can be None, but will be sent as None
        matched: The matched fields, can be None, but will be sent as None
        total_hits: The total number of matches, optional
        notification_settings: The notification settings, must be specified
            priority: The priority of the message, optional
            topic: The topic of the message, optional

    :return: the JSON payload as bytes
    """
    rule = kwargs.get("rule")
    index = kwargs.get("index")
    message = kwargs.get("message")
    matched = kwargs.get("matched")
    total_hits = kwargs.get("total_hits", 0)
    notification_settings = kwargs.get("notification_settings")

    notify_message = {
        "rule_id": rule.id,
        "rule_name": rule.name,
        "match": matched,
        "total_hits": total_hits,
        "index": index,
        "data": message,
    }
    # Only copy the optional settings across when they are present
    for optional_key in ("priority", "topic"):
        if optional_key in notification_settings:
            notify_message[optional_key] = notification_settings[optional_key]

    return orjson.dumps(notify_message)


def rule_notify(rule, index, message, meta=None):
    """
    Send a notification for a matching rule.
    Gets the notification settings for the rule.
    Runs the formatting helpers for the service.

    :param rule: The rule object, must be specified
    :param index: The index the rule matched on, can be None
    :param message: The message to send, can be None
    :param meta: dict of metadata; its `matched` and `total_hits` keys are
                 forwarded to the formatting helpers when present
    """
    # If there is no message, don't say anything matched
    word = "match" if message else "no match"
    title = f"Rule {rule.name} {word} on {index}"

    # The user notification settings are merged in with this
    notification_settings = rule.get_notification_settings()
    if not notification_settings:
        # No/invalid notification settings, don't send anything
        return

    # Create a cast we can reuse for the formatting helpers and sendmsg
    cast = {
        "title": title,
        "user": rule.user,
        "rule": rule,
        "index": index,
        "message": message,
        "notification_settings": notification_settings,
    }
    if meta:
        if "matched" in meta:
            cast["matched"] = meta["matched"]
        if "total_hits" in meta:
            cast["total_hits"] = meta["total_hits"]

    if rule.service == "ntfy":
        cast["msg"] = format_ntfy(**cast)
    elif rule.service == "webhook":
        cast["msg"] = format_webhook(**cast)

    sendmsg(**cast)


class NotificationRuleData(object):
    """
    Parse, validate and run a notification rule.

    Construction runs the full validation pipeline and raises
    RuleParseError on the first problem found.

    :param user: the user owning the rule, used for permission checks
    :param cleaned_data: either a dict of rule fields, or a rule database
                         object whose `__dict__` supplies the fields
    :param db: the database backend used to run and store queries
    """

    def __init__(self, user, cleaned_data, db):
        self.user = user
        self.object = None

        # We are running live and have been passed a database object
        if not isinstance(cleaned_data, dict):
            self.object = cleaned_data
            cleaned_data = cleaned_data.__dict__

        self.cleaned_data = cleaned_data
        self.db = db
        self.data = self.cleaned_data.get("data")
        self.window = self.cleaned_data.get("window")
        self.parsed = None
        # Maps aggregation name -> (operator, threshold)
        self.aggs = {}

        # Order matters: parsing must precede every check that reads
        # self.parsed, and ensure_list() must run before the list checks.
        self.validate_user_permissions()
        self.parse_data()
        self.ensure_list()
        self.validate_permissions()
        self.validate_schedule_fields()
        self.validate_time_fields()
        if self.object is not None:
            self.populate_matched()

    def populate_matched(self):
        """
        On first creation, the match field is None. We need to populate it
        with a dictionary containing the index names as keys and False as
        values. Existing entries are left untouched.
        """
        if self.object.match is None:
            self.object.match = {}
        for index in self.parsed["index"]:
            if index not in self.object.match:
                self.object.match[index] = False
        self.object.save()

    def store_match(self, index, match):
        """
        Store a match result.
        Accepts None for the index to set all indices.
        Anything other than False is stored as a hash of its JSON form.

        :param index: the index to store the match for, can be None
        :param match: the object that matched, or False for no match
        """
        if match is not False:
            # Dump match to JSON while sorting the keys
            match_normalised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
            match = siphash(self.db.hash_key, match_normalised)

        # Covers both a never-populated (None) and a malformed match field
        if not isinstance(self.object.match, dict):
            self.object.match = {}

        if index is None:
            for index_iter in self.parsed["index"]:
                self.object.match[index_iter] = match
        else:
            self.object.match[index] = match
        self.object.save()
        log.debug(f"Stored match: {index} - {match}")

    def get_match(self, index=None, match=None):
        """
        Get a match result for an index.
        If the index is None, it will return True if any index has a match.
        If `match` is given, it is hashed and compared with the stored value.

        :param index: the index to get the match for, can be None
        :param match: an object to compare with the stored match, optional
        :return: None if no match data is available; a bool when `index` is
                 None or `match` is given; otherwise the stored value for
                 the index
        """
        if self.object.match is None:
            self.object.match = {}
            self.object.save()
            return None
        if not isinstance(self.object.match, dict):
            return None

        if index is None:
            # Check if we have any matches on all indices
            return any(self.object.match.values())

        # Check if it's the same hash
        if match is not None:
            match_normalised = orjson.dumps(match, option=orjson.OPT_SORT_KEYS)
            match = siphash(self.db.hash_key, match_normalised)
            return self.object.match.get(index) == match

        return self.object.match.get(index)

    def format_aggs(self, aggs):
        """
        Format aggregations for the query.

        We have self.aggs, which contains:
            {"avg_sentiment": (">", 0.5)}
        and aggs, which contains:
            {"avg_sentiment": {"value": 0.6}}
        We format it like so:
            {"avg_sentiment": "0.6>0.5"}

        :param aggs: the aggregations to format
        :return: the formatted aggregations
        """
        new_aggs = {}
        for agg_name, agg in aggs.items():
            # Aggregations the rule did not configure have no operator or
            # threshold to print, so leave them out.
            if agg_name not in self.aggs:
                continue
            op, value = self.aggs[agg_name]
            new_aggs[agg_name] = f"{agg['value']}{op}{value}"
        return new_aggs

    async def ingest_matches(self, index, matches, meta):
        """
        Store all matches for an index.
        Each stored match is a copy of the original with the `index`,
        `rule_uuid`, `meta` and `match_ts` keys set, so the caller's objects
        are not modified.

        :param index: the index to store the matches for
        :param matches: the match, or list of matches, to store
        :param meta: the metadata to attach to every stored match
        """
        if not isinstance(matches, list):
            matches = [matches]

        # NOTE(review): naive UTC timestamp, kept as-is so the stored
        # match_ts format does not change.
        match_ts = datetime.utcnow().isoformat()
        matches_copy = []
        for match in matches:
            enriched = dict(match)
            enriched["index"] = index
            enriched["rule_uuid"] = self.object.id
            enriched["meta"] = meta
            enriched["match_ts"] = match_ts
            matches_copy.append(enriched)

        log.debug(f"Ingesting {len(matches_copy)} matches for index {index}")
        await self.db.async_store_matches(matches_copy)

    async def rule_matched(self, index, message, meta):
        """
        A rule has matched.
        If the stored match for the index differs from this one, send a
        notification after formatting the aggregations.
        The match is then stored and ingested regardless.

        :param index: the index the rule matched on
        :param message: the message object that matched
        :param meta: the metadata for the match; its `aggs` key holds the
                     aggregations that matched
        """
        current_match = self.get_match(index, message)
        log.debug(f"Rule matched: {index} - current match: {current_match}")
        if current_match is False:
            # Matched now, but not before
            meta["matched"] = self.format_aggs(meta.get("aggs", {}))
            rule_notify(self.object, index, message, meta)
        self.store_match(index, message)
        await self.ingest_matches(index, message, meta)

    async def rule_no_match(self, index=None):
        """
        A rule has not matched.
        If the previous run did match, send a notification if configured to
        notify for empty matches, and reset the stored match.

        :param index: the index the rule did not match on, can be None
        """
        current_match = self.get_match(index)
        log.debug(f"Rule not matched: {index} - current match: {current_match}")
        # get_match() returns a bool when index is None, but the stored
        # value (a hash, or False) for a specific index, so test for
        # "anything but no match" rather than identity with True.
        if current_match is not None and current_match is not False:
            # Matched before, but not now
            if self.object.send_empty:
                rule_notify(self.object, index, "no_match", None)
            self.store_match(index, False)

    async def run_schedule(self):
        """
        Run the schedule query.
        Get the results from the database, and check if the rule has matched.
        Check if all of the required aggregations have matched.
        """
        response = await self.db.schedule_query_results(self)
        if not response:
            # No results in the result_map
            await self.rule_no_match()
            return

        for index, (meta, results) in response.items():
            if not results:
                # Falsy results, no matches
                await self.rule_no_match(index)
                continue

            # Collect the match values of all required aggregations
            aggs_for_index = [
                meta["aggs"][agg_name]["match"]
                for agg_name in self.aggs
                if agg_name in meta["aggs"] and "match" in meta["aggs"][agg_name]
            ]

            # All required aggs are present, and all of them have matched
            if len(aggs_for_index) == len(self.aggs) and all(aggs_for_index):
                await self.rule_matched(index, results[: self.object.amount], meta)
                continue

            # Default branch, since the happy path has a continue keyword
            await self.rule_no_match(index)

    def test_schedule(self):
        """
        Test the schedule query to ensure it is valid.
        Run the query with the async_to_sync helper so we can call it from
        a form.
        Raises an exception if the query is invalid.
        Does nothing when no database backend was supplied.
        """
        if self.db:
            sync_schedule = async_to_sync(self.db.schedule_query_results)
            sync_schedule(self)

    def validate_schedule_fields(self):
        """
        Ensure schedule fields are valid.
        index: can be a list, it will schedule one search per index.
        source: can be a list, it will be the filter for each search.
        tokens: can be list, it will ensure the message matches any token.
        msg: can be a list, it will ensure the message contains any msg.
        No other fields can be lists containing more than one item.
        For scheduled rules, a `sentiment` field is parsed into
        self.aggs["avg_sentiment"].
        For on-demand rules, the `query` and `tags` fields are rejected.

        :raises RuleParseError: if the fields are invalid
        """
        if not self.is_schedule:
            for forbidden in ("query", "tags"):
                if forbidden in self.parsed:
                    raise RuleParseError(
                        f"Field {forbidden} cannot be used with on-demand rules",
                        "data",
                    )
            return

        allowed_list_fields = ["index", "source", "tokens", "msg"]
        for field, value in self.parsed.items():
            if field in allowed_list_fields:
                continue
            if len(value) > 1:
                raise RuleParseError(
                    (
                        f"For scheduled rules, field {field} cannot contain "
                        "more than one item"
                    ),
                    "data",
                )
            # An empty list is as empty as an empty first item
            if not value or len(str(value[0])) == 0:
                raise RuleParseError(f"Field {field} cannot be empty", "data")

        if "sentiment" in self.parsed:
            sentiment_error = (
                "Sentiment field must be a comparison operator and then a "
                "float: >0.02"
            )
            sentiment = str(self.parsed["sentiment"][0]).strip()
            # Whitespace-only values are empty after stripping
            if not sentiment or sentiment[0] not in [">", "<", "="]:
                raise RuleParseError(sentiment_error, "data")
            operator = sentiment[0]
            try:
                number = float(sentiment[1:])
            except ValueError:
                raise RuleParseError(sentiment_error, "data") from None
            self.aggs["avg_sentiment"] = (operator, number)

    @property
    def is_schedule(self):
        """
        Check if the rule is a schedule rule.
        A rule is scheduled when it has an `interval` other than 0.

        :return: True if the rule is a schedule rule, False otherwise
        """
        if "interval" in self.cleaned_data:
            if self.cleaned_data["interval"] != 0:
                return True
        return False

    def ensure_list(self):
        """
        Ensure all values in the data field are lists.
        Convert every non-list value to a list with one item.
        """
        for field, value in self.parsed.items():
            if not isinstance(value, list):
                self.parsed[field] = [value]

    def validate_user_permissions(self):
        """
        Ensure the user can use notification rules.

        :raises RuleParseError: if the user does not have permission
        """
        if not self.user.has_perm("core.use_rules"):
            raise RuleParseError("User does not have permission to use rules", "data")

    def validate_time_fields(self):
        """
        Validate the interval and window fields.
        Require the relevant permissions for scheduled and high frequency
        rules.
        Prohibit window being specified with an on-demand interval.
        Prohibit window not being specified with a non-on-demand interval.
        Prohibit amount being specified with an on-demand interval.
        Prohibit amount not being specified with a non-on-demand interval.
        Validate window field.
        Validate window unit and enforce maximum.
        Enforce the per-service maximum amount.

        :raises RuleParseError: if the fields are invalid
        """
        interval = self.cleaned_data.get("interval")
        window = self.cleaned_data.get("window")
        amount = self.cleaned_data.get("amount")
        service = self.cleaned_data.get("service")

        on_demand = interval == 0

        # Not on demand and interval is too low
        if not on_demand and interval <= HIGH_FREQUENCY_MIN_SEC:
            if not self.user.has_perm("core.rules_high_frequency"):
                raise RuleParseError(
                    "User does not have permission to use high frequency rules", "data"
                )

        if not on_demand:
            if not self.user.has_perm("core.rules_scheduled"):
                raise RuleParseError(
                    "User does not have permission to use scheduled rules", "data"
                )

        if on_demand and window is not None:
            # Interval is on demand and window is specified
            # We can't have a window with on-demand rules
            raise RuleParseError(
                "Window cannot be specified with on-demand interval", "window"
            )

        if not on_demand and window is None:
            # Interval is not on demand and window is not specified
            # We can't have a non-on-demand interval without a window
            raise RuleParseError(
                "Window must be specified with non-on-demand interval", "window"
            )

        if not on_demand and amount is None:
            # Interval is not on demand and amount is not specified
            # We can't have a non-on-demand interval without an amount
            raise RuleParseError(
                "Amount must be specified with non-on-demand interval", "amount"
            )
        if on_demand and amount is not None:
            # Interval is on demand and amount is specified
            # We can't have an amount with on-demand rules
            raise RuleParseError(
                "Amount cannot be specified with on-demand interval", "amount"
            )

        if window is not None:
            # The window is a number followed by a single unit character
            window_number = window[:-1]
            if not window_number.isdigit():
                raise RuleParseError("Window prefix must be a number", "window")
            window_number = int(window_number)
            window_unit = window[-1]
            if window_unit not in SECONDS_PER_UNIT:
                raise RuleParseError(
                    (
                        "Window unit must be one of "
                        f"{', '.join(SECONDS_PER_UNIT.keys())},"
                        f" not '{window_unit}'"
                    ),
                    "window",
                )
            window_seconds = window_number * SECONDS_PER_UNIT[window_unit]
            if window_seconds > MAX_WINDOW:
                raise RuleParseError(
                    f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
                    "window",
                )

        if amount is not None:
            if service == "ntfy":
                if amount > MAX_AMOUNT_NTFY:
                    raise RuleParseError(
                        f"Amount cannot be larger than {MAX_AMOUNT_NTFY} for ntfy",
                        "amount",
                    )
            else:
                # Every other service shares the webhook limit
                if amount > MAX_AMOUNT_WEBHOOK:
                    raise RuleParseError(
                        (
                            f"Amount cannot be larger than {MAX_AMOUNT_WEBHOOK} for "
                            f"{service}"
                        ),
                        "amount",
                    )

    def validate_permissions(self):
        """
        Validate permissions for the source and index variables.
        Also set the default values for the user if not present.
        Stores the default or expanded values in the parsed field.

        Errors come from parse_index and parse_source, which are called with
        raise_error=True.
        NOTE(review): the exception type is presumably QueryError - confirm
        against core.lib.parsing.
        """
        if "index" in self.parsed:
            index = self.parsed["index"]
            if isinstance(index, list):
                for i in index:
                    parse_index(self.user, {"index": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            index = parse_index(self.user, {}, raise_error=True)
            self.parsed["index"] = [index]

        if "source" in self.parsed:
            source = self.parsed["source"]
            if isinstance(source, list):
                for i in source:
                    parse_source(self.user, {"source": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            source = parse_source(self.user, {}, raise_error=True)
            self.parsed["source"] = source

    def parse_data(self):
        """
        Parse the YAML in the data text field and store it in self.parsed.

        :raises RuleParseError: if the data is not valid YAML, or is not a
                                mapping of fields to values
        """
        # NOTE(review): Loader/CLoader is PyYAML's full loader, which can
        # construct arbitrary Python objects from tagged input. The data
        # field is user supplied - consider SafeLoader/CSafeLoader.
        try:
            # Missing data is parsed as an empty document
            parsed = load(self.data or "", Loader=Loader)
        except (ScannerError, ParserError, YAMLError) as e:
            # YAMLError also covers composer/constructor errors, such as an
            # undefined alias, which the scanner and parser do not raise.
            raise RuleParseError(f"Invalid YAML: {e}", "data") from e

        # Every later step iterates the fields with .items()
        if not isinstance(parsed, dict):
            raise RuleParseError(
                "Invalid YAML: rule data must be a mapping of fields to values",
                "data",
            )
        self.parsed = parsed

    def __str__(self):
        """
        Get a YAML representation of the data field of the rule.
        """
        return dump(self.parsed, Dumper=Dumper)

    def get_data(self):
        """
        Return the data field as a dictionary.
        """
        return self.parsed