from yaml import dump, load
from yaml.parser import ParserError
from yaml.scanner import ScannerError

try:
    # Prefer the C-accelerated YAML implementations when available.
    from yaml import CDumper as Dumper
    from yaml import CLoader as Loader
except ImportError:
    from yaml import Loader, Dumper

import orjson
from asgiref.sync import async_to_sync

from core.lib.notify import sendmsg
from core.lib.parsing import parse_index, parse_source
from core.util import logs

log = logs.get_logger("rules")

# Multipliers converting a window suffix (s/m/h/d/w) into seconds.
SECONDS_PER_UNIT = {"s": 1, "m": 60, "h": 3600, "d": 86400, "w": 604800}
# Maximum allowed window: 30 days, expressed in seconds.
MAX_WINDOW = 2592000


class RuleParseError(Exception):
    """
    Raised when rule data fails parsing or validation.

    :param message: human-readable description of the problem.
    :param field: name of the form field the error relates to.
    """

    def __init__(self, message, field):
        super().__init__(message)
        self.field = field


def rule_matched(rule, message, matched):
    """
    Send a notification for a matched rule via the rule's configured service.

    :param rule: the rule object that matched (provides name, id, user and
        notification settings).
    :param message: the matched message, or a list of matched messages.
    :param matched: dict of the fields/aggregations that matched.
    """
    title = f"Rule {rule.name} matched"
    notification_settings = rule.get_notification_settings()
    cast = {
        "title": title,
        **notification_settings,
    }
    if rule.service == "ntfy":
        # Dump the message in YAML for readability.
        messages_formatted = ""
        if isinstance(message, list):
            # Fix: the original loop variable shadowed (and clobbered) the
            # `message` list it was iterating over.
            for item in message:
                messages_formatted += dump(
                    item, Dumper=Dumper, default_flow_style=False
                )
                messages_formatted += "\n"
        else:
            messages_formatted = dump(message, Dumper=Dumper, default_flow_style=False)

        matched = ", ".join([f"{k}: {v}" for k, v in matched.items()])
        notify_message = f"{rule.name} match: {matched}\n{messages_formatted}"
        notify_message = notify_message.encode("utf-8", "replace")

    elif rule.service == "webhook":
        notify_message = {
            "rule_id": rule.id,
            "rule_name": rule.name,
            "match": matched,
            "data": message,
        }
        if "priority" in notification_settings:
            notify_message["priority"] = notification_settings["priority"]
        if "topic" in notification_settings:
            notify_message["topic"] = notification_settings["topic"]
        notify_message = orjson.dumps(notify_message)

    # NOTE(review): any service other than "ntfy"/"webhook" would leave
    # notify_message unbound and raise NameError here — confirm those are the
    # only valid services, or add an explicit error for unknown ones.
    sendmsg(rule.user, notify_message, **cast)


class NotificationRuleData(object):
    """
    Parses, validates and executes a user's notification rule.

    Accepts either a dict of cleaned form data (validation at save time) or a
    live rule model object (when running scheduled rules).
    """

    def __init__(self, user, cleaned_data, db):
        self.user = user
        self.object = None

        # We are running live: a model object was passed instead of form data.
        if not isinstance(cleaned_data, dict):
            self.object = cleaned_data
            cleaned_data = cleaned_data.__dict__

        self.cleaned_data = cleaned_data
        self.db = db
        self.data = self.cleaned_data.get("data")
        self.parsed = None
        self.aggs = {}

        # Check permissions first, then parse, then validate the result.
        self.validate_user_permissions()

        self.parse_data()
        self.ensure_list()
        self.validate_permissions()
        self.validate_schedule_fields()
        self.validate_time_fields()

    def store_match(self, index, match):
        """
        Store a match result for an index on the rule object and save it.

        :param index: index name the result belongs to.
        :param match: True/False match outcome for this run.
        """
        # Normalise a missing or malformed match store to an empty dict.
        if not isinstance(self.object.match, dict):
            self.object.match = {}

        self.object.match[index] = match
        self.object.save()
        log.debug(f"Stored match: {index} - {match}")

    def get_match(self, index):
        """
        Get the stored match result for an index.

        :returns: the stored True/False value, or None if nothing is stored.
        """
        if not isinstance(self.object.match, dict):
            return None
        return self.object.match.get(index)

    def format_aggs(self, aggs):
        """
        Format aggregations for the query.
        We have self.aggs, which contains:
            {"avg_sentiment": (">", 0.5)}
        and aggs, which contains:
            {"avg_sentiment": {"value": 0.6}}
        It's matched already, we just need to format it like so:
            {"avg_sentiment": "0.6>0.5"}

        :param aggs: aggregation values returned by the query.
        :returns: dict mapping aggregation name to a "value<op>threshold" string.
        """
        formatted = {}
        for agg_name, agg in aggs.items():
            # Membership in self.aggs was already checked by the caller.
            operator, threshold = self.aggs[agg_name]
            formatted[agg_name] = f"{agg['value']}{operator}{threshold}"
        return formatted

    async def run_schedule(self):
        """
        Run the schedule query and send notifications for fresh matches.

        A notification is only sent when the previous stored result for the
        index was False, so repeated matches do not re-notify.
        """
        response = await self.db.schedule_query_results(self)
        for index, (aggs, results) in response.items():
            if not results:
                # Fix: previously fell through after storing the miss, which
                # could notify with an empty result list and store True.
                self.store_match(index, False)
                continue

            # Collect the match flag of every expected aggregation present
            # in the response.
            aggs_for_index = []
            for agg_name in self.aggs.keys():
                if agg_name in aggs:
                    if "match" in aggs[agg_name]:
                        aggs_for_index.append(aggs[agg_name]["match"])

            # All required aggs are present.
            if len(aggs_for_index) == len(self.aggs.keys()):
                if all(aggs_for_index):
                    # Ensure we only send notifications when the previous run
                    # did not return any matches.
                    # NOTE(review): a never-run index stores None, which is not
                    # `False`, so the very first match does not notify — confirm
                    # this is intentional.
                    current_match = self.get_match(index)
                    if current_match is False:
                        formatted_aggs = self.format_aggs(aggs)
                        rule_matched(
                            self.object, results[: self.object.amount], formatted_aggs
                        )
                    self.store_match(index, True)
                    continue

            self.store_match(index, False)

    def test_schedule(self):
        """
        Test the schedule query to ensure it is valid, by running it
        synchronously once.
        """
        if self.db:
            sync_schedule = async_to_sync(self.db.schedule_query_results)
            sync_schedule(self)

    def validate_schedule_fields(self):
        """
        Ensure schedule fields are valid.
        index: can be a list, it will schedule one search per index.
        source: can be a list, it will be the filter for each search.
        tokens: can be a list, it will ensure the message matches any token.
        msg: can be a list, it will ensure the message contains any msg.
        No other fields can be lists containing more than one item.

        :raises RuleParseError: if a field breaks the schedule constraints.
        """
        if self.is_schedule:
            allowed_list_fields = ["index", "source", "tokens", "msg"]
            for field, value in self.parsed.items():
                if field not in allowed_list_fields:
                    if len(value) > 1:
                        raise RuleParseError(
                            (
                                f"For scheduled rules, field {field} cannot contain "
                                "more than one item"
                            ),
                            "data",
                        )
                    if len(str(value[0])) == 0:
                        raise RuleParseError(f"Field {field} cannot be empty", "data")

            if "sentiment" in self.parsed:
                sentiment = str(self.parsed["sentiment"][0])
                sentiment = sentiment.strip()
                # Guard: a whitespace-only value strips to "" and would raise
                # IndexError on sentiment[0] below.
                if not sentiment or sentiment[0] not in [">", "<", "="]:
                    raise RuleParseError(
                        (
                            "Sentiment field must be a comparison operator and then a "
                            "float: >0.02"
                        ),
                        "data",
                    )
                operator = sentiment[0]
                number = sentiment[1:]
                try:
                    number = float(number)
                except ValueError:
                    raise RuleParseError(
                        (
                            "Sentiment field must be a comparison operator and then a "
                            "float: >0.02"
                        ),
                        "data",
                    )
                self.aggs["avg_sentiment"] = (operator, number)

        else:
            # On-demand rules cannot use query/tags fields.
            if "query" in self.parsed:
                raise RuleParseError(
                    "Field query cannot be used with on-demand rules", "data"
                )
            if "tags" in self.parsed:
                raise RuleParseError(
                    "Field tags cannot be used with on-demand rules", "data"
                )

    @property
    def is_schedule(self):
        """True when the rule has an interval set, i.e. it is a scheduled rule."""
        if "interval" in self.cleaned_data:
            if self.cleaned_data["interval"] != 0:
                return True
        return False

    def ensure_list(self):
        """
        Ensure all parsed values are lists, wrapping scalars as singletons.
        """
        for field, value in self.parsed.items():
            if not isinstance(value, list):
                self.parsed[field] = [value]

    def validate_user_permissions(self):
        """
        Ensure the user can use notification rules.

        :raises RuleParseError: if the user lacks the core.use_rules permission.
        """
        if not self.user.has_perm("core.use_rules"):
            raise RuleParseError("User does not have permission to use rules", "data")

    def validate_time_fields(self):
        """
        Validate the interval, window and amount fields.
        Prohibit window being specified with an ondemand interval.

        :raises RuleParseError: if the combination of fields is invalid.
        """
        interval = self.cleaned_data.get("interval")
        window = self.cleaned_data.get("window")
        amount = self.cleaned_data.get("amount")
        on_demand = interval == 0

        if on_demand and window is not None:
            # Interval is on demand and window is specified
            # We can't have a window with on-demand rules
            raise RuleParseError(
                "Window cannot be specified with on-demand interval", "window"
            )

        if not on_demand and window is None:
            # Interval is not on demand and window is not specified
            # We can't have a non-on-demand interval without a window
            raise RuleParseError(
                "Window must be specified with non-on-demand interval", "window"
            )

        if not on_demand and amount is None:
            # Interval is not on demand and amount is not specified
            # We can't have a non-on-demand interval without an amount
            raise RuleParseError(
                "Amount must be specified with non-on-demand interval", "amount"
            )

        if on_demand and amount is not None:
            # Interval is on demand and amount is specified
            # We can't have an amount with on-demand rules
            raise RuleParseError(
                "Amount cannot be specified with on-demand interval", "amount"
            )

        if window is not None:
            # Window is "<number><unit>", e.g. "30d".
            window_number = window[:-1]
            if not window_number.isdigit():
                raise RuleParseError("Window prefix must be a number", "window")
            window_number = int(window_number)
            window_unit = window[-1]
            if window_unit not in SECONDS_PER_UNIT:
                raise RuleParseError(
                    (
                        "Window unit must be one of "
                        f"{', '.join(SECONDS_PER_UNIT.keys())},"
                        f" not '{window_unit}'"
                    ),
                    "window",
                )
            window_seconds = window_number * SECONDS_PER_UNIT[window_unit]
            if window_seconds > MAX_WINDOW:
                raise RuleParseError(
                    f"Window cannot be larger than {MAX_WINDOW} seconds (30 days)",
                    "window",
                )

    def validate_permissions(self):
        """
        Validate permissions for the source and index variables, filling in the
        user's defaults when they are absent.

        :raises RuleParseError: via parse_index/parse_source on a denied value.
        """
        if "index" in self.parsed:
            index = self.parsed["index"]
            if isinstance(index, list):
                for i in index:
                    parse_index(self.user, {"index": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            index = parse_index(self.user, {}, raise_error=True)
            self.parsed["index"] = index

        if "source" in self.parsed:
            source = self.parsed["source"]
            if isinstance(source, list):
                for i in source:
                    parse_source(self.user, {"source": i}, raise_error=True)
        else:
            # Get the default value for the user if not present
            source = parse_source(self.user, {}, raise_error=True)
            self.parsed["source"] = source

    def parse_data(self):
        """
        Parse the data in the text field as YAML.

        :raises RuleParseError: if the YAML is invalid.
        """
        # SECURITY(review): Loader may be the full CLoader, which can construct
        # arbitrary Python objects from user-supplied rule text — consider
        # SafeLoader here.
        try:
            self.parsed = load(self.data, Loader=Loader)
        except (ScannerError, ParserError) as e:
            # Fix: arguments were swapped — RuleParseError takes (message, field).
            raise RuleParseError(f"Invalid YAML: {e}", "data") from e

    def __str__(self):
        """Render the parsed rule back out as YAML."""
        return dump(self.parsed, Dumper=Dumper)

    def get_data(self):
        """Return the parsed rule dict."""
        return self.parsed