Add batch_id to tables

Implement policy parsing and add batch_id to rules
Make things lowercase, improve search queries for rules
2023-02-09 19:11:46 +00:00 · 2023-02-09 19:11:38 +00:00 · 2023-02-09 19:10:15 +00:00 · 2023-02-09 19:09:32 +00:00 · 2023-02-09 19:08:27 +00:00 · 2023-02-09 19:07:49 +00:00
13 changed files with 150 additions and 39 deletions
--- a/app/settings.py
+++ b/app/settings.py
@ -42,6 +42,7 @@ INSTALLED_APPS = [
    "crispy_bulma",
    "django_tables2",
    "django_tables2_bulma_template",
    "prettyjson",
 ]
 CRISPY_TEMPLATE_PACK = "bulma"
 CRISPY_ALLOWED_TEMPLATE_PACKS = ("bulma",)
--- a/core/db/druid.py
+++ b/core/db/druid.py
@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
 class DruidBackend(StorageBackend):
    def __init__(self):
-        super().__init__("Druid")
+        super().__init__("druid")
    def initialise(self, **kwargs):
        # self.client = PyDruid("http://broker:8082", "druid/v2")
--- a/core/db/elastic.py
+++ b/core/db/elastic.py
@ -33,7 +33,7 @@ mapping = {
            "ts": {"type": "date", "format": "epoch_second"},
            "match_ts": {"type": "date", "format": "iso8601"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
-            "rule_uuid": {"type": "keyword"},
+            "rule_id": {"type": "keyword"},
        }
    }
 }
@ -43,7 +43,7 @@ for field in keyword_fields:
 class ElasticsearchBackend(StorageBackend):
    def __init__(self):
-        super().__init__("Elasticsearch")
+        super().__init__("elasticsearch")
        self.client = None
        self.async_client = None
@ -272,10 +272,12 @@ class ElasticsearchBackend(StorageBackend):
        """
        if self.async_client is None:
            await self.async_initialise()
        print("MATCHES", matches)
        for match in matches:
            result = await self.async_client.index(
                index=settings.INDEX_RULE_STORAGE, body=match
            )
            print("RESULT", result)
            if not result["result"] == "created":
                self.log.error(f"Indexing failed: {result}")
        self.log.debug(f"Indexed {len(matches)} messages in ES")
@ -439,6 +441,7 @@ class ElasticsearchBackend(StorageBackend):
                raise QueryError(error)
            if len(response["hits"]["hits"]) == 0:
                # No results, skip
                result_map[index] = ({}, [])
                continue
            meta, response = self.parse(response, meta=True)
            # print("Parsed response", response)
@ -500,7 +503,7 @@ class ElasticsearchBackend(StorageBackend):
        if rule_object is not None:
            index = settings.INDEX_RULE_STORAGE
-            add_bool.append({"rule_uuid": str(rule_object.id)})
+            add_bool.append({"rule_id": str(rule_object.id)})
        else:
            # I - Index
            index = parse_index(request.user, query_params)
@ -520,14 +523,19 @@ class ElasticsearchBackend(StorageBackend):
        if isinstance(sources, dict):
            return sources
        total_count = len(sources)
-        total_sources = len(settings.MAIN_SOURCES) + len(settings.SOURCES_RESTRICTED)
+        # Total is -1 due to the "all" source
        total_sources = (
            len(settings.MAIN_SOURCES) - 1 + len(settings.SOURCES_RESTRICTED)
        )
        print("total_count", total_count, "total_sources", total_sources)
        if total_count != total_sources:
            add_top_tmp = {"bool": {"should": []}}
            for source_iter in sources:
                add_top_tmp["bool"]["should"].append(
                    {"match_phrase": {"src": source_iter}}
                )
-            add_top.append(add_top_tmp)
+            if rule_object is not None and query_params["source"] != "all":
                add_top.append(add_top_tmp)
        # R - Ranges
        # date_query = False
@ -547,12 +555,17 @@ class ElasticsearchBackend(StorageBackend):
        sort = parse_sort(query_params)
        if isinstance(sort, dict):
            return sort
        if rule_object is not None:
            field = "match_ts"
        else:
            field = "ts"
        if sort:
            # For Druid compatibility
            sort_map = {"ascending": "asc", "descending": "desc"}
            sorting = [
                {
-                    "ts": {
+                    field: {
                        "order": sort_map[sort],
                    }
                }
@ -594,6 +607,7 @@ class ElasticsearchBackend(StorageBackend):
            search_query,
            index=index,
        )
        print("query", search_query)
        if "message" in response:
            return response
--- a/core/db/manticore.py
+++ b/core/db/manticore.py
@ -13,7 +13,7 @@ logger = logging.getLogger(__name__)
 class ManticoreBackend(StorageBackend):
    def __init__(self):
-        super().__init__("Manticore")
+        super().__init__("manticore")
    def initialise(self, **kwargs):
        """
--- a/core/lib/parsing.py
+++ b/core/lib/parsing.py
@ -118,6 +118,7 @@ def parse_source(user, query_params, raise_error=False):
    if source:
        sources = [source]
    else:
        print("NOT SOURCE")
        sources = list(settings.MAIN_SOURCES)
        if user.has_perm("core.restricted_sources"):
            for source_iter in settings.SOURCES_RESTRICTED:
--- a/core/lib/rules.py
+++ b/core/lib/rules.py
@ -8,6 +8,7 @@ try:
 except ImportError:
    from yaml import Loader, Dumper
 import uuid
 from datetime import datetime
 import orjson
@ -286,12 +287,18 @@ class NotificationRuleData(object):
            matches = [matches]
        matches_copy = matches.copy()
        match_ts = datetime.utcnow().isoformat()
        batch_id = uuid.uuid4()
        # Filter empty fields in meta
        meta = {k: v for k, v in meta.items() if v}
        for match_index, _ in enumerate(matches_copy):
            matches_copy[match_index]["index"] = index
-            matches_copy[match_index]["rule_uuid"] = self.object.id
+            matches_copy[match_index]["rule_id"] = str(self.object.id)
            matches_copy[match_index]["meta"] = meta
            matches_copy[match_index]["match_ts"] = match_ts
            matches_copy[match_index]["mode"] = mode
            matches_copy[match_index]["batch_id"] = str(batch_id)
        return matches_copy
    async def ingest_matches(self, index, matches, meta, mode):
@ -324,15 +331,25 @@ class NotificationRuleData(object):
        current_match = self.get_match(index, message)
        log.debug(f"Rule matched: {index} - current match: {current_match}")
-        # Default policy: Trigger only when results change
+        last_run_had_matches = current_match is True
-        if current_match is False:
+        if self.policy in ["change", "default"]:
-            # Matched now, but not before
+            # Change or Default policy, notifying only on new results
-            if "matched" not in meta:
+            if last_run_had_matches:
-                meta["matched"] = self.format_aggs(meta["aggs"])
+                # Last run had matches, and this one did too
-            rule_notify(self.object, index, message, meta)
+                # We don't need to notify
-            self.store_match(index, message)
+                return
-            await self.ingest_matches(index, message, meta, mode)
+
        elif self.policy == "always":
            # Only here for completeness, we notify below by default
            pass
        # We hit the return above if we don't need to notify
        if "aggs" in meta and "matched" not in meta:
            meta["matched"] = self.format_aggs(meta["aggs"])
        rule_notify(self.object, index, message, meta)
        self.store_match(index, message)
        await self.ingest_matches(index, message, meta, mode)
    def rule_matched_sync(self, index, message, meta, mode):
        """
@ -346,14 +363,25 @@ class NotificationRuleData(object):
        current_match = self.get_match(index, message)
        log.debug(f"Rule matched: {index} - current match: {current_match}")
-        # Default policy: Trigger only when results change
+        last_run_had_matches = current_match is True
-        if current_match is False:
+
-            # Matched now, but not before
+        if self.policy in ["change", "default"]:
-            if "matched" not in meta:
+            # Change or Default policy, notifying only on new results
-                meta["matched"] = self.format_aggs(meta["aggs"])
+            if last_run_had_matches:
-            rule_notify(self.object, index, message, meta)
+                # Last run had matches, and this one did too
-            self.store_match(index, message)
+                # We don't need to notify
-            self.ingest_matches_sync(index, message, meta, mode)
+                return
        elif self.policy == "always":
            # Only here for completeness, we notify below by default
            pass
        # We hit the return above if we don't need to notify
        if "aggs" in meta and "matched" not in meta:
            meta["matched"] = self.format_aggs(meta["aggs"])
        rule_notify(self.object, index, message, meta)
        self.store_match(index, message)
        self.ingest_matches_sync(index, message, meta, mode)
    # No async helper for this one as we only need it for schedules
    async def rule_no_match(self, index=None, message=None):
@ -367,15 +395,29 @@ class NotificationRuleData(object):
        current_match = self.get_match(index)
        log.debug(f"Rule not matched: {index} - current match: {current_match}")
-        # Change policy: When there are no results following a successful run
+        last_run_had_matches = current_match is True
-        if current_match is True:
+        if self.policy in ["change", "default"]:
-            # Matched before, but not now
+            print("policy in change or default")
-            if self.object.send_empty:
+            # Change or Default policy, notifying only on new results
-                rule_notify(self.object, index, "no_match", None)
+            if not last_run_had_matches:
-            self.store_match(index, False)
+                print("last run did not have matches")
-            await self.ingest_matches(
+                # Last run did not have matches, nor did this one
-                index=index, message={}, meta={"msg": message}, mode="schedule"
+                # We don't need to notify
-            )
+                return
        elif self.policy == "always":
            print("policy is always")
            # Only here for completeness, we notify below by default
            pass
        # Matched before, but not now
        if self.policy in ["change", "always"]:
            print("policy in change or always")
            rule_notify(self.object, index, "no_match", None)
        self.store_match(index, False)
        await self.ingest_matches(
            index=index, matches=[{"msg": None}], meta={"msg": message}, mode="schedule"
        )
    async def run_schedule(self):
        """
--- a/core/static/js/column-shifter.js
+++ b/core/static/js/column-shifter.js
@ -66,10 +66,11 @@ $(document).ready(function(){
                    "file_size": "off",
                    "lang_code": "off",
                    "tokens": "off",
-                    "rule_uuid": "off",
+                    "rule_id": "off",
                    "index": "off",
                    "meta": "off",
                    "match_ts": "off",
                    "batch_id": "off",
                    //"lang_name": "off",
                    // "words_noun": "off",
                    // "words_adj": "off",
--- a/core/templates/base.html
+++ b/core/templates/base.html
@ -212,6 +212,12 @@
        z-index: 39 !important;
      }
      .small-field {
        overflow: hidden;
        text-overflow: ellipsis;
        overflow-y: hidden;
      }
    </style>
    <!-- Piwik --> {# Yes it's in the source, fight me #}
    <script type="text/javascript">
--- a/core/templates/partials/results_table.html
+++ b/core/templates/partials/results_table.html
@ -4,6 +4,8 @@
 {% load joinsep %}
 {% load urlsafe %}
 {% load pretty %}
 {% load splitstr %}
 {% block table-wrapper %}
  <script src="{% static 'js/column-shifter.js' %}"></script>
  <div id="drilldown-table" class="column-shifter-container" style="position:relative; z-index:1;">
@ -168,6 +170,13 @@
                                <p>{{ row.cells.date }}</p>
                                <p>{{ row.cells.time }}</p>
                              </td>
                            {% elif column.name == 'match_ts' %}
                              <td class="{{ column.name }}">
                                {% with match_ts=cell|splitstr:'T' %}
                                  <p>{{ match_ts.0 }}</p>
                                  <p>{{ match_ts.1 }}</p>
                                {% endwith %}
                              </td>
                            {% elif column.name == 'type' or column.name == 'mtype' %}
                              <td class="{{ column.name }}">
                                <a
@ -376,7 +385,29 @@
                              </td>
                            {% elif column.name == "meta" %}
                              <td class="{{ column.name }}">
-                                <pre>{{ cell|pretty }}</pre>
+                                <pre class="small-field" style="cursor: pointer;">{{ cell|pretty }}</pre>
                              </td>
                            {% elif 'id' in column.name %}
                              <td class="{{ column.name }}">
                                <div class="buttons">
                                  <div class="nowrap-parent">
                                    <!-- <input class="input" type="text" value="{{ cell }}" style="width: 50px;" readonly> -->
                                    <a
                                      class="has-text-grey button nowrap-child"
                                      onclick="populateSearch('{{ column.name }}', '{{ cell|escapejs }}')">
                                      <span class="icon" data-tooltip="Populate {{ cell }}">
                                        <i class="fa-solid fa-arrow-left-long-to-line" aria-hidden="true"></i>
                                      </span>
                                    </a>
                                    <a
                                      class="has-text-grey button nowrap-child"
                                      onclick="window.prompt('Copy to clipboard: Ctrl+C, Enter', '{{ cell|escapejs }}');">
                                      <span class="icon" data-tooltip="Copy to clipboard">
                                        <i class="fa-solid fa-copy" aria-hidden="true"></i>
                                      </span>
                                    </a>
                                  </div>
                                </div>
                              </td>
                            {% else %}
                              <td class="{{ column.name }}">
--- a/core/templatetags/pretty.py
+++ b/core/templatetags/pretty.py
@ -6,4 +6,10 @@ register = template.Library()
@register.filter
 def pretty(data):
-    return orjson.dumps(data, option=orjson.OPT_INDENT_2).decode("utf-8")
+    prettified = orjson.dumps(data, option=orjson.OPT_INDENT_2).decode("utf-8")
    if prettified.startswith("{"):
        prettified = prettified[1:]
    if prettified.endswith("}"):
        prettified = prettified[:-1]
    return prettified
--- a/core/templatetags/splitstr.py
+++ b/core/templatetags/splitstr.py
@ -0,0 +1,8 @@
 from django import template
 register = template.Library()
@register.filter
 def splitstr(value, arg):
    return value.split(arg)
--- a/core/views/ui/tables.py
+++ b/core/views/ui/tables.py
@ -12,7 +12,6 @@ def format_header(self):
    header = header.lower()
    header = header.title()
    if header != "Ident":
        header = header.replace("Uuid", "UUID")
        header = header.replace("Id", "ID")
        header = header.replace("id", "ID")
    if header == "Ts":
@ -79,7 +78,8 @@ class DrilldownTable(Table):
    file_md5 = Column()
    file_ext = Column()
    file_size = Column()
-    rule_uuid = Column()
+    rule_id = Column()
    batch_id = Column()
    index = Column()
    meta = Column()
    match_ts = Column()
--- a/requirements.txt
+++ b/requirements.txt
@ -20,3 +20,4 @@ django-debug-toolbar-template-profiler
 orjson
 msgpack
 apscheduler
 django-prettyjson
Author	SHA1	Message	Date
Mark Veidemanis	7b6da7b704	Add batch_id to tables	2023-02-09 19:11:46 +00:00
Mark Veidemanis	0d564788b6	Implement policy parsing and add batch_id to rules	2023-02-09 19:11:38 +00:00
Mark Veidemanis	fd10a4ba8e	Make things lowercase, improve search queries for rules	2023-02-09 19:10:15 +00:00
Mark Veidemanis	455da73b95	Improve results rendering	2023-02-09 19:09:32 +00:00
Mark Veidemanis	d8005fa15d	Strip leading and trailing brackets from prettified JSON	2023-02-09 19:08:27 +00:00
Mark Veidemanis	6a01aea5e1	Add prettyjson	2023-02-09 19:07:49 +00:00