From 87324de66605ead283ee96e240c031df91979435 Mon Sep 17 00:00:00 2001
From: Mark Veidemanis
Date: Tue, 6 Sep 2022 11:53:32 +0100
Subject: [PATCH] Fix some Manticore queries
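
Manticore's JSON search API is close to OpenSearch but not identical, so
the query builders and result handling need a few adjustments:

- core/lib/context.py: use "equals" rather than "match" for the numeric
  "num" and "mtype" fields, use "match_phrase" for the network name, pass
  the target index and result limit in the request body ("index" and
  "limit" instead of "size"), and only attach the optional "should"
  clauses when they are non-empty.
- core/lib/manticore.py: fall back to a supplied custom query, convert the
  from/to date and time parameters to UNIX timestamps before building the
  "ts" range clause, and import the shared result helpers from the new
  processing module.
- core/lib/processing.py: new module holding annotate_results,
  filter_blacklisted and parse_results, moved out of core/lib/opensearch.py
  so both backends can share them; parse_results now also drops fields
  with empty string values.
- core/templates/ui/drilldown/search_partial.html: simplify the search box
  placeholder and remove unused markup.

For illustration only (the index name and field values below are made up),
a query built by core/lib/context.py now looks roughly like:

    {
        "index": "main",
        "limit": 15,
        "query": {
            "bool": {
                "must": [
                    {"equals": {"num": 1}},
                    {"match_phrase": {"net": "libera"}},
                    {"match": {"channel": "#python"}},
                ]
            }
        },
        "fields": ["ts", "nick", "channel", "msg"],
    }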
---
core/lib/context.py | 37 ++--
core/lib/manticore.py | 31 +++-
core/lib/opensearch.py | 161 +----------------
core/lib/processing.py | 164 ++++++++++++++++++
.../ui/drilldown/search_partial.html | 41 +----
core/views/ui/drilldown.py | 6 +-
6 files changed, 215 insertions(+), 225 deletions(-)
create mode 100644 core/lib/processing.py
diff --git a/core/lib/context.py b/core/lib/context.py
index 40b560d..ccff818 100644
--- a/core/lib/context.py
+++ b/core/lib/context.py
@@ -4,9 +4,9 @@ def construct_query(index, net, channel, src, num, size, type=None, nicks=None):
extra_should = []
extra_should2 = []
if num:
- extra_must.append({"match": {"num": num}})
+ extra_must.append({"equals": {"num": num}})
if net:
- extra_must.append({"match": {"net": net}})
+ extra_must.append({"match_phrase": {"net": net}})
if channel:
extra_must.append({"match": {"channel": channel}})
if nicks:
@@ -52,31 +52,36 @@ def construct_query(index, net, channel, src, num, size, type=None, nicks=None):
extra_should.append({"match": {"nick": channel}})
else:
for ctype in types:
- extra_should.append({"match": {"mtype": ctype}})
+ extra_should.append({"equals": {"mtype": ctype}})
else:
for ctype in types:
extra_should.append({"match": {"type": ctype}})
query = {
- "size": size,
+ "index": index,
+ "limit": size,
"query": {
"bool": {
"must": [
- {"match": {"src": src}},
- {
- "bool": {
- "should": [*extra_should],
- }
- },
- {
- "bool": {
- "should": [*extra_should2],
- }
- },
+ # {"equals": {"src": src}},
+ # {
+ # "bool": {
+ # "should": [*extra_should],
+ # }
+ # },
+ # {
+ # "bool": {
+ # "should": [*extra_should2],
+ # }
+ # },
*extra_must,
]
}
},
"fields": fields,
- "_source": False,
+ # "_source": False,
}
+ if extra_should:
+ query["query"]["bool"]["must"].append({"bool": {"should": [*extra_should]}})
+ if extra_should2:
+ query["query"]["bool"]["must"].append({"bool": {"should": [*extra_should2]}})
return query
diff --git a/core/lib/manticore.py b/core/lib/manticore.py
index 6d2a13a..9d56104 100644
--- a/core/lib/manticore.py
+++ b/core/lib/manticore.py
@@ -1,10 +1,13 @@
-from re import search
-from django.conf import settings
-from core.lib.opensearch import annotate_results, filter_blacklisted, parse_results
-import manticoresearch
-from core.views.helpers import dedup_list
+from datetime import datetime
from pprint import pprint
+import manticoresearch
+from django.conf import settings
+
+from core.lib.processing import annotate_results, filter_blacklisted, parse_results
+from core.views.helpers import dedup_list
+
+
def initialise_manticore():
"""
Initialise the Manticore client
@@ -15,8 +18,10 @@ def initialise_manticore():
return (api_client, api_instance)
+
api_client, client = initialise_manticore()
+
def construct_query(query, size, index, blank=False):
"""
Accept some query parameters and construct an OpenSearch query.
@@ -35,12 +40,14 @@ def construct_query(query, size, index, blank=False):
query_base["query"]["bool"]["must"].append(query_string)
return query_base
+
def run_query(client, user, search_query):
response = client.search(search_query)
response = response.to_dict()
filter_blacklisted(user, response)
return response
+
def query_results(
request,
query_params,
@@ -110,6 +117,9 @@ def query_results(
query = query_params["query"]
search_query = construct_query(query, size, index)
query_created = True
+ else:
+ if custom_query:
+ search_query = custom_query
if tags:
# Get a blank search query
@@ -159,13 +169,16 @@ def query_results(
add_top.append(add_top_tmp)
print("AFTER", add_top)
-
# Date/time range
if set({"from_date", "to_date", "from_time", "to_time"}).issubset(
query_params.keys()
):
from_ts = f"{query_params['from_date']}T{query_params['from_time']}Z"
to_ts = f"{query_params['to_date']}T{query_params['to_time']}Z"
+ from_ts = datetime.strptime(from_ts, "%Y-%m-%dT%H:%MZ")
+ to_ts = datetime.strptime(to_ts, "%Y-%m-%dT%H:%MZ")
+ from_ts = int(from_ts.timestamp())
+ to_ts = int(to_ts.timestamp())
range_query = {
"range": {
"ts": {
@@ -247,7 +260,6 @@ def query_results(
if sort:
search_query["sort"] = sort
-
pprint(search_query)
results = run_query(
client,
@@ -256,7 +268,7 @@ def query_results(
)
if not results:
return False
- #results = results.to_dict()
+ # results = results.to_dict()
results_parsed = parse_results(results)
if annotate:
annotate_results(results_parsed)
@@ -280,4 +292,5 @@ def query_results(
"card": results["hits"]["total"],
"took": results["took"],
}
- return context
\ No newline at end of file
+ print("RTRN", context)
+ return context
diff --git a/core/lib/opensearch.py b/core/lib/opensearch.py
index 6a1c5a4..032cb1a 100644
--- a/core/lib/opensearch.py
+++ b/core/lib/opensearch.py
@@ -5,11 +5,10 @@ from django.conf import settings
from opensearchpy import OpenSearch
from opensearchpy.exceptions import NotFoundError, RequestError
-from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
-from core.views.helpers import dedup_list
-from datetime import datetime
# from json import dumps
# pp = lambda x: print(dumps(x, indent=2))
+from core.lib.processing import annotate_results, filter_blacklisted, parse_results
+from core.views.helpers import dedup_list
def initialise_opensearch():
@@ -37,114 +36,6 @@ def initialise_opensearch():
client = initialise_opensearch()
-def annotate_results(results_parsed):
- """
- Accept a list of dict objects, search for the number of channels and users.
- Add them to the object.
- Mutate it in place. Does not return anything.
- """
- # Figure out items with net (not discord)
- nets = set()
- for x in results_parsed:
- if "net" in x:
- nets.add(x["net"])
-
- for net in nets:
- # Annotate the online attribute from Threshold
- nicks = list(
- set(
- [
- x["nick"]
- for x in results_parsed
- if {"nick", "src", "net"}.issubset(x)
- and x["src"] == "irc"
- and x["net"] == net
- ]
- )
- )
- channels = list(
- set(
- [
- x["channel"]
- for x in results_parsed
- if {"channel", "src", "net"}.issubset(x)
- and x["src"] == "irc"
- and x["net"] == net
- ]
- )
- )
- online_info = annotate_online(net, nicks)
- # Annotate the number of users in the channel
- num_users = annotate_num_users(net, channels)
- # Annotate the number channels the user is on
- num_chans = annotate_num_chans(net, nicks)
- for item in results_parsed:
- if "net" in item:
- if item["net"] == net:
- if "nick" in item:
- if item["nick"] in online_info:
- item["online"] = online_info[item["nick"]]
- if "channel" in item:
- if item["channel"] in num_users:
- item["num_users"] = num_users[item["channel"]]
- if "nick" in item:
- if item["nick"] in num_chans:
- item["num_chans"] = num_chans[item["nick"]]
-
-
-def filter_blacklisted(user, response):
- """
- Low level filter to take the raw OpenSearch response and remove
- objects from it we want to keep secret.
- Does not return, the object is mutated in place.
- """
- response["redacted"] = 0
- response["exemption"] = None
- if user.is_superuser:
- response["exemption"] = True
- # is_anonymous = isinstance(user, AnonymousUser)
- # For every hit from ES
- for index, item in enumerate(list(response["hits"]["hits"])):
- # For every blacklisted type
- for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
- # Check this field we are matching exists
- if "_source" in item.keys():
- data_index = "_source"
- elif "fields" in item.keys():
- data_index = "fields"
- else:
- return False
- if blacklisted_type in item[data_index].keys():
- content = item[data_index][blacklisted_type]
- # For every item in the blacklisted array for the type
- for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
- blacklisted_type
- ]:
- if blacklisted_item == str(content):
- # Remove the item
- if item in response["hits"]["hits"]:
- # Let the UI know something was redacted
- if (
- "exemption"
- not in response["hits"]["hits"][index][data_index]
- ):
- response["redacted"] += 1
- # Anonymous
- if user.is_anonymous:
- # Just set it to none so the index is not off
- response["hits"]["hits"][index] = None
- else:
- if not user.has_perm("core.bypass_blacklist"):
- response["hits"]["hits"][index] = None
- else:
- response["hits"]["hits"][index][data_index][
- "exemption"
- ] = True
-
- # Actually get rid of all the things we set to None
- response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit]
-
-
def construct_query(query, size, use_query_string=True, tokens=False):
"""
Accept some query parameters and construct an OpenSearch query.
@@ -233,54 +124,6 @@ def run_main_query(client, user, query, custom_query=False, index=None, size=Non
return response
-def parse_results(results):
- results_parsed = []
- stringify = ["host", "channel"]
- if "hits" in results.keys():
- if "hits" in results["hits"]:
- for item in results["hits"]["hits"]:
- if "_source" in item.keys():
- data_index = "_source"
- elif "fields" in item.keys():
- data_index = "fields"
- else:
- return False
- element = item[data_index]
- for field in stringify:
- if field in element:
- element[field] = str(element[field])
- # Why are fields in lists...
- if data_index == "fields":
- element = {k: v[0] for k, v in element.items() if len(v)}
- element["id"] = item["_id"]
-
- # Split the timestamp into date and time
- if "ts" not in element:
- if "time" in element: # will fix data later
- ts = element["time"]
- del element["time"]
- element["ts"] = ts
- if "ts" in element:
- if isinstance(element["ts"], str):
- ts = element["ts"]
- else:
- ts = datetime.utcfromtimestamp(element["ts"]).strftime('%Y-%m-%dT%H:%M:%S')
- ts_spl = ts.split("T")
- date = ts_spl[0]
- time = ts_spl[1]
- element["date"] = date
- if "." in time:
- time_spl = time.split(".")
- if len(time_spl) == 2:
- element["time"] = time.split(".")[0]
- else:
- element["time"] = time
- else:
- element["time"] = time
- results_parsed.append(element)
- return results_parsed
-
-
def query_results(
request,
query_params,
diff --git a/core/lib/processing.py b/core/lib/processing.py
new file mode 100644
index 0000000..d3607dc
--- /dev/null
+++ b/core/lib/processing.py
@@ -0,0 +1,164 @@
+from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
+from django.conf import settings
+from datetime import datetime
+
+
+def annotate_results(results_parsed):
+ """
+ Accept a list of dict objects, search for the number of channels and users.
+ Add them to the object.
+ Mutate it in place. Does not return anything.
+ """
+ # Figure out items with net (not discord)
+ nets = set()
+ for x in results_parsed:
+ if "net" in x:
+ nets.add(x["net"])
+
+ for net in nets:
+ # Annotate the online attribute from Threshold
+ nicks = list(
+ set(
+ [
+ x["nick"]
+ for x in results_parsed
+ if {"nick", "src", "net"}.issubset(x)
+ and x["src"] == "irc"
+ and x["net"] == net
+ ]
+ )
+ )
+ channels = list(
+ set(
+ [
+ x["channel"]
+ for x in results_parsed
+ if {"channel", "src", "net"}.issubset(x)
+ and x["src"] == "irc"
+ and x["net"] == net
+ ]
+ )
+ )
+ online_info = annotate_online(net, nicks)
+ # Annotate the number of users in the channel
+ num_users = annotate_num_users(net, channels)
+ # Annotate the number of channels the user is on
+ num_chans = annotate_num_chans(net, nicks)
+ for item in results_parsed:
+ if "net" in item:
+ if item["net"] == net:
+ if "nick" in item:
+ if item["nick"] in online_info:
+ item["online"] = online_info[item["nick"]]
+ if "channel" in item:
+ if item["channel"] in num_users:
+ item["num_users"] = num_users[item["channel"]]
+ if "nick" in item:
+ if item["nick"] in num_chans:
+ item["num_chans"] = num_chans[item["nick"]]
+
+
+def filter_blacklisted(user, response):
+ """
+ Low level filter to take the raw OpenSearch response and remove
+ objects from it we want to keep secret.
+ Does not return, the object is mutated in place.
+ """
+ response["redacted"] = 0
+ response["exemption"] = None
+ if user.is_superuser:
+ response["exemption"] = True
+ # is_anonymous = isinstance(user, AnonymousUser)
+ # For every hit from ES
+ for index, item in enumerate(list(response["hits"]["hits"])):
+ # For every blacklisted type
+ for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
+ # Check this field we are matching exists
+ if "_source" in item.keys():
+ data_index = "_source"
+ elif "fields" in item.keys():
+ data_index = "fields"
+ else:
+ return False
+ if blacklisted_type in item[data_index].keys():
+ content = item[data_index][blacklisted_type]
+ # For every item in the blacklisted array for the type
+ for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
+ blacklisted_type
+ ]:
+ if blacklisted_item == str(content):
+ # Remove the item
+ if item in response["hits"]["hits"]:
+ # Let the UI know something was redacted
+ if (
+ "exemption"
+ not in response["hits"]["hits"][index][data_index]
+ ):
+ response["redacted"] += 1
+ # Anonymous
+ if user.is_anonymous:
+ # Just set it to none so the index is not off
+ response["hits"]["hits"][index] = None
+ else:
+ if not user.has_perm("core.bypass_blacklist"):
+ response["hits"]["hits"][index] = None
+ else:
+ response["hits"]["hits"][index][data_index][
+ "exemption"
+ ] = True
+
+ # Actually get rid of all the things we set to None
+ response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit]
+
+
+def parse_results(results):
+ results_parsed = []
+ stringify = ["host", "channel"]
+ if "hits" in results.keys():
+ if "hits" in results["hits"]:
+ for item in results["hits"]["hits"]:
+ if "_source" in item.keys():
+ data_index = "_source"
+ elif "fields" in item.keys():
+ data_index = "fields"
+ else:
+ return False
+ element = item[data_index]
+ for field in stringify:
+ if field in element:
+ element[field] = str(element[field])
+ # Why are fields in lists...
+ if data_index == "fields":
+ element = {k: v[0] for k, v in element.items() if len(v)}
+ element["id"] = item["_id"]
+
+ # Remove empty values
+ for field in list(element.keys()):
+ if element[field] == "":
+ del element[field]
+
+ # Split the timestamp into date and time
+ if "ts" not in element:
+ if "time" in element: # will fix data later
+ ts = element["time"]
+ del element["time"]
+ element["ts"] = ts
+ if "ts" in element:
+ if isinstance(element["ts"], str):
+ ts = element["ts"]
+ else:
+ ts = datetime.utcfromtimestamp(element["ts"]).strftime('%Y-%m-%dT%H:%M:%S')
+ ts_spl = ts.split("T")
+ date = ts_spl[0]
+ time = ts_spl[1]
+ element["date"] = date
+ if "." in time:
+ time_spl = time.split(".")
+ if len(time_spl) == 2:
+ element["time"] = time.split(".")[0]
+ else:
+ element["time"] = time
+ else:
+ element["time"] = time
+ results_parsed.append(element)
+ return results_parsed
\ No newline at end of file
diff --git a/core/templates/ui/drilldown/search_partial.html b/core/templates/ui/drilldown/search_partial.html
index e67bec5..a6da2fd 100644
--- a/core/templates/ui/drilldown/search_partial.html
+++ b/core/templates/ui/drilldown/search_partial.html
@@ -17,7 +17,7 @@
value="{{ params.query }}"
class="input"
type="text"
- placeholder="Token search: (science | tech | art) + (interest) -hello">
+ placeholder="Search something">
@@ -76,28 +76,6 @@
-
-
-
-
- {% if False %}
- {# what are you looking at? #}
- No access
- {% endif %}
-