From b50ef76c2d1005a9f3e9d81b61f3d64a114eef37 Mon Sep 17 00:00:00 2001 From: Mark Veidemanis Date: Thu, 21 Jul 2022 13:52:41 +0100 Subject: [PATCH] Implement meta search --- app/local_settings.example.py | 5 ++ core/lib/meta.py | 94 ++++++++++++++++++++++++++++ core/lib/nicktrace.py | 5 +- core/lib/opensearch.py | 8 +-- core/templates/ui/insights/meta.html | 75 +++++++++++++++++----- core/views/dynamic/insights.py | 19 +++++- 6 files changed, 184 insertions(+), 22 deletions(-) create mode 100644 core/lib/meta.py diff --git a/app/local_settings.example.py b/app/local_settings.example.py index 38237a6..d36c7f8 100644 --- a/app/local_settings.example.py +++ b/app/local_settings.example.py @@ -58,4 +58,9 @@ NICKTRACE_MAX_ITERATIONS = 4 NICKTRACE_MAX_CHUNK_SIZE = 500 NICKTRACE_QUERY_SIZE = 10000 +# Meta +META_MAX_ITERATIONS = 4 +META_MAX_CHUNK_SIZE = 500 +META_QUERY_SIZE = 10000 + DEBUG = True diff --git a/core/lib/meta.py b/core/lib/meta.py new file mode 100644 index 0000000..b1cf783 --- /dev/null +++ b/core/lib/meta.py @@ -0,0 +1,94 @@ +from math import ceil + +from django.conf import settings +from numpy import array_split + +from core.lib.opensearch import client, run_main_query + + +def construct_query(net, nicks): + # Construct the query + query_nicks = [{"match": {"nick": x}} for x in nicks] + query_should = query_nicks + # print("QUERY SHOULD", query_should) + # Get the initial query + query = { + "size": settings.META_QUERY_SIZE, + "query": { + "bool": { + "must": [ + {"match": {"net": net}}, + {"match": {"type": "who"}}, + { + "bool": { + "should": query_should, + } + }, + ] + } + }, + } + return query + + +def get_meta(request, net, nicks, iter=True): + """ + Get all related nicknames of the given nickname by tracking nickname changes. + """ + + # Split query into chunks + split_nicks = array_split( + nicks, ceil(len(nicks) / settings.META_MAX_CHUNK_SIZE) + ) + meta = [] + for nicks_chunked in split_nicks: + if len(nicks_chunked) == 0: + break + meta_tmp = [] + query = construct_query(net, nicks_chunked) + results = run_main_query(client, request.user, query, + custom_query=True, index=settings.OPENSEARCH_INDEX_META) + if "hits" in results.keys(): + if "hits" in results["hits"]: + for item in results["hits"]["hits"]: + element = item["_source"] + element["id"] = item["_id"] + + # Split the timestamp into date and time + ts = element["ts"] + ts_spl = ts.split("T") + date = ts_spl[0] + time = ts_spl[1] + element["date"] = date + element["time"] = time + meta_tmp.append(element) + for x in meta_tmp: + if x not in meta: + meta.append(x) + print("GOT META", meta) + + # Run the search again, passing in all the users we found + + # Nicknames we find from the repeated search + # nicks_searched = [] + # if iter: + # nicks_l2 = [] + # loop = 0 + # while loop < settings.META_MAX_ITERATIONS: + # loop += 1 + # nicks_not_searched = [x for x in nicks if x not in nicks_searched] + # nicks_l2 = get_nicks(request, net, nicks, False) + + # # Add all the nicks we just searched for to the list + # for x in nicks_not_searched: + # if x not in nicks_not_searched: + # nicks_searched.append(x) + + # # If all of the nicks we received now, we already know about + # if set(nicks_l2).issubset(set(nicks)): + # break + # for x in nicks_l2: + # if x not in nicks: + # nicks.append(x) + + return meta diff --git a/core/lib/nicktrace.py b/core/lib/nicktrace.py index af7e801..8c0137f 100644 --- a/core/lib/nicktrace.py +++ b/core/lib/nicktrace.py @@ -41,7 +41,7 @@ def get_nicks(request, net, nicks, iter=True): split_nicks = array_split( nicks, ceil(len(nicks) / settings.NICKTRACE_MAX_CHUNK_SIZE) ) - nicks = [] + nicks = [*nicks] for nicks_chunked in split_nicks: if len(nicks_chunked) == 0: break @@ -66,7 +66,6 @@ def get_nicks(request, net, nicks, iter=True): nicks.append(element["user"]) # Run the search again, passing in all the users we found - # Nicknames we find from the repeated search nicks_searched = [] if iter: @@ -75,6 +74,8 @@ def get_nicks(request, net, nicks, iter=True): while loop < settings.NICKTRACE_MAX_ITERATIONS: loop += 1 nicks_not_searched = [x for x in nicks if x not in nicks_searched] + if not nicks_not_searched: + break nicks_l2 = get_nicks(request, net, nicks, False) # Add all the nicks we just searched for to the list diff --git a/core/lib/opensearch.py b/core/lib/opensearch.py index 31331de..9ebe79c 100644 --- a/core/lib/opensearch.py +++ b/core/lib/opensearch.py @@ -93,7 +93,7 @@ def filter_blacklisted(user, response): response["exemption"] = True -def run_main_query(client, user, query, custom_query=False, size=None): +def run_main_query(client, user, query, custom_query=False, index=None, size=None): """ Low level helper to run an ES query. Accept a user to pass it to the filter, so we can @@ -101,14 +101,14 @@ def run_main_query(client, user, query, custom_query=False, size=None): Accept fields and size, for the fields we want to match and the number of results to return. """ + if not index: + index = settings.OPENSEARCH_INDEX_MAIN if custom_query: search_query = query else: search_query = construct_query(query, size) try: - response = client.search( - body=search_query, index=settings.OPENSEARCH_INDEX_MAIN - ) + response = client.search(body=search_query, index=index) except RequestError: print("REQUEST ERROR") return False diff --git a/core/templates/ui/insights/meta.html b/core/templates/ui/insights/meta.html index fd971c7..f1c55ad 100644 --- a/core/templates/ui/insights/meta.html +++ b/core/templates/ui/insights/meta.html @@ -1,19 +1,64 @@
- - - - -
- - bbb - -
+
+ {% for item in meta %} + + + {% if 'ts' in item %} + + + + + {% endif %} + {% if 'nick' in item %} + + + + + {% endif %} + {% if 'ident' in item %} + + + + + {% endif %} + {% if 'host' in item %} + + + + + {% endif %} + {% if 'realname' in item %} + + + + + {% endif %} + {% if 'status' in item %} + + + + + {% endif %} + {% if 'server' in item %} + + + + + {% endif %} + {% if 'channel' in item %} + + + + + {% endif %} + +
ts +

{{ item.date }}

+

{{ item.time }}

+
nick{{ item.nick }}
ident{{ item.ident }}
host{{ item.host }}
realname{{ item.realname }}
status{{ item.status }}
server{{ item.server }}
channel{{ item.channel }}
+
+ {% endfor %} +
\ No newline at end of file diff --git a/core/views/dynamic/insights.py b/core/views/dynamic/insights.py index 950d431..ce5e9b3 100644 --- a/core/views/dynamic/insights.py +++ b/core/views/dynamic/insights.py @@ -5,6 +5,7 @@ from django.views import View from rest_framework.parsers import FormParser from rest_framework.views import APIView +from core.lib.meta import get_meta from core.lib.nicktrace import get_nicks from core.lib.opensearch import query_single_result from core.lib.threshold import ( @@ -90,8 +91,24 @@ class InsightsMeta(LoginRequiredMixin, APIView): return HttpResponse("No nick") net = request.data["net"] nick = request.data["nick"] + meta = get_meta(request, net, [nick]) + unique_values = {} + for x in meta: + for k, v in x.items(): + if k not in unique_values: + unique_values[k] = set() + unique_values[k].add(v) + meta_dedup = [] + for x in meta: + meta_l2 = {} + for k, v in x.items(): + if v in unique_values[k]: + meta_l2[k] = v + unique_values[k].remove(v) + if not set(meta_l2.keys()).issubset(set(["ts", "time", "date", "id"])): + meta_dedup.append(meta_l2) - context = {"net": net, "nick": nick} + context = {"net": net, "nick": nick, "meta": meta_dedup} return render(request, self.template_name, context)