Implement meta search

This commit is contained in:
Mark Veidemanis 2022-07-21 13:52:41 +01:00
parent e7dba44736
commit b50ef76c2d
Signed by: m
GPG Key ID: 5ACFCEED46C0904F
6 changed files with 184 additions and 22 deletions

View File

@ -58,4 +58,9 @@ NICKTRACE_MAX_ITERATIONS = 4
NICKTRACE_MAX_CHUNK_SIZE = 500 NICKTRACE_MAX_CHUNK_SIZE = 500
NICKTRACE_QUERY_SIZE = 10000 NICKTRACE_QUERY_SIZE = 10000
# Meta
META_MAX_ITERATIONS = 4
META_MAX_CHUNK_SIZE = 500
META_QUERY_SIZE = 10000
DEBUG = True DEBUG = True

94
core/lib/meta.py Normal file
View File

@ -0,0 +1,94 @@
from math import ceil
from django.conf import settings
from numpy import array_split
from core.lib.opensearch import client, run_main_query
def construct_query(net, nicks):
    """
    Build the OpenSearch query body used to look up metadata for nicknames.

    The query requires the "net" field to match the given network and the
    "type" field to match "who", and additionally requires at least one of
    the given nicknames to match the "nick" field. The result size is taken
    from settings.META_QUERY_SIZE.

    :param net: network name to match
    :param nicks: iterable of nicknames; one "match" clause is built per nick
    :return: dict suitable for passing as a search body
    """
    # One "match" clause per nickname, OR-ed together through "should"
    nick_clauses = []
    for nick in nicks:
        nick_clauses.append({"match": {"nick": nick}})
    any_nick = {"bool": {"should": nick_clauses}}

    # Every clause in "must" has to match
    required = [
        {"match": {"net": net}},
        {"match": {"type": "who"}},
        any_nick,
    ]
    return {
        "size": settings.META_QUERY_SIZE,
        "query": {"bool": {"must": required}},
    }
def get_meta(request, net, nicks, iter=True):
    """
    Fetch the stored "who" metadata records for the given nicknames.

    The nicknames are split into chunks of at most
    settings.META_MAX_CHUNK_SIZE, one query is run per chunk against
    settings.OPENSEARCH_INDEX_META, and the hits of all chunks are merged
    into a single list without duplicates.

    :param request: current request; request.user is passed to run_main_query
    :param net: network name to match
    :param nicks: list of nicknames to look up
    :param iter: currently unused, kept for interface compatibility;
                 repeated searching over newly found values is not implemented
    :return: list of dicts, each being a hit's "_source" with an added "id"
             (the hit's "_id") and, when "ts" is a string, "date" and "time"
             (the parts of "ts" before and after the first "T")
    """
    # array_split() raises ValueError when asked for zero sections, which is
    # what ceil(0 / chunk_size) would request for an empty nick list.
    if len(nicks) == 0:
        return []

    # Split query into chunks
    split_nicks = array_split(
        nicks, ceil(len(nicks) / settings.META_MAX_CHUNK_SIZE)
    )
    meta = []
    for nicks_chunked in split_nicks:
        if len(nicks_chunked) == 0:
            continue
        query = construct_query(net, nicks_chunked)
        results = run_main_query(
            client,
            request.user,
            query,
            custom_query=True,
            index=settings.OPENSEARCH_INDEX_META,
        )
        # run_main_query returns False when the request fails; skip this
        # chunk instead of crashing on a non-mapping result.
        if not results:
            continue
        if "hits" not in results or "hits" not in results["hits"]:
            continue
        for item in results["hits"]["hits"]:
            element = item["_source"]
            element["id"] = item["_id"]
            # Split the timestamp into date and time. partition() tolerates
            # a timestamp without a "T" separator (time is then empty).
            ts = element.get("ts")
            if isinstance(ts, str):
                date, _, time = ts.partition("T")
                element["date"] = date
                element["time"] = time
            # Records are dicts (unhashable), so de-duplicate by equality
            if element not in meta:
                meta.append(element)
    return meta

View File

@ -41,7 +41,7 @@ def get_nicks(request, net, nicks, iter=True):
split_nicks = array_split( split_nicks = array_split(
nicks, ceil(len(nicks) / settings.NICKTRACE_MAX_CHUNK_SIZE) nicks, ceil(len(nicks) / settings.NICKTRACE_MAX_CHUNK_SIZE)
) )
nicks = [] nicks = [*nicks]
for nicks_chunked in split_nicks: for nicks_chunked in split_nicks:
if len(nicks_chunked) == 0: if len(nicks_chunked) == 0:
break break
@ -66,7 +66,6 @@ def get_nicks(request, net, nicks, iter=True):
nicks.append(element["user"]) nicks.append(element["user"])
# Run the search again, passing in all the users we found # Run the search again, passing in all the users we found
# Nicknames we find from the repeated search # Nicknames we find from the repeated search
nicks_searched = [] nicks_searched = []
if iter: if iter:
@ -75,6 +74,8 @@ def get_nicks(request, net, nicks, iter=True):
while loop < settings.NICKTRACE_MAX_ITERATIONS: while loop < settings.NICKTRACE_MAX_ITERATIONS:
loop += 1 loop += 1
nicks_not_searched = [x for x in nicks if x not in nicks_searched] nicks_not_searched = [x for x in nicks if x not in nicks_searched]
if not nicks_not_searched:
break
nicks_l2 = get_nicks(request, net, nicks, False) nicks_l2 = get_nicks(request, net, nicks, False)
# Add all the nicks we just searched for to the list # Add all the nicks we just searched for to the list

View File

@ -93,7 +93,7 @@ def filter_blacklisted(user, response):
response["exemption"] = True response["exemption"] = True
def run_main_query(client, user, query, custom_query=False, size=None): def run_main_query(client, user, query, custom_query=False, index=None, size=None):
""" """
Low level helper to run an ES query. Low level helper to run an ES query.
Accept a user to pass it to the filter, so we can Accept a user to pass it to the filter, so we can
@ -101,14 +101,14 @@ def run_main_query(client, user, query, custom_query=False, size=None):
Accept fields and size, for the fields we want to match and the Accept fields and size, for the fields we want to match and the
number of results to return. number of results to return.
""" """
if not index:
index = settings.OPENSEARCH_INDEX_MAIN
if custom_query: if custom_query:
search_query = query search_query = query
else: else:
search_query = construct_query(query, size) search_query = construct_query(query, size)
try: try:
response = client.search( response = client.search(body=search_query, index=index)
body=search_query, index=settings.OPENSEARCH_INDEX_MAIN
)
except RequestError: except RequestError:
print("REQUEST ERROR") print("REQUEST ERROR")
return False return False

View File

@ -1,19 +1,64 @@
<div id="meta"> <div id="meta">
<div class="content" style="max-height: 30em; overflow: auto;"> <div class="content" style="max-height: 30em; overflow: auto;">
<table class="table is-fullwidth is-hoverable"> <div class="table-container">
<tr> {% for item in meta %}
<td> <table class="table is-striped is-fullwidth is-hoverable">
<span <tbody>
hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}' {% if 'ts' in item %}
hx-post="{% url 'modal_drilldown' %}" <tr>
hx-vals='{"net": "{{ net }}", "nick": "{{ nick }}", "channel": "{{ chan }}"}' <th>ts</th>
hx-target="#modals-here" <td>
hx-trigger="click" <p>{{ item.date }}</p>
class="button is-small"> <p>{{ item.time }}</p>
bbb </td>
</span> </tr>
</td> {% endif %}
</tr> {% if 'nick' in item %}
</table> <tr>
<th>nick</th>
<td>{{ item.nick }}</td>
</tr>
{% endif %}
{% if 'ident' in item %}
<tr>
<th>ident</th>
<td>{{ item.ident }}</td>
</tr>
{% endif %}
{% if 'host' in item %}
<tr>
<th>host</th>
<td>{{ item.host }}</td>
</tr>
{% endif %}
{% if 'realname' in item %}
<tr>
<th>realname</th>
<td>{{ item.realname }}</td>
</tr>
{% endif %}
{% if 'status' in item %}
<tr>
<th>status</th>
<td>{{ item.status }}</td>
</tr>
{% endif %}
{% if 'server' in item %}
<tr>
<th>server</th>
<td>{{ item.server }}</td>
</tr>
{% endif %}
{% if 'channel' in item %}
<tr>
<th>channel</th>
<td>{{ item.channel }}</td>
</tr>
{% endif %}
</tbody>
</table>
<hr/>
{% endfor %}
</div>
</div> </div>
</div> </div>

View File

@ -5,6 +5,7 @@ from django.views import View
from rest_framework.parsers import FormParser from rest_framework.parsers import FormParser
from rest_framework.views import APIView from rest_framework.views import APIView
from core.lib.meta import get_meta
from core.lib.nicktrace import get_nicks from core.lib.nicktrace import get_nicks
from core.lib.opensearch import query_single_result from core.lib.opensearch import query_single_result
from core.lib.threshold import ( from core.lib.threshold import (
@ -90,8 +91,24 @@ class InsightsMeta(LoginRequiredMixin, APIView):
return HttpResponse("No nick") return HttpResponse("No nick")
net = request.data["net"] net = request.data["net"]
nick = request.data["nick"] nick = request.data["nick"]
meta = get_meta(request, net, [nick])
unique_values = {}
for x in meta:
for k, v in x.items():
if k not in unique_values:
unique_values[k] = set()
unique_values[k].add(v)
meta_dedup = []
for x in meta:
meta_l2 = {}
for k, v in x.items():
if v in unique_values[k]:
meta_l2[k] = v
unique_values[k].remove(v)
if not set(meta_l2.keys()).issubset(set(["ts", "time", "date", "id"])):
meta_dedup.append(meta_l2)
context = {"net": net, "nick": nick} context = {"net": net, "nick": nick, "meta": meta_dedup}
return render(request, self.template_name, context) return render(request, self.template_name, context)