Implement meta search

This commit is contained in:
Mark Veidemanis 2022-07-21 13:52:41 +01:00
parent e7dba44736
commit b50ef76c2d
Signed by: m
GPG Key ID: 5ACFCEED46C0904F
6 changed files with 184 additions and 22 deletions

View File

@ -58,4 +58,9 @@ NICKTRACE_MAX_ITERATIONS = 4
# get_nicks() divides len(nicks) by this value to decide how many chunks
# the nick list is split into before querying.
NICKTRACE_MAX_CHUNK_SIZE = 500
# NOTE(review): presumably the "size" of each nicktrace query, mirroring
# META_QUERY_SIZE below - confirm against core/lib/nicktrace.py.
NICKTRACE_QUERY_SIZE = 10000
# Meta
# Iteration cap intended for repeated meta lookups; the iterative search in
# core/lib/meta.py is not active, so nothing reads this value at present.
META_MAX_ITERATIONS = 4
# get_meta() divides len(nicks) by this value to decide how many chunks
# the nick list is split into before querying.
META_MAX_CHUNK_SIZE = 500
# Sent as the "size" field of every meta query body.
META_QUERY_SIZE = 10000
DEBUG = True

94
core/lib/meta.py Normal file
View File

@ -0,0 +1,94 @@
from math import ceil
from django.conf import settings
from numpy import array_split
from core.lib.opensearch import client, run_main_query
def construct_query(net, nicks):
    """
    Build the request body for a meta lookup.

    The body asks for up to ``settings.META_QUERY_SIZE`` results and holds a
    single ``bool``/``must`` list with three clauses: a match on ``net``, a
    match on ``type`` equal to ``"who"``, and a nested ``bool``/``should``
    carrying one ``nick`` match clause per entry of ``nicks``.

    :param net: value the ``net`` field is matched against
    :param nicks: iterable of nicknames, one ``should`` clause is made for each
    :return: the query body as a dict
    """
    # One match clause per nickname, in the order the nicknames were given
    nick_clauses = []
    for nick in nicks:
        nick_clauses.append({"match": {"nick": nick}})

    must_clauses = [
        {"match": {"net": net}},
        {"match": {"type": "who"}},
        {"bool": {"should": nick_clauses}},
    ]

    return {
        "size": settings.META_QUERY_SIZE,
        "query": {"bool": {"must": must_clauses}},
    }
def get_meta(request, net, nicks, iter=True):
    """
    Fetch the stored "who" records for the given nicknames on a network.

    The nicknames are split into chunks (the chunk count is derived from
    ``settings.META_MAX_CHUNK_SIZE``), one query built by ``construct_query``
    is run per chunk against ``settings.OPENSEARCH_INDEX_META``, and the hits
    of all chunks are merged into a single list without duplicates.

    :param request: current request; ``request.user`` is passed on to
        ``run_main_query``
    :param net: network the records have to match
    :param nicks: sequence of nicknames to look up
    :param iter: currently unused; kept so existing callers keep working
    :return: list of hit ``_source`` dicts, each extended with ``id`` (the
        hit's ``_id``) and, when ``ts`` is a string, ``date`` and ``time``
        (the parts of ``ts`` before and after the first ``"T"``)
    """
    # Nothing to look up. This also avoids array_split(), which raises
    # ValueError when asked to split into zero sections.
    if len(nicks) == 0:
        return []

    # Split query into chunks
    split_nicks = array_split(
        nicks, ceil(len(nicks) / settings.META_MAX_CHUNK_SIZE)
    )

    meta = []
    for nicks_chunked in split_nicks:
        # len() rather than truthiness: the chunks are numpy arrays
        if len(nicks_chunked) == 0:
            continue

        query = construct_query(net, nicks_chunked)
        results = run_main_query(
            client,
            request.user,
            query,
            custom_query=True,
            index=settings.OPENSEARCH_INDEX_META,
        )
        # run_main_query returns False when the request fails; skip this
        # chunk instead of crashing on a bool.
        if not results:
            continue
        if "hits" not in results or "hits" not in results["hits"]:
            continue

        for item in results["hits"]["hits"]:
            element = item["_source"]
            element["id"] = item["_id"]

            # Split the timestamp into date and time. partition() tolerates
            # a timestamp without a "T" separator (time is then empty).
            ts = element.get("ts")
            if isinstance(ts, str):
                date_part, _, time_part = ts.partition("T")
                element["date"] = date_part
                element["time"] = time_part

            # Records are dicts (unhashable), so duplicates are detected by
            # equality against what has been collected so far.
            if element not in meta:
                meta.append(element)

    return meta

View File

@ -41,7 +41,7 @@ def get_nicks(request, net, nicks, iter=True):
split_nicks = array_split(
nicks, ceil(len(nicks) / settings.NICKTRACE_MAX_CHUNK_SIZE)
)
nicks = []
nicks = [*nicks]
for nicks_chunked in split_nicks:
if len(nicks_chunked) == 0:
break
@ -66,7 +66,6 @@ def get_nicks(request, net, nicks, iter=True):
nicks.append(element["user"])
# Run the search again, passing in all the users we found
# Nicknames we find from the repeated search
nicks_searched = []
if iter:
@ -75,6 +74,8 @@ def get_nicks(request, net, nicks, iter=True):
while loop < settings.NICKTRACE_MAX_ITERATIONS:
loop += 1
nicks_not_searched = [x for x in nicks if x not in nicks_searched]
if not nicks_not_searched:
break
nicks_l2 = get_nicks(request, net, nicks, False)
# Add all the nicks we just searched for to the list

View File

@ -93,7 +93,7 @@ def filter_blacklisted(user, response):
response["exemption"] = True
def run_main_query(client, user, query, custom_query=False, size=None):
def run_main_query(client, user, query, custom_query=False, index=None, size=None):
"""
Low level helper to run an ES query.
Accept a user to pass it to the filter, so we can
@ -101,14 +101,14 @@ def run_main_query(client, user, query, custom_query=False, size=None):
Accept fields and size, for the fields we want to match and the
number of results to return.
"""
if not index:
index = settings.OPENSEARCH_INDEX_MAIN
if custom_query:
search_query = query
else:
search_query = construct_query(query, size)
try:
response = client.search(
body=search_query, index=settings.OPENSEARCH_INDEX_MAIN
)
response = client.search(body=search_query, index=index)
except RequestError:
print("REQUEST ERROR")
return False

View File

@ -1,19 +1,64 @@
<div id="meta">
<div class="content" style="max-height: 30em; overflow: auto;">
<table class="table is-fullwidth is-hoverable">
<tr>
<td>
<span
hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}'
hx-post="{% url 'modal_drilldown' %}"
hx-vals='{"net": "{{ net }}", "nick": "{{ nick }}", "channel": "{{ chan }}"}'
hx-target="#modals-here"
hx-trigger="click"
class="button is-small">
bbb
</span>
</td>
</tr>
</table>
<div class="table-container">
{% for item in meta %}
<table class="table is-striped is-fullwidth is-hoverable">
<tbody>
{% if 'ts' in item %}
<tr>
<th>ts</th>
<td>
<p>{{ item.date }}</p>
<p>{{ item.time }}</p>
</td>
</tr>
{% endif %}
{% if 'nick' in item %}
<tr>
<th>nick</th>
<td>{{ item.nick }}</td>
</tr>
{% endif %}
{% if 'ident' in item %}
<tr>
<th>ident</th>
<td>{{ item.ident }}</td>
</tr>
{% endif %}
{% if 'host' in item %}
<tr>
<th>host</th>
<td>{{ item.host }}</td>
</tr>
{% endif %}
{% if 'realname' in item %}
<tr>
<th>realname</th>
<td>{{ item.realname }}</td>
</tr>
{% endif %}
{% if 'status' in item %}
<tr>
<th>status</th>
<td>{{ item.status }}</td>
</tr>
{% endif %}
{% if 'server' in item %}
<tr>
<th>server</th>
<td>{{ item.server }}</td>
</tr>
{% endif %}
{% if 'channel' in item %}
<tr>
<th>channel</th>
<td>{{ item.channel }}</td>
</tr>
{% endif %}
</tbody>
</table>
<hr/>
{% endfor %}
</div>
</div>
</div>

View File

@ -5,6 +5,7 @@ from django.views import View
from rest_framework.parsers import FormParser
from rest_framework.views import APIView
from core.lib.meta import get_meta
from core.lib.nicktrace import get_nicks
from core.lib.opensearch import query_single_result
from core.lib.threshold import (
@ -90,8 +91,24 @@ class InsightsMeta(LoginRequiredMixin, APIView):
return HttpResponse("No nick")
net = request.data["net"]
nick = request.data["nick"]
meta = get_meta(request, net, [nick])
unique_values = {}
for x in meta:
for k, v in x.items():
if k not in unique_values:
unique_values[k] = set()
unique_values[k].add(v)
meta_dedup = []
for x in meta:
meta_l2 = {}
for k, v in x.items():
if v in unique_values[k]:
meta_l2[k] = v
unique_values[k].remove(v)
if not set(meta_l2.keys()).issubset(set(["ts", "time", "date", "id"])):
meta_dedup.append(meta_l2)
context = {"net": net, "nick": nick}
context = {"net": net, "nick": nick, "meta": meta_dedup}
return render(request, self.template_name, context)