Implement more efficient and accurate search algorithm

This commit is contained in:
Mark Veidemanis 2022-07-21 13:52:34 +01:00
parent 589ee6d735
commit f0f7ceb8d1
Signed by: m
GPG Key ID: 5ACFCEED46C0904F
3 changed files with 78 additions and 49 deletions

View File

@ -53,4 +53,9 @@ THRESHOLD_API_KEY = "name"
THRESHOLD_API_TOKEN = "token" THRESHOLD_API_TOKEN = "token"
THRESHOLD_API_COUNTER = "counter" THRESHOLD_API_COUNTER = "counter"
# NickTrace
NICKTRACE_MAX_ITERATIONS = 4
NICKTRACE_MAX_CHUNK_SIZE = 500
NICKTRACE_QUERY_SIZE = 10000
DEBUG = True DEBUG = True

View File

@ -1,14 +1,20 @@
from math import ceil
from django.conf import settings
from numpy import array_split
from core.lib.opensearch import client, run_main_query from core.lib.opensearch import client, run_main_query
def get_nicks(request, net, nick, iter=0): def construct_query(net, nicks):
""" # Construct the query
Get all related nicknames of the given nickname by tracking nickname changes. query_nicks = [{"match": {"nick": x}} for x in nicks]
""" query_users = [{"match": {"user": x}} for x in nicks]
print("GET NICKS INIT", net, nick, iter) query_should = query_nicks + query_users
# print("QUERY SHOULD", query_should)
# Get the initial query # Get the initial query
query = { query = {
"size": 10000, "size": settings.NICKTRACE_QUERY_SIZE,
"query": { "query": {
"bool": { "bool": {
"must": [ "must": [
@ -16,18 +22,31 @@ def get_nicks(request, net, nick, iter=0):
{"match": {"type": "nick"}}, {"match": {"type": "nick"}},
{ {
"bool": { "bool": {
"should": [ "should": query_should,
{"match": {"nick": nick}},
{"match": {"user": nick}},
]
} }
}, },
] ]
} }
}, },
} }
results = run_main_query(client, request.user, query, custom_query=True) return query
def get_nicks(request, net, nicks, iter=True):
"""
Get all nicknames related to the given list of nicknames by tracking nickname changes.
"""
# Split query into chunks
split_nicks = array_split(
nicks, ceil(len(nicks) / settings.NICKTRACE_MAX_CHUNK_SIZE)
)
nicks = [] nicks = []
for nicks_chunked in split_nicks:
if len(nicks_chunked) == 0:
break
query = construct_query(net, nicks_chunked)
results = run_main_query(client, request.user, query, custom_query=True)
if "hits" in results.keys(): if "hits" in results.keys():
if "hits" in results["hits"]: if "hits" in results["hits"]:
for item in results["hits"]["hits"]: for item in results["hits"]["hits"]:
@ -46,25 +65,28 @@ def get_nicks(request, net, nick, iter=0):
if element["user"] not in nicks: if element["user"] not in nicks:
nicks.append(element["user"]) nicks.append(element["user"])
# if iter < 2: # Run the search again, passing in all the users we found
# iter += 1
# collect_nicks = [] # Nicknames we find from the repeated search
# for x in nicks: nicks_searched = []
# nicks_2 = get_nicks(request, net, x, iter) if iter:
# print("NICKS_2", nicks_2) nicks_l2 = []
# for y in nicks_2: loop = 0
# if y not in collect_nicks: while loop < settings.NICKTRACE_MAX_ITERATIONS:
# collect_nicks.append(y) loop += 1
# print("RETURN NICKS", nick, collect_nicks) nicks_not_searched = [x for x in nicks if x not in nicks_searched]
# for x in collect_nicks: nicks_l2 = get_nicks(request, net, nicks, False)
# if x not in nicks:
# nicks.append(x) # Add all the nicks we just searched for to the list
# else: for x in nicks_not_searched:
# print("ABORTING SEARCH") if x not in nicks_not_searched:
nicks_searched.append(x)
# Stop once every nick returned by this pass is one we already know about
if set(nicks_l2).issubset(set(nicks)):
break
for x in nicks_l2:
if x not in nicks:
nicks.append(x)
return nicks return nicks
# results = set()
# nicks = query["nicks"]
# for nick in nicks:
# if nick not in results:
# nicks_result = get_nicks(request, net_nick)
# results.add(nick)

View File

@ -66,7 +66,9 @@ class InsightsNicks(LoginRequiredMixin, APIView):
return HttpResponse("No nick") return HttpResponse("No nick")
net = request.data["net"] net = request.data["net"]
nick = request.data["nick"] nick = request.data["nick"]
nicks = get_nicks(request, net, nick) nicks = get_nicks(request, net, [nick])
# Filter Guest
nicks = [x for x in nicks if not x.startswith("Guest")]
online = annotate_online(net, nicks) online = annotate_online(net, nicks)
if not nicks: if not nicks:
return HttpResponseForbidden() return HttpResponseForbidden()