Implement more efficient and accurate search algorithm
This commit is contained in:
parent
589ee6d735
commit
f0f7ceb8d1
|
@ -53,4 +53,9 @@ THRESHOLD_API_KEY = "name"
|
|||
THRESHOLD_API_TOKEN = "token"
|
||||
THRESHOLD_API_COUNTER = "counter"
|
||||
|
||||
# NickTrace
|
||||
NICKTRACE_MAX_ITERATIONS = 4
|
||||
NICKTRACE_MAX_CHUNK_SIZE = 500
|
||||
NICKTRACE_QUERY_SIZE = 10000
|
||||
|
||||
DEBUG = True
|
||||
|
|
|
@ -1,14 +1,20 @@
|
|||
from math import ceil
|
||||
|
||||
from django.conf import settings
|
||||
from numpy import array_split
|
||||
|
||||
from core.lib.opensearch import client, run_main_query
|
||||
|
||||
|
||||
def get_nicks(request, net, nick, iter=0):
|
||||
"""
|
||||
Get all related nicknames of the given nickname by tracking nickname changes.
|
||||
"""
|
||||
print("GET NICKS INIT", net, nick, iter)
|
||||
def construct_query(net, nicks):
|
||||
# Construct the query
|
||||
query_nicks = [{"match": {"nick": x}} for x in nicks]
|
||||
query_users = [{"match": {"user": x}} for x in nicks]
|
||||
query_should = query_nicks + query_users
|
||||
# print("QUERY SHOULD", query_should)
|
||||
# Get the initial query
|
||||
query = {
|
||||
"size": 10000,
|
||||
"size": settings.NICKTRACE_QUERY_SIZE,
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
|
@ -16,55 +22,71 @@ def get_nicks(request, net, nick, iter=0):
|
|||
{"match": {"type": "nick"}},
|
||||
{
|
||||
"bool": {
|
||||
"should": [
|
||||
{"match": {"nick": nick}},
|
||||
{"match": {"user": nick}},
|
||||
]
|
||||
"should": query_should,
|
||||
}
|
||||
},
|
||||
]
|
||||
}
|
||||
},
|
||||
}
|
||||
results = run_main_query(client, request.user, query, custom_query=True)
|
||||
return query
|
||||
|
||||
|
||||
def get_nicks(request, net, nicks, iter=True):
    """
    Get all related nicknames of the given nicknames by tracking nickname changes.

    :param request: the Django request; ``request.user`` is passed to
        ``run_main_query``
    :param net: network name, passed through to ``construct_query``
    :param nicks: iterable of nicknames to start the trace from
    :param iter: when true, repeat the search with the newly discovered
        nicknames, at most ``settings.NICKTRACE_MAX_ITERATIONS`` times; the
        recursive calls pass ``False`` so each of them does a single pass
    :return: list of the values found in the ``nick`` and ``user`` fields of
        the matching events, without duplicates, in order of first appearance
    """
    pending = list(nicks)
    # array_split() raises ValueError when asked for zero sections, which is
    # what ceil(0 / chunk_size) evaluates to, so answer an empty input directly.
    if not pending:
        return []

    # Split query into chunks
    split_nicks = array_split(
        pending, ceil(len(pending) / settings.NICKTRACE_MAX_CHUNK_SIZE)
    )

    found = []  # result, in order of first appearance
    known = set()  # same names as `found`, for constant-time membership tests

    for nicks_chunked in split_nicks:
        # Skip (rather than stop at) an empty chunk so later chunks still run
        if len(nicks_chunked) == 0:
            continue
        query = construct_query(net, nicks_chunked)
        results = run_main_query(client, request.user, query, custom_query=True)
        if "hits" not in results or "hits" not in results["hits"]:
            continue
        for item in results["hits"]["hits"]:
            element = item["_source"]
            for name in (element["nick"], element["user"]):
                if name not in known:
                    known.add(name)
                    found.append(name)

    # Run the search again, passing in only the names not yet searched for
    if iter:
        # The names we were called with have been queried by the pass above
        searched = set(pending)
        for _ in range(settings.NICKTRACE_MAX_ITERATIONS):
            nicks_not_searched = [x for x in found if x not in searched]
            if not nicks_not_searched:
                break
            nicks_l2 = get_nicks(request, net, nicks_not_searched, False)

            # Record what we just searched for so it is never queried twice
            searched.update(nicks_not_searched)

            # If we already know about everything we received, we are done
            fresh = [x for x in nicks_l2 if x not in known]
            if not fresh:
                break
            for x in fresh:
                known.add(x)
                found.append(x)

    return found
|
||||
# results = set()
|
||||
# nicks = query["nicks"]
|
||||
# for nick in nicks:
|
||||
# if nick not in results:
|
||||
# nicks_result = get_nicks(request, net_nick)
|
||||
# results.add(nick)
|
||||
|
|
|
@ -66,7 +66,9 @@ class InsightsNicks(LoginRequiredMixin, APIView):
|
|||
return HttpResponse("No nick")
|
||||
net = request.data["net"]
|
||||
nick = request.data["nick"]
|
||||
nicks = get_nicks(request, net, nick)
|
||||
nicks = get_nicks(request, net, [nick])
|
||||
# Filter Guest
|
||||
nicks = [x for x in nicks if not x.startswith("Guest")]
|
||||
online = annotate_online(net, nicks)
|
||||
if not nicks:
|
||||
return HttpResponseForbidden()
|
||||
|
|
Loading…
Reference in New Issue