Implement more efficient and accurate search algorithm
This commit is contained in:
parent
589ee6d735
commit
f0f7ceb8d1
|
@ -53,4 +53,9 @@ THRESHOLD_API_KEY = "name"
|
||||||
THRESHOLD_API_TOKEN = "token"
|
THRESHOLD_API_TOKEN = "token"
|
||||||
THRESHOLD_API_COUNTER = "counter"
|
THRESHOLD_API_COUNTER = "counter"
|
||||||
|
|
||||||
|
# NickTrace
|
||||||
|
NICKTRACE_MAX_ITERATIONS = 4
|
||||||
|
NICKTRACE_MAX_CHUNK_SIZE = 500
|
||||||
|
NICKTRACE_QUERY_SIZE = 10000
|
||||||
|
|
||||||
DEBUG = True
|
DEBUG = True
|
||||||
|
|
|
@ -1,14 +1,20 @@
|
||||||
|
from math import ceil
|
||||||
|
|
||||||
|
from django.conf import settings
|
||||||
|
from numpy import array_split
|
||||||
|
|
||||||
from core.lib.opensearch import client, run_main_query
|
from core.lib.opensearch import client, run_main_query
|
||||||
|
|
||||||
|
|
||||||
def get_nicks(request, net, nick, iter=0):
|
def construct_query(net, nicks):
|
||||||
"""
|
# Construct the query
|
||||||
Get all related nicknames of the given nickname by tracking nickname changes.
|
query_nicks = [{"match": {"nick": x}} for x in nicks]
|
||||||
"""
|
query_users = [{"match": {"user": x}} for x in nicks]
|
||||||
print("GET NICKS INIT", net, nick, iter)
|
query_should = query_nicks + query_users
|
||||||
|
# print("QUERY SHOULD", query_should)
|
||||||
# Get the initial query
|
# Get the initial query
|
||||||
query = {
|
query = {
|
||||||
"size": 10000,
|
"size": settings.NICKTRACE_QUERY_SIZE,
|
||||||
"query": {
|
"query": {
|
||||||
"bool": {
|
"bool": {
|
||||||
"must": [
|
"must": [
|
||||||
|
@ -16,55 +22,71 @@ def get_nicks(request, net, nick, iter=0):
|
||||||
{"match": {"type": "nick"}},
|
{"match": {"type": "nick"}},
|
||||||
{
|
{
|
||||||
"bool": {
|
"bool": {
|
||||||
"should": [
|
"should": query_should,
|
||||||
{"match": {"nick": nick}},
|
|
||||||
{"match": {"user": nick}},
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
results = run_main_query(client, request.user, query, custom_query=True)
|
return query
|
||||||
|
|
||||||
|
|
||||||
|
def _search_nick_chunks(request, net, nicks):
    """
    Run the nick-change query for the given names and collect related names.

    The names are queried in batches of at most
    settings.NICKTRACE_MAX_CHUNK_SIZE names per query.

    :param request: request whose ``user`` is passed to run_main_query
    :param net: network name, passed through to construct_query
    :param nicks: non-empty list of names to search for
    :return: every distinct ``nick`` and ``user`` value found in the hits,
        in first-seen order
    """
    found = []
    seen = set()
    chunk_size = settings.NICKTRACE_MAX_CHUNK_SIZE
    for start in range(0, len(nicks), chunk_size):
        query = construct_query(net, nicks[start : start + chunk_size])
        results = run_main_query(client, request.user, query, custom_query=True)
        # A response without a hits.hits list contributes nothing
        if "hits" not in results or "hits" not in results["hits"]:
            continue
        for item in results["hits"]["hits"]:
            element = item["_source"]
            for name in (element["nick"], element["user"]):
                if name not in seen:
                    seen.add(name)
                    found.append(name)
    return found


def get_nicks(request, net, nicks, iter=True):
    """
    Get all related nicknames of the given nicknames by tracking nickname changes.

    The given names are searched first. When ``iter`` is true, the names that
    search returned are searched in turn, for at most
    settings.NICKTRACE_MAX_ITERATIONS further rounds or until a round has no
    names left that were not already searched. Each name is queried only once.

    :param request: request whose ``user`` is passed to run_main_query
    :param net: network name, passed through to construct_query
    :param nicks: iterable of names to start from
    :param iter: whether to follow up on the names found by the first search
        (the name shadows the builtin but is kept so keyword callers still work)
    :return: list of distinct names found in the results, in first-seen order;
        empty when there is nothing to search for or nothing was found
    """
    nicks = list(nicks)
    if not nicks:
        # Nothing to search for; building a query with no clauses is pointless
        return []

    found = _search_nick_chunks(request, net, nicks)
    if not iter or not found:
        return found

    known = set(found)
    # The starting names have just been queried, so never query them again
    searched = set(nicks)
    for _ in range(settings.NICKTRACE_MAX_ITERATIONS):
        pending = [x for x in found if x not in searched]
        if not pending:
            # Every name we know about has been searched: nothing new can appear
            break
        searched.update(pending)
        for name in _search_nick_chunks(request, net, pending):
            if name not in known:
                known.add(name)
                found.append(name)
    return found
|
||||||
# results = set()
|
|
||||||
# nicks = query["nicks"]
|
|
||||||
# for nick in nicks:
|
|
||||||
# if nick not in results:
|
|
||||||
# nicks_result = get_nicks(request, net_nick)
|
|
||||||
# results.add(nick)
|
|
||||||
|
|
|
@ -66,7 +66,9 @@ class InsightsNicks(LoginRequiredMixin, APIView):
|
||||||
return HttpResponse("No nick")
|
return HttpResponse("No nick")
|
||||||
net = request.data["net"]
|
net = request.data["net"]
|
||||||
nick = request.data["nick"]
|
nick = request.data["nick"]
|
||||||
nicks = get_nicks(request, net, nick)
|
nicks = get_nicks(request, net, [nick])
|
||||||
|
# Filter Guest
|
||||||
|
nicks = [x for x in nicks if not x.startswith("Guest")]
|
||||||
online = annotate_online(net, nicks)
|
online = annotate_online(net, nicks)
|
||||||
if not nicks:
|
if not nicks:
|
||||||
return HttpResponseForbidden()
|
return HttpResponseForbidden()
|
||||||
|
|
Loading…
Reference in New Issue