Implement Insights page
This commit is contained in:
@@ -2,8 +2,13 @@ from django.conf import settings
|
||||
from opensearchpy import OpenSearch
|
||||
from opensearchpy.exceptions import RequestError
|
||||
|
||||
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
|
||||
|
||||
|
||||
def initialise_opensearch():
|
||||
"""
|
||||
Initialise the OpenSearch API endpoint.
|
||||
"""
|
||||
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
|
||||
client = OpenSearch(
|
||||
# fmt: off
|
||||
@@ -22,9 +27,157 @@ def initialise_opensearch():
|
||||
return client
|
||||
|
||||
|
||||
def construct_query(query, fields, size):
|
||||
if not fields:
|
||||
fields = settings.OPENSEARCH_MAIN_SEARCH_FIELDS
|
||||
client = initialise_opensearch()
|
||||
|
||||
|
||||
def annotate_results(results_parsed):
    """
    Accept a list of dict objects, search for the number of channels and users.
    Add them to the object.
    Mutate it in place. Does not return anything.

    For every distinct "net" value found in the results, the nicks and
    channels of that network's IRC results are passed to annotate_online,
    annotate_num_users and annotate_num_chans. Values found in the returned
    mappings are stored on the results of the same network under the keys
    "online", "num_users" and "num_chans".

    Results that lack "net", "src", "nick" or "channel" are tolerated and
    simply left without the corresponding annotation.
    """
    # Figure out items with net (not discord)
    nets = {item["net"] for item in results_parsed if "net" in item}

    for net in nets:
        # Restrict everything to this network so that a nick or channel seen
        # on one network is never looked up on, or annotated from, another.
        net_items = [item for item in results_parsed if item.get("net") == net]
        irc_items = [item for item in net_items if item.get("src") == "irc"]
        if not irc_items:
            # Nothing to look up for this network.
            continue

        # dict.fromkeys de-duplicates while keeping first-seen order.
        nicks = list(
            dict.fromkeys(item["nick"] for item in irc_items if "nick" in item)
        )
        channels = list(
            dict.fromkeys(
                item["channel"] for item in irc_items if "channel" in item
            )
        )

        # Annotate the online attribute from Threshold
        online_info = annotate_online(net, nicks)
        # Annotate the number of users in the channel
        num_users = annotate_num_users(net, channels)
        # Annotate the number channels the user is on
        num_chans = annotate_num_chans(net, nicks)

        for item in net_items:
            if "nick" in item:
                nick = item["nick"]
                if nick in online_info:
                    item["online"] = online_info[nick]
                if nick in num_chans:
                    item["num_chans"] = num_chans[nick]
            if "channel" in item:
                channel = item["channel"]
                if channel in num_users:
                    item["num_users"] = num_users[channel]
|
||||
|
||||
|
||||
def _matches_blacklist(source):
    """Return True if any blacklisted string occurs in its field of source."""
    for field, needles in settings.OPENSEARCH_BLACKLISTED.items():
        # Check this field we are matching exists
        if field not in source:
            continue
        content = str(source[field])
        if any(needle in content for needle in needles):
            return True
    return False


def filter_blacklisted(user, response):
    """
    Low level filter to take the raw OpenSearch response and remove
    objects from it we want to keep secret.
    Does not return, the object is mutated in place.

    Sets response["redacted"] to the number of hits that matched
    settings.OPENSEARCH_BLACKLISTED (each hit is counted once, however many
    blacklist entries it matches) and response["exemption"] to True when a
    superuser was allowed to keep such a hit (None otherwise).

    NOTE(review): the indentation of the original block was not recoverable,
    so the exact nesting of the "redacted"/"exemption" updates relative to
    the superuser check is an assumption here: blacklisted hits are always
    counted, removed only for non-superusers, and flagged as an exemption
    only for superusers. Confirm against the UI that consumes these keys.
    """
    response["redacted"] = 0
    response["exemption"] = None

    kept = []
    # For every hit from ES
    for hit in response["hits"]["hits"]:
        if not _matches_blacklist(hit["_source"]):
            kept.append(hit)
            continue
        # Let the UI know something was redacted
        response["redacted"] += 1
        if user.is_superuser:
            # Superusers keep the hit; record that an exemption was applied.
            response["exemption"] = True
            kept.append(hit)

    # Slice assignment keeps the same list object, so the response really is
    # mutated in place for any caller holding a reference to the hit list.
    response["hits"]["hits"][:] = kept
|
||||
|
||||
|
||||
def run_main_query(client, user, query, size=None):
    """
    Low level helper to run an ES query.
    Accept a user to pass it to the filter, so we can
    avoid filtering for superusers.
    Accept a size, for the number of results to return.

    Returns the response from client.search after it has been passed
    through filter_blacklisted, or False if the search raised RequestError.
    """
    import logging

    search_query = construct_query(query, size)
    try:
        response = client.search(
            body=search_query, index=settings.OPENSEARCH_INDEX_MAIN
        )
    except RequestError:
        # Log with traceback instead of printing, so the failure reaches the
        # configured log handlers along with the reason the query failed.
        logging.getLogger(__name__).exception("OpenSearch request error")
        return False
    filter_blacklisted(user, response)
    return response
|
||||
|
||||
|
||||
def query_results(request, size=None):
    """
    API helper to alter the OpenSearch return format into something
    a bit better to parse.
    Accept a HTTP request object. Run the query, and annotate the
    results with the other data we have.

    When size is not given it is read from request.POST["size"] (if present)
    and must be one of settings.OPENSEARCH_MAIN_SIZES.

    Returns a context dict with the keys "query", "results", "card", "took",
    "redacted" and "exemption", or False when the posted size is not allowed,
    no "query" was posted, or the query itself failed.
    """
    if not size and "size" in request.POST:
        size = request.POST["size"]
        if size not in settings.OPENSEARCH_MAIN_SIZES:
            return False

    # Without a query there is nothing to run; fail the same way the other
    # validation errors do instead of hitting an unbound local below.
    if "query" not in request.POST:
        return False
    query = request.POST["query"]

    results = run_main_query(
        client,
        request.user,
        query,
        size,
    )
    if not results:
        return False

    results_parsed = []
    for hit in results.get("hits", {}).get("hits", []):
        element = hit["_source"]
        element["id"] = hit["_id"]

        # Split the timestamp into date and time
        ts = element.get("ts")
        if isinstance(ts, str):
            # partition never raises: a timestamp without "T" yields the
            # whole value as the date and an empty time.
            date_part, _, time_part = ts.partition("T")
            element["date"] = date_part
            element["time"] = time_part
        results_parsed.append(element)

    annotate_results(results_parsed)

    context = {
        "query": query,
        "results": results_parsed,
        "card": results["hits"]["total"]["value"],
        "took": results["took"],
        "redacted": results["redacted"],
        "exemption": results["exemption"],
    }
    return context
|
||||
|
||||
|
||||
def query_single_result(request):
    """
    Run the posted query with a result size of 1.

    Returns a tuple (count, context): count is the number of distinct
    "nick" values among the results and context is whatever query_results
    returned. When exactly one nick was found, the first result is also
    stored in context["item"].

    If query_results fails (returns a falsy value) the tuple is
    (0, <that value>), so callers can still unpack the result.
    """
    context = query_results(request, 1)
    if not context:
        return (0, context)

    dedup_set = {item["nick"] for item in context["results"] if "nick" in item}
    if len(dedup_set) == 1:
        context["item"] = context["results"][0]
    return (len(dedup_set), context)
|
||||
|
||||
|
||||
def construct_query(query, size):
|
||||
"""
|
||||
Accept some query parameters and construct an OpenSearch query.
|
||||
"""
|
||||
if not size:
|
||||
size = 5
|
||||
query = {
|
||||
@@ -32,7 +185,7 @@ def construct_query(query, fields, size):
|
||||
"query": {
|
||||
"query_string": {
|
||||
"query": query,
|
||||
"fields": fields,
|
||||
# "fields": fields,
|
||||
# "default_field": "msg",
|
||||
# "type": "best_fields",
|
||||
"fuzziness": "AUTO",
|
||||
@@ -63,46 +216,3 @@ def construct_query(query, fields, size):
|
||||
],
|
||||
}
|
||||
return query
|
||||
|
||||
|
||||
def _matches_blacklist(source):
    """Return True if any blacklisted string occurs in its field of source."""
    for field, needles in settings.OPENSEARCH_BLACKLISTED.items():
        # Check this field we are matching exists
        if field not in source:
            continue
        content = str(source[field])
        if any(needle in content for needle in needles):
            return True
    return False


def filter_blacklisted(user, response):
    """
    Remove hits matching settings.OPENSEARCH_BLACKLISTED from the response.

    The response is mutated in place and nothing is returned.
    response["redacted"] is set to the number of hits that matched the
    blacklist (each hit counted once) and response["exemption"] to True when
    a superuser was allowed to keep such a hit (None otherwise).

    NOTE(review): the indentation of the original block was not recoverable,
    so the exact nesting of the "redacted"/"exemption" updates relative to
    the superuser check is an assumption here: blacklisted hits are always
    counted, removed only for non-superusers, and flagged as an exemption
    only for superusers. Confirm against the UI that consumes these keys.
    """
    response["redacted"] = 0
    response["exemption"] = None

    kept = []
    # For every hit from ES
    for hit in response["hits"]["hits"]:
        if not _matches_blacklist(hit["_source"]):
            kept.append(hit)
            continue
        # Let the UI know something was redacted
        response["redacted"] += 1
        if user.is_superuser:
            # Superusers keep the hit; record that an exemption was applied.
            response["exemption"] = True
            kept.append(hit)

    # Slice assignment keeps the same list object, so the response really is
    # mutated in place for any caller holding a reference to the hit list.
    response["hits"]["hits"][:] = kept
|
||||
|
||||
|
||||
def run_main_query(client, user, query, fields=None, size=None):
    """
    Validate the requested fields and size, run the search and filter it.

    Returns False when a requested field is not listed in
    settings.OPENSEARCH_MAIN_SEARCH_FIELDS, when size is not listed in
    settings.OPENSEARCH_MAIN_SIZES, or when the search raises RequestError.
    Otherwise returns the response after filter_blacklisted has processed it
    for the given user.
    """
    # Reject any field outside the configured whitelist.
    if fields and any(
        name not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS for name in fields
    ):
        return False

    # Reject any size outside the configured whitelist.
    if size and size not in settings.OPENSEARCH_MAIN_SIZES:
        return False

    body = construct_query(query, fields, size)
    try:
        response = client.search(body=body, index=settings.OPENSEARCH_INDEX_MAIN)
    except RequestError:
        return False

    filter_blacklisted(user, response)
    return response
|
||||
|
||||
Reference in New Issue
Block a user