Implement Insights page

This commit is contained in:
2022-07-21 13:51:55 +01:00
parent a56042376c
commit 185bda02ea
15 changed files with 652 additions and 274 deletions

View File

@@ -2,8 +2,13 @@ from django.conf import settings
from opensearchpy import OpenSearch
from opensearchpy.exceptions import RequestError
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
def initialise_opensearch():
"""
Initialise the OpenSearch API endpoint.
"""
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
client = OpenSearch(
# fmt: off
@@ -22,9 +27,157 @@ def initialise_opensearch():
return client
def construct_query(query, fields, size):
if not fields:
fields = settings.OPENSEARCH_MAIN_SEARCH_FIELDS
# OpenSearch client created by initialise_opensearch(); query_results()
# passes it on to run_main_query().
# NOTE(review): this looks like a module-level statement, which would open
# the client at import time — confirm against the real file's indentation.
client = initialise_opensearch()
def annotate_results(results_parsed):
    """
    Annotate IRC results with online status, channel size and channel count.

    Accept a list of result dicts. For every network that has IRC items,
    look up the nicks and channels seen on that network with
    annotate_online, annotate_num_users and annotate_num_chans, and store
    the answers under the "online", "num_users" and "num_chans" keys of
    the items belonging to that network.

    Only items whose "src" is "irc" and that carry a "net" key are looked
    up and annotated, and each item only receives data for its own
    network. Items missing "nick" or "channel" are tolerated; they simply
    do not get the corresponding annotation.

    Mutate the items in place. Does not return anything.
    """
    # Group the IRC items by network; items without a net (not IRC, e.g.
    # discord) have nothing to look up and are left untouched.
    items_by_net = {}
    for item in results_parsed:
        if item.get("src") == "irc" and "net" in item:
            items_by_net.setdefault(item["net"], []).append(item)

    for net, items in items_by_net.items():
        # De-duplicate while keeping first-seen order; the lookups are
        # keyed by nick/channel, so repeats add nothing.
        nicks = list(dict.fromkeys(i["nick"] for i in items if "nick" in i))
        channels = list(
            dict.fromkeys(i["channel"] for i in items if "channel" in i)
        )

        # Annotate the online attribute from Threshold
        online_info = annotate_online(net, nicks)
        # Annotate the number of users in the channel
        num_users = annotate_num_users(net, channels)
        # Annotate the number of channels the user is on
        num_chans = annotate_num_chans(net, nicks)

        # Only touch the items of this network, so data from one network
        # is never written onto an item from another.
        for item in items:
            nick = item.get("nick")
            channel = item.get("channel")
            if nick in online_info:
                item["online"] = online_info[nick]
            if channel in num_users:
                item["num_users"] = num_users[channel]
            if nick in num_chans:
                item["num_chans"] = num_chans[nick]
def filter_blacklisted(user, response):
"""
Low level filter to take the raw OpenSearch response and remove
objects from it we want to keep secret.
Does not return, the object is mutated in place.

user is only consulted for its is_superuser attribute: matching hits are
removed from response["hits"]["hits"] unless the user is a superuser.
response must contain response["hits"]["hits"]; its "redacted" and
"exemption" keys are reset on every call.

A hit matches when any value listed in settings.OPENSEARCH_BLACKLISTED
for a field name is a substring of str() of that field in the hit's
"_source".
"""
# Reset the bookkeeping keys before scanning
response["redacted"] = 0
response["exemption"] = None
# For every hit from ES
# (iterate over a copy, because hits may be removed from the real list)
for item in list(response["hits"]["hits"]):
# For every blacklisted type
for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
# Check this field we are matching exists
if blacklisted_type in item["_source"].keys():
content = item["_source"][blacklisted_type]
# For every item in the blacklisted array for the type
for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
blacklisted_type
]:
# Substring match against the stringified field value
if blacklisted_item in str(content):
# Remove the item
# (presumably the membership test guards against removing a hit twice
# when it matches several blacklist entries)
if item in response["hits"]["hits"]:
# Superusers keep the hit in their results
if not user.is_superuser:
response["hits"]["hits"].remove(item)
# Let the UI know something was redacted
# NOTE(review): indentation is lost in this view, so it is unclear which of
# the conditions above guard the next two lines — confirm the nesting, and
# whether a hit kept for a superuser can be counted more than once.
response["redacted"] += 1
response["exemption"] = True
def run_main_query(client, user, query, size=None):
    """
    Low level helper to run an ES query.

    Build the search body with construct_query(query, size) and run it
    against settings.OPENSEARCH_INDEX_MAIN using the given client.
    Accept a user to pass it to the filter, so we can
    avoid filtering for superusers.
    Accept size, the number of results to return.

    Return the response after filter_blacklisted has processed it in
    place, or False when OpenSearch raises a RequestError.
    """
    # Imported here so this function does not depend on the module's
    # import block being changed.
    import logging

    search_query = construct_query(query, size)
    try:
        response = client.search(
            body=search_query, index=settings.OPENSEARCH_INDEX_MAIN
        )
    except RequestError:
        # Keep the traceback and the offending query in the logs instead
        # of printing a bare marker to stdout.
        logging.getLogger(__name__).exception(
            "OpenSearch request error for query %r", query
        )
        return False
    filter_blacklisted(user, response)
    return response
def query_results(request, size=None):
    """
    API helper to alter the OpenSearch return format into something
    a bit better to parse.
    Accept a HTTP request object. Run the query, and annotate the
    results with the other data we have.

    The query is read from request.POST["query"]. When size is not given
    it is taken from request.POST["size"], which must then be one of
    settings.OPENSEARCH_MAIN_SIZES.

    Return False when the posted size is not allowed, when no query was
    posted, or when the search failed. Otherwise return a context dict
    with the keys "query", "results", "card", "took", "redacted" and
    "exemption".
    """
    post = request.POST
    if not size and "size" in post:
        size = post["size"]
        if size not in settings.OPENSEARCH_MAIN_SIZES:
            return False

    # Without a query there is nothing to search for; fail the same way as
    # the other error paths instead of tripping over an unbound variable.
    if "query" not in post:
        return False
    query = post["query"]

    results = run_main_query(client, request.user, query, size)
    if not results:
        return False

    results_parsed = []
    for hit in results.get("hits", {}).get("hits", []):
        element = hit["_source"]
        element["id"] = hit["_id"]
        # Split the timestamp into date and time at the first "T"; a
        # timestamp without a "T" yields an empty time rather than an
        # IndexError.
        date_part, _, time_part = element["ts"].partition("T")
        element["date"] = date_part
        element["time"] = time_part
        results_parsed.append(element)

    annotate_results(results_parsed)

    context = {
        "query": query,
        "results": results_parsed,
        "card": results["hits"]["total"]["value"],
        "took": results["took"],
        "redacted": results["redacted"],
        "exemption": results["exemption"],
    }
    return context
def query_single_result(request):
    """
    Run the posted query with a size of 1 and report how many distinct
    nicks the results contain.

    Return a (count, context) tuple, where context is whatever
    query_results returned. When exactly one distinct nick was found, the
    first result is also stored under context["item"].

    If query_results fails (returns False), return (0, False) instead of
    trying to index into the failure value.
    """
    context = query_results(request, 1)
    if not context:
        # Nothing to de-duplicate: pass the failure through with a zero count
        return (0, context)

    unique_nicks = {item["nick"] for item in context["results"]}
    if len(unique_nicks) == 1:
        context["item"] = context["results"][0]
    return (len(unique_nicks), context)
def construct_query(query, size):
"""
Accept some query parameters and construct an OpenSearch query.
"""
if not size:
size = 5
query = {
@@ -32,7 +185,7 @@ def construct_query(query, fields, size):
"query": {
"query_string": {
"query": query,
"fields": fields,
# "fields": fields,
# "default_field": "msg",
# "type": "best_fields",
"fuzziness": "AUTO",
@@ -63,46 +216,3 @@ def construct_query(query, fields, size):
],
}
return query
def filter_blacklisted(user, response):
"""
Remove blacklisted hits from a raw OpenSearch response, in place.

user is only consulted for its is_superuser attribute: matching hits are
removed from response["hits"]["hits"] unless the user is a superuser.
response must contain response["hits"]["hits"]; its "redacted" and
"exemption" keys are reset on every call.

A hit matches when any value listed in settings.OPENSEARCH_BLACKLISTED
for a field name is a substring of str() of that field in the hit's
"_source". Does not return anything.
"""
# Reset the bookkeeping keys before scanning
response["redacted"] = 0
response["exemption"] = None
# For every hit from ES
# (iterate over a copy, because hits may be removed from the real list)
for item in list(response["hits"]["hits"]):
# For every blacklisted type
for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
# Check this field we are matching exists
if blacklisted_type in item["_source"].keys():
content = item["_source"][blacklisted_type]
# For every item in the blacklisted array for the type
for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
blacklisted_type
]:
# Substring match against the stringified field value
if blacklisted_item in str(content):
# Remove the item
# (presumably the membership test guards against removing a hit twice
# when it matches several blacklist entries)
if item in response["hits"]["hits"]:
# Superusers keep the hit in their results
if not user.is_superuser:
response["hits"]["hits"].remove(item)
# Let the UI know something was redacted
# NOTE(review): indentation is lost in this view, so it is unclear which of
# the conditions above guard the next two lines — confirm the nesting.
response["redacted"] += 1
response["exemption"] = True
def run_main_query(client, user, query, fields=None, size=None):
    """
    Validate the requested fields and size, then run and filter a search.

    Every entry of fields (when given) must be listed in
    settings.OPENSEARCH_MAIN_SEARCH_FIELDS, and size (when given) must be
    in settings.OPENSEARCH_MAIN_SIZES; otherwise False is returned without
    querying. The search body comes from construct_query and is run
    against settings.OPENSEARCH_INDEX_MAIN.

    Return the response after filter_blacklisted has processed it in
    place, or False when validation fails or a RequestError is raised.
    """
    if fields and any(
        name not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS for name in fields
    ):
        return False
    if size and size not in settings.OPENSEARCH_MAIN_SIZES:
        return False

    body = construct_query(query, fields, size)
    try:
        result = client.search(body=body, index=settings.OPENSEARCH_INDEX_MAIN)
    except RequestError:
        return False

    filter_blacklisted(user, result)
    return result