Implement redaction for search results

This commit is contained in:
2022-07-21 13:49:27 +01:00
parent dfabddb6b1
commit 2362048cc7
4 changed files with 36 additions and 3 deletions

View File

@@ -1,6 +1,10 @@
import pprint
from django.conf import settings
from opensearchpy import OpenSearch
# Shared pretty-printer configured with a 4-space indent, for dumping nested
# structures in a readable form.
pp = pprint.PrettyPrinter(indent=4)
def initialise_opensearch():
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
@@ -57,9 +61,33 @@ def construct_query(query, fields, results):
return query
def filter_blacklisted(response):
    """Remove blacklisted hits from a search response, in place.

    A hit is dropped when, for any field name configured in
    ``settings.OPENSEARCH_BLACKLISTED``, the hit's ``_source`` contains that
    field and the string form of its value contains at least one of the
    blacklisted substrings listed for that field.

    Args:
        response: Search response dict. ``response["hits"]["hits"]`` must be
            a list of hits, each carrying a ``_source`` mapping.

    Returns:
        None. ``response["hits"]["hits"]`` is filtered in place (the same
        list object is kept) and ``response["redacted"]`` is set to the
        number of hits that were removed, so the UI can tell that something
        was withheld.
    """
    blacklist = settings.OPENSEARCH_BLACKLISTED
    hits = response["hits"]["hits"]

    def is_blacklisted(hit):
        # True as soon as any configured field contains a banned substring.
        source = hit["_source"]
        for field, banned_values in blacklist.items():
            if field not in source:
                continue
            # Values may be non-strings (numbers, lists, ...); match against
            # their string form.
            content = str(source[field])
            if any(banned in content for banned in banned_values):
                return True
        return False

    # Build the surviving list in one pass instead of calling list.remove()
    # per match, which rescans the whole list every time.
    # NOTE: the unredacted hits are deliberately not printed or logged here;
    # doing so would leak the content this function exists to hide.
    kept = [hit for hit in hits if not is_blacklisted(hit)]
    response["redacted"] = len(hits) - len(kept)
    # Slice-assign so any other reference to the hits list sees the result.
    hits[:] = kept
def run_main_query(client, query, fields=None, results=None):
    """Run a search against the main index and redact blacklisted hits.

    The query body is built with ``construct_query(query, fields, results)``
    and sent via ``client.search`` to ``settings.OPENSEARCH_INDEX_MAIN``.
    The response is then passed through ``filter_blacklisted`` before being
    returned.

    Args:
        client: Object exposing a ``search(body=..., index=...)`` method.
        query: Forwarded to ``construct_query``.
        fields: Forwarded to ``construct_query``; defaults to ``None``.
        results: Forwarded to ``construct_query``; defaults to ``None``.

    Returns:
        The response returned by ``client.search`` after
        ``filter_blacklisted`` has been applied to it.
    """
    body = construct_query(query, fields, results)
    search_response = client.search(
        body=body,
        index=settings.OPENSEARCH_INDEX_MAIN,
    )
    # Redaction happens on the response object itself before it is returned.
    filter_blacklisted(search_response)
    return search_response