Implement redaction for search results
This commit is contained in:
parent
dfabddb6b1
commit
2362048cc7
|
@ -10,6 +10,11 @@ OPENSEARCH_INDEX_META = "meta"
|
|||
|
||||
OPENSEARCH_MAIN_SEARCH_FIELDS = ["msg"]
|
||||
|
||||
# Redaction rules applied to search results. Each key names a field of a
# hit's `_source`; a hit is removed from the results when the string form of
# that field contains any of the listed values.
# Matching is by substring, so "me" also matches a nick such as "james".
OPENSEARCH_BLACKLISTED = {
    "msg": ["example.com"],
    "nick": ["me"],
}
|
||||
|
||||
# URLs
|
||||
DOMAIN = "example.com"
|
||||
URL = f"https://{DOMAIN}"
|
||||
|
|
|
@ -1,6 +1,10 @@
|
|||
import pprint
|
||||
|
||||
from django.conf import settings
|
||||
from opensearchpy import OpenSearch
|
||||
|
||||
pp = pprint.PrettyPrinter(indent=4)
|
||||
|
||||
|
||||
def initialise_opensearch():
|
||||
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
|
||||
|
@ -57,9 +61,33 @@ def construct_query(query, fields, results):
|
|||
return query
|
||||
|
||||
|
||||
def filter_blacklisted(response, blacklist=None):
    """Remove blacklisted hits from a search response, in place.

    A hit is dropped when, for any field named in the blacklist that is
    present in the hit's ``_source``, the string form of that field's value
    contains one of the blacklisted substrings for that field.

    The number of dropped hits is stored in ``response["redacted"]``. Each
    hit is counted once, however many blacklist entries it matches.
    ``response["hits"]["total"]`` is not adjusted.

    :param response: search response dict. ``response["hits"]["hits"]`` is
        edited in place (the same list object is kept) and the
        ``"redacted"`` key is set.
    :param blacklist: mapping of ``_source`` field name to an iterable of
        forbidden substrings. Defaults to ``settings.OPENSEARCH_BLACKLISTED``.
    :returns: ``None``; the response is mutated.
    """
    if blacklist is None:
        blacklist = settings.OPENSEARCH_BLACKLISTED

    def is_blacklisted(hit):
        # A hit without a `_source` has no fields to match against.
        source = hit.get("_source") or {}
        for field, needles in blacklist.items():
            if field not in source:
                continue
            # Values may be non-strings (numbers, lists); compare on their
            # string form.
            content = str(source[field])
            if any(needle in content for needle in needles):
                return True
        return False

    # Nothing is printed here: dumping the unfiltered hits would leak the
    # very content that is being redacted.
    hits = response["hits"]["hits"]
    kept = [hit for hit in hits if not is_blacklisted(hit)]

    # Let the UI know how many hits were redacted.
    response["redacted"] = len(hits) - len(kept)

    # Slice-assign so that other references to the hit list see the result.
    hits[:] = kept
|
||||
|
||||
|
||||
def run_main_query(client, query, fields=None, results=None):
    """Run the main search and return the response with blacklisted hits removed.

    The request body is built by ``construct_query`` from ``query``,
    ``fields`` and ``results``, sent to the index named by
    ``settings.OPENSEARCH_INDEX_MAIN``, and the response is passed through
    ``filter_blacklisted`` before being returned.
    """
    request_body = construct_query(query, fields, results)
    # fmt: off
    search_response = client.search(
        body=request_body,
        index=settings.OPENSEARCH_INDEX_MAIN,
    )
    # Redact in place before the response reaches the caller.
    filter_blacklisted(search_response)
    return search_response
|
||||
|
|
|
@ -126,6 +126,7 @@
|
|||
</div>
|
||||
</div>
|
||||
<p>{{ card }} hits</p>
|
||||
<p>{{ redacted }} redacted</p>
|
||||
<p>{{ took }}ms</p>
|
||||
{% endif %}
|
||||
|
||||
|
|
|
@ -27,16 +27,14 @@ class Drilldown(LoginRequiredMixin, View):
|
|||
fields = None
|
||||
if "fields" in request.POST:
|
||||
fields = request.POST.getlist("fields")
|
||||
print("FIELD", fields)
|
||||
if "results" in request.POST:
|
||||
results = request.POST["results"]
|
||||
print("RESULTS", results)
|
||||
if "query" in request.POST:
|
||||
query = request.POST["query"]
|
||||
# field = results.POST["field"]
|
||||
# print("FIELD ", field)
|
||||
results = run_main_query(client, query, fields, results)
|
||||
pp.pprint(results)
|
||||
# pp.pprint(results)
|
||||
results_parsed = []
|
||||
if "hits" in results.keys():
|
||||
if "hits" in results["hits"]:
|
||||
|
@ -47,6 +45,7 @@ class Drilldown(LoginRequiredMixin, View):
|
|||
"results": results_parsed,
|
||||
"card": results["hits"]["total"]["value"],
|
||||
"took": results["took"],
|
||||
"redacted": results["redacted"],
|
||||
"fields": settings.OPENSEARCH_MAIN_SEARCH_FIELDS,
|
||||
}
|
||||
return render(request, self.template_name, context)
|
||||
|
|
Loading…
Reference in New Issue