def _is_blacklisted(source, blacklist):
    """Return True if *source* contains a blacklisted substring.

    ``blacklist`` maps a field name to a list of forbidden substrings.
    A field is only checked when it is present in ``source``; its value is
    converted with ``str()`` before the substring test.
    """
    for field, banned_values in blacklist.items():
        if field not in source:
            continue
        content = str(source[field])
        if any(banned in content for banned in banned_values):
            return True
    return False


def filter_blacklisted(response, blacklist=None):
    """Remove blacklisted hits from a search response, in place.

    Every entry of ``response["hits"]["hits"]`` whose ``_source`` matches the
    blacklist is dropped, and ``response["redacted"]`` is set to the number
    of hits that were removed (0 when nothing was removed or when the
    response carries no hits). Each removed hit is counted exactly once,
    even if it matches several blacklist entries.

    Args:
        response: The search response dict; it is mutated.
        blacklist: Mapping of field name to a list of forbidden substrings,
            e.g. ``{"msg": ["example.com"], "nick": ["me"]}``. When omitted,
            ``settings.OPENSEARCH_BLACKLISTED`` is used, falling back to an
            empty mapping if that setting is not defined.

    Returns:
        None. The result is communicated through ``response``.
    """
    if blacklist is None:
        blacklist = getattr(settings, "OPENSEARCH_BLACKLISTED", {})

    # Always expose the counter so callers can read it unconditionally.
    response["redacted"] = 0

    hits = response.get("hits", {}).get("hits")
    if not hits:
        return

    kept = [
        hit
        for hit in hits
        if not _is_blacklisted(hit.get("_source", {}), blacklist)
    ]
    response["redacted"] = len(hits) - len(kept)
    # Slice-assign so the list object held by the response stays the same.
    hits[:] = kept


def run_main_query(client, query, fields=None, results=None):
    """Run the main search and strip blacklisted hits from the response.

    Builds the request body with ``construct_query(query, fields, results)``,
    sends it to ``client.search`` against ``settings.OPENSEARCH_INDEX_MAIN``,
    passes the response through ``filter_blacklisted`` and returns it.
    """
    search_query = construct_query(query, fields, results)
    response = client.search(
        body=search_query,
        index=settings.OPENSEARCH_INDEX_MAIN,
    )
    filter_blacklisted(response)
    return response
b/core/templates/ui/drilldown.html index 5700988..47153b1 100644 --- a/core/templates/ui/drilldown.html +++ b/core/templates/ui/drilldown.html @@ -126,6 +126,7 @@

{{ card }} hits

+

{{ redacted }} redacted

{{ took }}ms

{% endif %} diff --git a/core/ui/views/drilldown.py b/core/ui/views/drilldown.py index 9ebbb69..90518bc 100644 --- a/core/ui/views/drilldown.py +++ b/core/ui/views/drilldown.py @@ -27,16 +27,14 @@ class Drilldown(LoginRequiredMixin, View): fields = None if "fields" in request.POST: fields = request.POST.getlist("fields") - print("FIELD", fields) if "results" in request.POST: results = request.POST["results"] - print("RESULTS", results) if "query" in request.POST: query = request.POST["query"] # field = results.POST["field"] # print("FIELD ", field) results = run_main_query(client, query, fields, results) - pp.pprint(results) + # pp.pprint(results) results_parsed = [] if "hits" in results.keys(): if "hits" in results["hits"]: @@ -47,6 +45,7 @@ class Drilldown(LoginRequiredMixin, View): "results": results_parsed, "card": results["hits"]["total"]["value"], "took": results["took"], + "redacted": results["redacted"], "fields": settings.OPENSEARCH_MAIN_SEARCH_FIELDS, } return render(request, self.template_name, context)