Implement redaction for search results
This commit is contained in:
parent
dfabddb6b1
commit
2362048cc7
|
@ -10,6 +10,11 @@ OPENSEARCH_INDEX_META = "meta"
|
||||||
|
|
||||||
OPENSEARCH_MAIN_SEARCH_FIELDS = ["msg"]
|
OPENSEARCH_MAIN_SEARCH_FIELDS = ["msg"]
|
||||||
|
|
||||||
|
OPENSEARCH_BLACKLISTED = {
|
||||||
|
"msg": ["example.com"],
|
||||||
|
"nick": ["me"],
|
||||||
|
}
|
||||||
|
|
||||||
# URLs
|
# URLs
|
||||||
DOMAIN = "example.com"
|
DOMAIN = "example.com"
|
||||||
URL = f"https://{DOMAIN}"
|
URL = f"https://{DOMAIN}"
|
||||||
|
|
|
@ -1,6 +1,10 @@
|
||||||
|
import pprint
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
from opensearchpy import OpenSearch
|
from opensearchpy import OpenSearch
|
||||||
|
|
||||||
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
|
||||||
|
|
||||||
def initialise_opensearch():
|
def initialise_opensearch():
|
||||||
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
|
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
|
||||||
|
@ -57,9 +61,33 @@ def construct_query(query, fields, results):
|
||||||
return query
|
return query
|
||||||
|
|
||||||
|
|
||||||
|
def filter_blacklisted(response, blacklist=None):
    """Remove blacklisted hits from an OpenSearch response, in place.

    A hit is redacted when, for any field named in ``blacklist`` that is
    present in the hit's ``_source``, the string form of that field's value
    contains one of the blacklisted terms for that field (plain substring
    match, case-sensitive).

    The list at ``response["hits"]["hits"]`` is mutated in place, so any
    existing reference to it sees the filtered result, and
    ``response["redacted"]`` is set to the number of hits removed. Each hit
    is counted once, no matter how many terms or fields it matches.

    Unlike earlier revisions, the raw hits are NOT printed: dumping the
    unredacted payload to stdout would leak the very data being hidden.

    Args:
        response: Search response dict containing ``["hits"]["hits"]``.
        blacklist: Optional mapping of field name -> iterable of forbidden
            substrings. Defaults to ``settings.OPENSEARCH_BLACKLISTED``.

    Returns:
        None. ``response`` is modified in place.

    Raises:
        KeyError: If ``response`` has no ``["hits"]["hits"]`` entry.
    """
    if blacklist is None:
        blacklist = settings.OPENSEARCH_BLACKLISTED

    hits = response["hits"]["hits"]
    # Single pass instead of repeated ``in``/``remove`` on the live list.
    kept = [hit for hit in hits if not _is_blacklisted(hit, blacklist)]

    # Let the UI know how many hits were redacted.
    response["redacted"] = len(hits) - len(kept)
    # Slice-assign so the original list object is preserved for callers.
    hits[:] = kept


def _is_blacklisted(hit, blacklist):
    """Return True if any blacklisted term occurs in a matching hit field."""
    # A hit without ``_source`` has nothing to match against.
    source = hit.get("_source") or {}
    return any(
        term in str(source[field])
        for field, terms in blacklist.items()
        if field in source
        for term in terms
    )
|
||||||
|
|
||||||
|
|
||||||
def run_main_query(client, query, fields=None, results=None):
    """Search the main index for ``query`` and redact blacklisted hits.

    The request body is built by ``construct_query`` from ``query``,
    ``fields`` and ``results``, sent through ``client.search`` against
    ``settings.OPENSEARCH_INDEX_MAIN``, and the response is passed through
    ``filter_blacklisted`` before being returned.

    Args:
        client: Object exposing a ``search(body=..., index=...)`` method.
        query: Query value forwarded to ``construct_query``.
        fields: Optional fields value forwarded to ``construct_query``.
        results: Optional results value forwarded to ``construct_query``.

    Returns:
        The response returned by ``client.search``, after
        ``filter_blacklisted`` has been applied to it.
    """
    request_body = construct_query(query, fields, results)
    response = client.search(
        body=request_body,
        index=settings.OPENSEARCH_INDEX_MAIN,
    )
    # Strip blacklisted hits before the response leaves this module.
    filter_blacklisted(response)
    return response
|
||||||
|
|
|
@ -126,6 +126,7 @@
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<p>{{ card }} hits</p>
|
<p>{{ card }} hits</p>
|
||||||
|
<p>{{ redacted }} redacted</p>
|
||||||
<p>{{ took }}ms</p>
|
<p>{{ took }}ms</p>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
|
|
|
@ -27,16 +27,14 @@ class Drilldown(LoginRequiredMixin, View):
|
||||||
fields = None
|
fields = None
|
||||||
if "fields" in request.POST:
|
if "fields" in request.POST:
|
||||||
fields = request.POST.getlist("fields")
|
fields = request.POST.getlist("fields")
|
||||||
print("FIELD", fields)
|
|
||||||
if "results" in request.POST:
|
if "results" in request.POST:
|
||||||
results = request.POST["results"]
|
results = request.POST["results"]
|
||||||
print("RESULTS", results)
|
|
||||||
if "query" in request.POST:
|
if "query" in request.POST:
|
||||||
query = request.POST["query"]
|
query = request.POST["query"]
|
||||||
# field = results.POST["field"]
|
# field = results.POST["field"]
|
||||||
# print("FIELD ", field)
|
# print("FIELD ", field)
|
||||||
results = run_main_query(client, query, fields, results)
|
results = run_main_query(client, query, fields, results)
|
||||||
pp.pprint(results)
|
# pp.pprint(results)
|
||||||
results_parsed = []
|
results_parsed = []
|
||||||
if "hits" in results.keys():
|
if "hits" in results.keys():
|
||||||
if "hits" in results["hits"]:
|
if "hits" in results["hits"]:
|
||||||
|
@ -47,6 +45,7 @@ class Drilldown(LoginRequiredMixin, View):
|
||||||
"results": results_parsed,
|
"results": results_parsed,
|
||||||
"card": results["hits"]["total"]["value"],
|
"card": results["hits"]["total"]["value"],
|
||||||
"took": results["took"],
|
"took": results["took"],
|
||||||
|
"redacted": results["redacted"],
|
||||||
"fields": settings.OPENSEARCH_MAIN_SEARCH_FIELDS,
|
"fields": settings.OPENSEARCH_MAIN_SEARCH_FIELDS,
|
||||||
}
|
}
|
||||||
return render(request, self.template_name, context)
|
return render(request, self.template_name, context)
|
||||||
|
|
Loading…
Reference in New Issue