neptune/core/lib/opensearch.py

103 lines
3.6 KiB
Python

import pprint
from django.conf import settings
from opensearchpy import OpenSearch
pp = pprint.PrettyPrinter(indent=4)
def initialise_opensearch():
auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
client = OpenSearch(
# fmt: off
hosts=[{"host": settings.OPENSEARCH_URL,
"port": settings.OPENSEARCH_PORT}],
http_compress=False, # enables gzip compression for request bodies
http_auth=auth,
# client_cert = client_cert_path,
# client_key = client_key_path,
use_ssl=settings.OPENSEARCH_TLS,
verify_certs=False,
ssl_assert_hostname=False,
ssl_show_warn=False,
# a_certs=ca_certs_path,
)
return client
def construct_query(query, fields, size):
if not fields:
fields = settings.OPENSEARCH_MAIN_SEARCH_FIELDS
if not size:
size = 5
query = {
"size": size,
"query": {
"query_string": {
"query": query,
"fields": fields,
# "default_field": "msg",
# "type": "best_fields",
"fuzziness": "AUTO",
"fuzzy_transpositions": True,
"fuzzy_max_expansions": 50,
"fuzzy_prefix_length": 0,
# "minimum_should_match": 1,
"default_operator": "or",
"analyzer": "standard",
"lenient": True,
"boost": 1,
"allow_leading_wildcard": True,
# "enable_position_increments": False,
"phrase_slop": 3,
# "max_determinized_states": 10000,
"quote_field_suffix": "",
"quote_analyzer": "standard",
"analyze_wildcard": False,
"auto_generate_synonyms_phrase_query": True,
}
},
}
return query
def filter_blacklisted(user, response):
pp.pprint(response["hits"]["hits"])
print("LEN", len(response["hits"]["hits"]))
response["redacted"] = 0
# For every hit from ES
for item in list(response["hits"]["hits"]):
# For every blacklisted type
for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
# Check this field we are matching exists
if blacklisted_type in item["_source"].keys():
content = item["_source"][blacklisted_type]
# For every item in the blacklisted array for the type
for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
blacklisted_type
]:
if blacklisted_item in str(content):
# Remove the item
if item in response["hits"]["hits"]:
if not user.is_superuser:
response["hits"]["hits"].remove(item)
# Let the UI know something was redacted
response["redacted"] += 1
response["exemption"] = True
def run_main_query(client, user, query, fields=None, size=None):
if fields:
for field in fields:
if field not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS:
return False
if size:
if size not in settings.OPENSEARCH_MAIN_SIZES:
return False
search_query = construct_query(query, fields, size)
# fmt: off
response = client.search(body=search_query,
index=settings.OPENSEARCH_INDEX_MAIN)
filter_blacklisted(user, response)
return response