# neptune/core/lib/opensearch.py (109 lines, 3.7 KiB, Python)

from django.conf import settings
from opensearchpy import OpenSearch
from opensearchpy.exceptions import RequestError
def initialise_opensearch():
    """Create an OpenSearch client from the Django settings.

    Reads OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD, OPENSEARCH_URL,
    OPENSEARCH_PORT and OPENSEARCH_TLS from ``settings``.

    Returns:
        The configured ``OpenSearch`` client instance.
    """
    credentials = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
    node = {"host": settings.OPENSEARCH_URL, "port": settings.OPENSEARCH_PORT}

    # NOTE(review): certificate verification, hostname assertion and the
    # related SSL warning are all switched off, and no client certificate,
    # client key or CA bundle is passed -- confirm this is intended outside
    # of development.
    return OpenSearch(
        hosts=[node],
        # http_compress controls gzip compression of request bodies; it is
        # left disabled here.
        http_compress=False,
        http_auth=credentials,
        use_ssl=settings.OPENSEARCH_TLS,
        verify_certs=False,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
    )
def construct_query(query, fields, size):
    """Build the search body for a ``query_string`` query.

    Args:
        query: The query text placed in ``query_string.query``.
        fields: Fields to search; when falsy,
            ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS`` is used instead.
        size: Maximum number of hits; when falsy, 5 is used.

    Returns:
        A dict with the keys ``size``, ``query`` and ``sort``. Results are
        sorted on the ``ts`` field in descending order.
    """
    search_fields = fields or settings.OPENSEARCH_MAIN_SEARCH_FIELDS
    hit_limit = size or 5

    # Other query_string options (default_field, type, minimum_should_match,
    # enable_position_increments, max_determinized_states) are not set.
    query_string = {
        "query": query,
        "fields": search_fields,
        "fuzziness": "AUTO",
        "fuzzy_transpositions": True,
        "fuzzy_max_expansions": 50,
        "fuzzy_prefix_length": 0,
        "default_operator": "or",
        "analyzer": "standard",
        "lenient": True,
        "boost": 1,
        "allow_leading_wildcard": True,
        "phrase_slop": 3,
        "quote_field_suffix": "",
        "quote_analyzer": "standard",
        "analyze_wildcard": False,
        "auto_generate_synonyms_phrase_query": True,
    }

    return {
        "size": hit_limit,
        "query": {"query_string": query_string},
        "sort": [{"ts": {"order": "desc"}}],
    }
# Redact blacklisted hits from an OpenSearch response, mutating it in place.
#
# `user` must expose `is_superuser`. `response` must contain
# response["hits"]["hits"], a list of hits that each carry a "_source"
# mapping. settings.OPENSEARCH_BLACKLISTED is read as a mapping from a
# "_source" field name to an iterable of blacklisted strings.
# Sets response["redacted"] and response["exemption"]; the function has no
# return statement.
#
# NOTE(review): the indentation of this block has been lost, so the nesting
# of the final lines (the "redacted" counter and the "exemption" flag
# relative to the superuser check) cannot be determined from here -- confirm
# against version control before re-indenting.
def filter_blacklisted(user, response):
# Start with nothing redacted and no exemption recorded.
response["redacted"] = 0
response["exemption"] = None
# For every hit from OpenSearch; iterate over a copy because hits may be
# removed from the underlying list below.
for item in list(response["hits"]["hits"]):
# For every blacklisted field name
for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
# Check that the field we are matching exists on this hit
if blacklisted_type in item["_source"].keys():
content = item["_source"][blacklisted_type]
# For every blacklisted string configured for this field
for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
blacklisted_type
]:
# Substring match against the stringified field value
if blacklisted_item in str(content):
# Only act on hits still present in the response list
if item in response["hits"]["hits"]:
# Superusers keep the hit; for everyone else it is removed
if not user.is_superuser:
response["hits"]["hits"].remove(item)
# Let the UI know something was redacted
response["redacted"] += 1
response["exemption"] = True
def run_main_query(client, user, query, fields=None, size=None):
    """Validate the search options, run the main query and redact the result.

    Args:
        client: Object whose ``search(body=..., index=...)`` method is called.
        user: Passed through to ``filter_blacklisted``.
        query: Query text handed to ``construct_query``.
        fields: Optional fields to search; every entry must be in
            ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS``.
        size: Optional hit limit; must be in
            ``settings.OPENSEARCH_MAIN_SIZES`` when given.

    Returns:
        The search response after ``filter_blacklisted`` has processed it,
        or ``False`` when a field or size is not allowed or the search
        raises ``RequestError``.
    """
    # Reject the request if any requested field is outside the allowed set.
    if fields and any(
        name not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS for name in fields
    ):
        return False

    # Reject a size that is not one of the configured choices.
    if size and size not in settings.OPENSEARCH_MAIN_SIZES:
        return False

    body = construct_query(query, fields, size)
    try:
        result = client.search(body=body, index=settings.OPENSEARCH_INDEX_MAIN)
    except RequestError:
        return False

    filter_blacklisted(user, result)
    return result