neptune/core/lib/opensearch.py

103 lines
3.6 KiB
Python
Raw Normal View History

2022-07-21 12:49:27 +00:00
import pprint
2022-07-21 12:47:02 +00:00
from django.conf import settings
from opensearchpy import OpenSearch
2022-07-21 12:49:27 +00:00
# Module-level pretty-printer (4-space indent) for ad-hoc debug dumps.
pp = pprint.PrettyPrinter(indent=4)
2022-07-21 12:47:02 +00:00
def initialise_opensearch():
    """Create an OpenSearch client from the Django settings.

    Reads ``OPENSEARCH_USERNAME``, ``OPENSEARCH_PASSWORD``,
    ``OPENSEARCH_URL``, ``OPENSEARCH_PORT`` and ``OPENSEARCH_TLS`` from
    ``settings`` and returns the configured ``OpenSearch`` instance.
    """
    credentials = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
    endpoint = {
        "host": settings.OPENSEARCH_URL,
        "port": settings.OPENSEARCH_PORT,
    }
    return OpenSearch(
        hosts=[endpoint],
        # Compression of request bodies is switched off.
        http_compress=False,
        http_auth=credentials,
        use_ssl=settings.OPENSEARCH_TLS,
        # NOTE: certificate verification and hostname assertion are both
        # disabled, and the related SSL warning is turned off as well.
        verify_certs=False,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
    )
def construct_query(query, fields, size):
    """Build the request body for a fuzzy ``query_string`` search.

    Args:
        query: The query text placed in the ``query_string`` clause.
        fields: Fields to search; a falsy value selects
            ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS``.
        size: Maximum number of hits to request; a falsy value selects 5.

    Returns:
        A dict with the ``size`` and ``query`` keys, ready to be passed as
        the body of a search request.
    """
    search_fields = fields or settings.OPENSEARCH_MAIN_SEARCH_FIELDS
    result_limit = size or 5

    query_string = {
        "query": query,
        "fields": search_fields,
        "fuzziness": "AUTO",
        "fuzzy_transpositions": True,
        "fuzzy_max_expansions": 50,
        "fuzzy_prefix_length": 0,
        "default_operator": "or",
        "analyzer": "standard",
        "lenient": True,
        "boost": 1,
        "allow_leading_wildcard": True,
        "phrase_slop": 3,
        "quote_field_suffix": "",
        "quote_analyzer": "standard",
        "analyze_wildcard": False,
        "auto_generate_synonyms_phrase_query": True,
    }
    return {
        "size": result_limit,
        "query": {"query_string": query_string},
    }
def filter_blacklisted(user, response, blacklist=None):
    """Strip blacklisted hits from a raw search response, in place.

    A hit is blacklisted when, for any field named in the blacklist that is
    present in the hit's ``_source``, one of that field's blacklisted strings
    occurs as a substring of ``str(value)``.

    Args:
        user: Object exposing ``is_superuser``. Superusers keep blacklisted
            hits in their results; everyone else has them removed.
        response: Search response containing ``response["hits"]["hits"]``.
            It is mutated: the hit list is filtered in place,
            ``response["redacted"]`` is set to the number of blacklisted
            hits, and ``response["exemption"]`` is set to ``True`` when at
            least one blacklisted hit was found.
        blacklist: Optional mapping of field name to a list of blacklisted
            strings. Defaults to ``settings.OPENSEARCH_BLACKLISTED``.

    Returns:
        None. The response is modified in place.

    Raises:
        KeyError: If a hit has no ``_source`` key. This is deliberate, so a
            hit that cannot be inspected is never passed through unfiltered.
    """
    if blacklist is None:
        blacklist = settings.OPENSEARCH_BLACKLISTED

    def is_blacklisted(source):
        # True as soon as any blacklisted string matches any listed field.
        return any(
            entry in str(source[field])
            for field, entries in blacklist.items()
            if field in source
            for entry in entries
        )

    hits = response["hits"]["hits"]
    response["redacted"] = 0
    kept = []
    for hit in hits:
        if is_blacklisted(hit["_source"]):
            # Count each hit once, however many blacklist entries it matches,
            # so the figure is the same for superusers and regular users.
            response["redacted"] += 1
            # NOTE(review): this flag is set for every user, not only
            # superusers -- confirm that is what the UI expects.
            response["exemption"] = True
            if not user.is_superuser:
                continue
        kept.append(hit)
    # Slice-assign so any other reference to the hit list sees the result.
    hits[:] = kept
2022-07-21 12:49:27 +00:00
def run_main_query(client, user, query, fields=None, size=None):
    """Validate the search options, run the main search and filter the hits.

    Args:
        client: Search client exposing ``search(body=..., index=...)``.
        user: User passed on to ``filter_blacklisted``.
        query: Query text handed to ``construct_query``.
        fields: Optional fields to search; each one must be listed in
            ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS``.
        size: Optional result size; must be listed in
            ``settings.OPENSEARCH_MAIN_SIZES``.

    Returns:
        ``False`` when ``fields`` or ``size`` fails validation, otherwise
        the search response after ``filter_blacklisted`` has processed it.
    """
    # Reject any field that is not on the allow-list.
    if fields and any(
        name not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS for name in fields
    ):
        return False
    # Reject any size that is not on the allow-list.
    if size and size not in settings.OPENSEARCH_MAIN_SIZES:
        return False

    body = construct_query(query, fields, size)
    response = client.search(body=body, index=settings.OPENSEARCH_INDEX_MAIN)
    filter_blacklisted(user, response)
    return response