"""OpenSearch helpers: client construction, query building, result filtering."""

from django.conf import settings
from opensearchpy import OpenSearch
from opensearchpy.exceptions import RequestError


def initialise_opensearch():
    """Build an OpenSearch client from the Django settings.

    Reads ``OPENSEARCH_URL``, ``OPENSEARCH_PORT``, ``OPENSEARCH_USERNAME``,
    ``OPENSEARCH_PASSWORD`` and ``OPENSEARCH_TLS``.

    Returns:
        The configured ``OpenSearch`` client.
    """
    auth = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
    host = {"host": settings.OPENSEARCH_URL, "port": settings.OPENSEARCH_PORT}
    # NOTE(review): certificate and hostname verification are switched off and
    # the corresponding warning is silenced, so the server's identity is never
    # checked even when OPENSEARCH_TLS is on. Confirm this is intended outside
    # of development setups.
    return OpenSearch(
        hosts=[host],
        http_compress=False,  # gzip compression of request bodies is disabled
        http_auth=auth,
        use_ssl=settings.OPENSEARCH_TLS,
        verify_certs=False,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
    )


def construct_query(query, fields, size):
    """Build the request body for a fuzzy ``query_string`` search.

    Args:
        query: Query text, passed through unchanged as the ``query_string``
            query.
        fields: Fields to search. A falsy value selects
            ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS``.
        size: Maximum number of hits to request. A falsy value selects 5.

    Returns:
        The search body as a dict, with hits sorted by ``ts`` in descending
        order.
    """
    fields = fields or settings.OPENSEARCH_MAIN_SEARCH_FIELDS
    size = size or 5
    # A separate name is used for the body so the ``query`` argument is not
    # rebound to a dict half-way through the function.
    search_body = {
        "size": size,
        "query": {
            "query_string": {
                "query": query,
                "fields": fields,
                "fuzziness": "AUTO",
                "fuzzy_transpositions": True,
                "fuzzy_max_expansions": 50,
                "fuzzy_prefix_length": 0,
                "default_operator": "or",
                "analyzer": "standard",
                "lenient": True,
                "boost": 1,
                "allow_leading_wildcard": True,
                "phrase_slop": 3,
                "quote_field_suffix": "",
                "quote_analyzer": "standard",
                "analyze_wildcard": False,
                "auto_generate_synonyms_phrase_query": True,
            }
        },
        "sort": [
            {
                "ts": {
                    "order": "desc",
                }
            }
        ],
    }
    return search_body


def filter_blacklisted(user, response):
    """Strip blacklisted hits from a search response, in place.

    ``settings.OPENSEARCH_BLACKLISTED`` maps a ``_source`` field name to the
    substrings that are not allowed to appear in that field. A hit matches
    when any of those substrings occurs in the string form of the field.

    Matching hits are removed from ``response["hits"]["hits"]`` unless
    ``user.is_superuser`` is true. ``response["redacted"]`` (initialised to 0)
    and ``response["exemption"]`` (initialised to ``None``) are set on the
    response so the UI can report what happened.

    Args:
        user: Object exposing ``is_superuser``.
        response: Search response dict; mutated in place.

    Returns:
        None.
    """
    response["redacted"] = 0
    response["exemption"] = None
    hits = response["hits"]["hits"]
    # Walk a snapshot, because matching hits are removed from the live list.
    for item in list(hits):
        for blacklisted_type, blacklisted_items in (
            settings.OPENSEARCH_BLACKLISTED.items()
        ):
            source = item["_source"]
            # Only fields actually present on this hit can be checked.
            if blacklisted_type not in source:
                continue
            content = str(source[blacklisted_type])
            for blacklisted_item in blacklisted_items:
                if blacklisted_item not in content:
                    continue
                # An already removed hit must not be removed or counted again.
                if item not in hits:
                    continue
                if not user.is_superuser:
                    hits.remove(item)
                # Let the UI know something was redacted.
                # NOTE(review): the counter and the flag are also updated for
                # superusers, whose hits stay in the list (so one hit can be
                # counted once per matching entry). Confirm that this nesting
                # is the intended one.
                response["redacted"] += 1
                response["exemption"] = True


def run_main_query(client, user, query, fields=None, size=None):
    """Validate the search options, run the main search and filter the hits.

    Args:
        client: OpenSearch client used to run the search.
        user: Requesting user, handed to ``filter_blacklisted``.
        query: Query text.
        fields: Optional fields to search; each one must be listed in
            ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS``.
        size: Optional result size; must be listed in
            ``settings.OPENSEARCH_MAIN_SIZES``.

    Returns:
        The filtered response, or ``False`` when a requested field or size is
        not allowed or the search raises ``RequestError``.
    """
    if fields and any(
        field not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS for field in fields
    ):
        return False
    if size and size not in settings.OPENSEARCH_MAIN_SIZES:
        return False

    search_query = construct_query(query, fields, size)
    try:
        response = client.search(
            body=search_query,
            index=settings.OPENSEARCH_INDEX_MAIN,
        )
    except RequestError:
        return False

    filter_blacklisted(user, response)
    return response