2022-07-21 12:47:02 +00:00
|
|
|
from django.conf import settings
|
|
|
|
from opensearchpy import OpenSearch
|
2022-07-21 12:51:27 +00:00
|
|
|
from opensearchpy.exceptions import RequestError
|
2022-07-21 12:47:02 +00:00
|
|
|
|
|
|
|
|
|
|
|
def initialise_opensearch():
    """
    Build an OpenSearch client from the Django settings.

    Reads OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD, OPENSEARCH_URL,
    OPENSEARCH_PORT and OPENSEARCH_TLS from ``settings``.

    :return: the configured ``OpenSearch`` client instance
    """
    credentials = (settings.OPENSEARCH_USERNAME, settings.OPENSEARCH_PASSWORD)
    node = {
        "host": settings.OPENSEARCH_URL,
        "port": settings.OPENSEARCH_PORT,
    }
    # No client certificate/key or CA bundle is passed to the client.
    # NOTE(review): certificate and hostname verification are switched off
    # and the related warning is silenced - confirm this is intended outside
    # of development setups.
    return OpenSearch(
        hosts=[node],
        http_auth=credentials,
        http_compress=False,  # gzip compression of request bodies is off
        use_ssl=settings.OPENSEARCH_TLS,
        verify_certs=False,
        ssl_assert_hostname=False,
        ssl_show_warn=False,
    )
|
|
|
|
|
|
|
|
|
2022-07-21 12:49:32 +00:00
|
|
|
def construct_query(query, fields, size):
    """
    Build the ``query_string`` search body sent to OpenSearch.

    :param query: query text placed in ``query_string.query``
    :param fields: fields to search; when falsy,
        ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS`` is used instead
    :param size: number of results to request; when falsy, 5 is used
    :return: dict with the keys ``size`` and ``query``
    """
    search_fields = fields if fields else settings.OPENSEARCH_MAIN_SEARCH_FIELDS
    result_size = size if size else 5

    # Only the options listed here are sent with the request.
    query_string = {
        "query": query,
        "fields": search_fields,
        "fuzziness": "AUTO",
        "fuzzy_transpositions": True,
        "fuzzy_max_expansions": 50,
        "fuzzy_prefix_length": 0,
        "default_operator": "or",
        "analyzer": "standard",
        "lenient": True,
        "boost": 1,
        "allow_leading_wildcard": True,
        "phrase_slop": 3,
        "quote_field_suffix": "",
        "quote_analyzer": "standard",
        "analyze_wildcard": False,
        "auto_generate_synonyms_phrase_query": True,
    }
    return {
        "size": result_size,
        "query": {"query_string": query_string},
    }
|
|
|
|
|
|
|
|
|
2022-07-21 12:49:32 +00:00
|
|
|
def filter_blacklisted(user, response):
    """
    Redact hits containing blacklisted content from a search response.

    The response is mutated in place and nothing is returned. Two keys are
    (re)set on it:

    * ``response["redacted"]`` - number of hits that matched the blacklist,
      each hit counted once regardless of how many blacklisted strings it
      contains.
    * ``response["exemption"]`` - ``None`` when nothing matched, ``True``
      once at least one hit matched.

    Matching hits are dropped from ``response["hits"]["hits"]`` unless
    ``user.is_superuser`` is true, in which case they stay in the list.

    :param user: object exposing ``is_superuser``
    :param response: response dict holding ``["hits"]["hits"]``, a list of
        hits that each carry a ``_source`` mapping
    :raises KeyError: if a hit has no ``_source`` key
    """
    response["redacted"] = 0
    response["exemption"] = None

    hits = response["hits"]["hits"]
    kept = []
    for hit in hits:
        if _matches_blacklist(hit["_source"]):
            # Let the UI know something was redacted
            response["redacted"] += 1
            # NOTE(review): this is set for every user, not only for
            # superusers - confirm that is what the UI expects.
            response["exemption"] = True
            if not user.is_superuser:
                continue
        kept.append(hit)
    # Slice-assign so the list object inside the response stays the same.
    hits[:] = kept


def _matches_blacklist(source):
    """Return True if a blacklisted string occurs in its field of *source*."""
    for field, banned in settings.OPENSEARCH_BLACKLISTED.items():
        if field not in source:
            continue
        # Substring match against the stringified field value
        content = str(source[field])
        if any(term in content for term in banned):
            return True
    return False
|
2022-07-21 12:49:27 +00:00
|
|
|
|
|
|
|
|
2022-07-21 12:49:32 +00:00
|
|
|
def run_main_query(client, user, query, fields=None, size=None):
    """
    Validate the search options, query the main index and redact the result.

    :param client: client object; its ``search`` method is called
    :param user: passed through to ``filter_blacklisted``
    :param query: query text handed to ``construct_query``
    :param fields: optional field names; every one must be listed in
        ``settings.OPENSEARCH_MAIN_SEARCH_FIELDS``
    :param size: optional result count; must be listed in
        ``settings.OPENSEARCH_MAIN_SIZES``
    :return: the filtered response, or ``False`` when an option is not
        allowed or the search raises ``RequestError``
    """
    # Reject any field that is not in the allowed list
    if fields and any(
        name not in settings.OPENSEARCH_MAIN_SEARCH_FIELDS for name in fields
    ):
        return False
    # Reject any size that is not in the allowed list
    if size and size not in settings.OPENSEARCH_MAIN_SIZES:
        return False

    body = construct_query(query, fields, size)
    try:
        result = client.search(
            body=body,
            index=settings.OPENSEARCH_INDEX_MAIN,
        )
    except RequestError:
        return False
    else:
        filter_blacklisted(user, result)
        return result
|