|
|
|
@ -97,8 +97,14 @@ def filter_blacklisted(user, response):
|
|
|
|
|
# For every blacklisted type
|
|
|
|
|
for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys():
|
|
|
|
|
# Check this field we are matching exists
|
|
|
|
|
if blacklisted_type in item["_source"].keys():
|
|
|
|
|
content = item["_source"][blacklisted_type]
|
|
|
|
|
if "_source" in item.keys():
|
|
|
|
|
data_index = "_source"
|
|
|
|
|
elif "fields" in item.keys():
|
|
|
|
|
data_index = "fields"
|
|
|
|
|
else:
|
|
|
|
|
return False
|
|
|
|
|
if blacklisted_type in item[data_index].keys():
|
|
|
|
|
content = item[data_index][blacklisted_type]
|
|
|
|
|
# For every item in the blacklisted array for the type
|
|
|
|
|
for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[
|
|
|
|
|
blacklisted_type
|
|
|
|
@ -109,7 +115,7 @@ def filter_blacklisted(user, response):
|
|
|
|
|
# Let the UI know something was redacted
|
|
|
|
|
if (
|
|
|
|
|
"exemption"
|
|
|
|
|
not in response["hits"]["hits"][index]["_source"]
|
|
|
|
|
not in response["hits"]["hits"][index][data_index]
|
|
|
|
|
):
|
|
|
|
|
response["redacted"] += 1
|
|
|
|
|
# Anonymous
|
|
|
|
@ -120,7 +126,7 @@ def filter_blacklisted(user, response):
|
|
|
|
|
if not user.is_superuser:
|
|
|
|
|
response["hits"]["hits"][index] = None
|
|
|
|
|
else:
|
|
|
|
|
response["hits"]["hits"][index]["_source"][
|
|
|
|
|
response["hits"]["hits"][index][data_index][
|
|
|
|
|
"exemption"
|
|
|
|
|
] = True
|
|
|
|
|
|
|
|
|
@ -128,6 +134,49 @@ def filter_blacklisted(user, response):
|
|
|
|
|
response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def construct_query(query, size):
    """
    Accept some query parameters and construct an OpenSearch query.

    Wraps *query* in a fuzzy ``query_string`` clause under a ``bool``/``must``
    and attaches the requested result ``size`` (defaulting to 5 when falsy).
    """
    if not size:
        size = 5

    # Options for the query_string clause; commented entries are kept as a
    # record of settings that were considered but left at their defaults.
    query_string = {
        "query": query,
        # "fields": fields,
        # "default_field": "msg",
        # "type": "best_fields",
        "fuzziness": "AUTO",
        "fuzzy_transpositions": True,
        "fuzzy_max_expansions": 50,
        "fuzzy_prefix_length": 0,
        # "minimum_should_match": 1,
        "default_operator": "or",
        "analyzer": "standard",
        "lenient": True,
        "boost": 1,
        "allow_leading_wildcard": True,
        # "enable_position_increments": False,
        "phrase_slop": 3,
        # "max_determinized_states": 10000,
        "quote_field_suffix": "",
        "quote_analyzer": "standard",
        "analyze_wildcard": False,
        "auto_generate_synonyms_phrase_query": True,
    }

    return {
        "size": size,
        "query": {"bool": {"must": [{"query_string": query_string}]}},
    }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_main_query(client, user, query, custom_query=False, index=None, size=None):
|
|
|
|
|
"""
|
|
|
|
|
Low level helper to run an ES query.
|
|
|
|
@ -153,12 +202,22 @@ def run_main_query(client, user, query, custom_query=False, index=None, size=Non
|
|
|
|
|
filter_blacklisted(user, response)
|
|
|
|
|
return response
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_results(results):
|
|
|
|
|
results_parsed = []
|
|
|
|
|
if "hits" in results.keys():
|
|
|
|
|
if "hits" in results["hits"]:
|
|
|
|
|
for item in results["hits"]["hits"]:
|
|
|
|
|
element = item["_source"]
|
|
|
|
|
if "_source" in item.keys():
|
|
|
|
|
data_index = "_source"
|
|
|
|
|
elif "fields" in item.keys():
|
|
|
|
|
data_index = "fields"
|
|
|
|
|
else:
|
|
|
|
|
return False
|
|
|
|
|
element = item[data_index]
|
|
|
|
|
# Why are fields in lists...
|
|
|
|
|
if data_index == "fields":
|
|
|
|
|
element = {k: v[0] for k, v in element.items() if len(v)}
|
|
|
|
|
element["id"] = item["_id"]
|
|
|
|
|
|
|
|
|
|
# Split the timestamp into date and time
|
|
|
|
@ -173,11 +232,19 @@ def parse_results(results):
|
|
|
|
|
date = ts_spl[0]
|
|
|
|
|
time = ts_spl[1]
|
|
|
|
|
element["date"] = date
|
|
|
|
|
element["time"] = time
|
|
|
|
|
if "." in time:
|
|
|
|
|
time_spl = time.split(".")
|
|
|
|
|
if len(time_spl) == 2:
|
|
|
|
|
element["time"] = time.split(".")[0]
|
|
|
|
|
else:
|
|
|
|
|
element["time"] = time
|
|
|
|
|
else:
|
|
|
|
|
element["time"] = time
|
|
|
|
|
results_parsed.append(element)
|
|
|
|
|
return results_parsed
|
|
|
|
|
|
|
|
|
|
def query_results(request, query_params, size=None):
|
|
|
|
|
|
|
|
|
|
def query_results(request, query_params, size=None, annotate=True, custom_query=False):
|
|
|
|
|
"""
|
|
|
|
|
API helper to alter the OpenSearch return format into something
|
|
|
|
|
a bit better to parse.
|
|
|
|
@ -185,6 +252,7 @@ def query_results(request, query_params, size=None):
|
|
|
|
|
results with the other data we have.
|
|
|
|
|
"""
|
|
|
|
|
# is_anonymous = isinstance(request.user, AnonymousUser)
|
|
|
|
|
query = None
|
|
|
|
|
message = None
|
|
|
|
|
message_class = None
|
|
|
|
|
add_bool = []
|
|
|
|
@ -202,6 +270,8 @@ def query_results(request, query_params, size=None):
|
|
|
|
|
message = "Size is not permitted"
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
else:
|
|
|
|
|
size = 20
|
|
|
|
|
if "source" in query_params:
|
|
|
|
|
source = query_params["source"]
|
|
|
|
|
if source not in settings.OPENSEARCH_MAIN_SOURCES:
|
|
|
|
@ -275,72 +345,79 @@ def query_results(request, query_params, size=None):
|
|
|
|
|
if "query" in query_params:
|
|
|
|
|
query = query_params["query"]
|
|
|
|
|
search_query = construct_query(query, size)
|
|
|
|
|
if add_bool:
|
|
|
|
|
for item in add_bool:
|
|
|
|
|
search_query["query"]["bool"]["must"].append({"match": item})
|
|
|
|
|
if add_top:
|
|
|
|
|
for item in add_top:
|
|
|
|
|
search_query["query"]["bool"]["must"].append(item)
|
|
|
|
|
if add_top_negative:
|
|
|
|
|
for item in add_top_negative:
|
|
|
|
|
if "must_not" in search_query["query"]["bool"]:
|
|
|
|
|
search_query["query"]["bool"]["must_not"].append(item)
|
|
|
|
|
else:
|
|
|
|
|
search_query["query"]["bool"]["must_not"] = [item]
|
|
|
|
|
if sort:
|
|
|
|
|
search_query["sort"] = sort
|
|
|
|
|
|
|
|
|
|
if "index" in query_params:
|
|
|
|
|
if not request.user.is_superuser:
|
|
|
|
|
message = "How did you get here?"
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
else:
|
|
|
|
|
print("NO QUERY")
|
|
|
|
|
if custom_query:
|
|
|
|
|
print("CUSTOM")
|
|
|
|
|
search_query = custom_query
|
|
|
|
|
if add_bool:
|
|
|
|
|
for item in add_bool:
|
|
|
|
|
search_query["query"]["bool"]["must"].append({"match": item})
|
|
|
|
|
if add_top:
|
|
|
|
|
for item in add_top:
|
|
|
|
|
search_query["query"]["bool"]["must"].append(item)
|
|
|
|
|
if add_top_negative:
|
|
|
|
|
for item in add_top_negative:
|
|
|
|
|
if "must_not" in search_query["query"]["bool"]:
|
|
|
|
|
search_query["query"]["bool"]["must_not"].append(item)
|
|
|
|
|
else:
|
|
|
|
|
index = query_params["index"]
|
|
|
|
|
if index == "main":
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_MAIN
|
|
|
|
|
elif index == "meta":
|
|
|
|
|
search_query["query"]["bool"]["must_not"] = [item]
|
|
|
|
|
if sort:
|
|
|
|
|
search_query["sort"] = sort
|
|
|
|
|
|
|
|
|
|
if "index" in query_params:
|
|
|
|
|
index = query_params["index"]
|
|
|
|
|
if index == "main":
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_MAIN
|
|
|
|
|
else:
|
|
|
|
|
if request.user.is_superuser:
|
|
|
|
|
if index == "meta":
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_META
|
|
|
|
|
elif index == "int":
|
|
|
|
|
if index == "int":
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_INT
|
|
|
|
|
else:
|
|
|
|
|
message = "Index is not valid."
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
else:
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_MAIN
|
|
|
|
|
results = run_main_query(
|
|
|
|
|
client,
|
|
|
|
|
request.user, # passed through run_main_query to filter_blacklisted
|
|
|
|
|
search_query,
|
|
|
|
|
custom_query=True,
|
|
|
|
|
index=index,
|
|
|
|
|
size=size,
|
|
|
|
|
)
|
|
|
|
|
if not results:
|
|
|
|
|
return False
|
|
|
|
|
if isinstance(results, Exception):
|
|
|
|
|
message = results.info["error"]["root_cause"][0]["reason"]
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
if len(results["hits"]["hits"]) == 0:
|
|
|
|
|
message = "No results."
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
|
|
|
|
|
results_parsed = parse_results(results)
|
|
|
|
|
else:
|
|
|
|
|
message = "Not permitted to search by this index"
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
else:
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_MAIN
|
|
|
|
|
results = run_main_query(
|
|
|
|
|
client,
|
|
|
|
|
request.user, # passed through run_main_query to filter_blacklisted
|
|
|
|
|
search_query,
|
|
|
|
|
custom_query=True,
|
|
|
|
|
index=index,
|
|
|
|
|
size=size,
|
|
|
|
|
)
|
|
|
|
|
if not results:
|
|
|
|
|
return False
|
|
|
|
|
if isinstance(results, Exception):
|
|
|
|
|
message = results.info["error"]["root_cause"][0]["reason"]
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
if len(results["hits"]["hits"]) == 0:
|
|
|
|
|
message = "No results."
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
|
|
|
|
|
results_parsed = parse_results(results)
|
|
|
|
|
if annotate:
|
|
|
|
|
annotate_results(results_parsed)
|
|
|
|
|
|
|
|
|
|
context = {
|
|
|
|
|
"query": query,
|
|
|
|
|
"object_list": results_parsed,
|
|
|
|
|
"card": results["hits"]["total"]["value"],
|
|
|
|
|
"took": results["took"],
|
|
|
|
|
"redacted": results["redacted"],
|
|
|
|
|
"exemption": results["exemption"],
|
|
|
|
|
}
|
|
|
|
|
return context
|
|
|
|
|
context = {
|
|
|
|
|
"object_list": results_parsed,
|
|
|
|
|
"card": results["hits"]["total"]["value"],
|
|
|
|
|
"took": results["took"],
|
|
|
|
|
"redacted": results["redacted"],
|
|
|
|
|
"exemption": results["exemption"],
|
|
|
|
|
}
|
|
|
|
|
if query:
|
|
|
|
|
context["query"] = query
|
|
|
|
|
return context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def query_single_result(request):
|
|
|
|
@ -355,46 +432,3 @@ def query_single_result(request):
|
|
|
|
|
context["item"] = context["object_list"][0]
|
|
|
|
|
|
|
|
|
|
return context
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def construct_query(query, size):
    """
    Accept some query parameters and construct an OpenSearch query.

    Returns a dict suitable as an OpenSearch search body: the supplied
    *query* string becomes a fuzzy ``query_string`` match inside a
    ``bool``/``must`` list, sized to *size* (or 5 when *size* is falsy).
    """
    effective_size = size or 5  # fall back to a small default page size

    must_clause = {
        "query_string": {
            "query": query,
            # "fields": fields,
            # "default_field": "msg",
            # "type": "best_fields",
            "fuzziness": "AUTO",
            "fuzzy_transpositions": True,
            "fuzzy_max_expansions": 50,
            "fuzzy_prefix_length": 0,
            # "minimum_should_match": 1,
            "default_operator": "or",
            "analyzer": "standard",
            "lenient": True,
            "boost": 1,
            "allow_leading_wildcard": True,
            # "enable_position_increments": False,
            "phrase_slop": 3,
            # "max_determinized_states": 10000,
            "quote_field_suffix": "",
            "quote_analyzer": "standard",
            "analyze_wildcard": False,
            "auto_generate_synonyms_phrase_query": True,
        }
    }

    return {
        "size": effective_size,
        "query": {"bool": {"must": [must_clause]}},
    }
|
|
|
|
|