|
|
|
@ -5,7 +5,16 @@ from opensearchpy import OpenSearch
|
|
|
|
|
from opensearchpy.exceptions import NotFoundError, RequestError
|
|
|
|
|
|
|
|
|
|
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
|
|
|
|
|
from core.views.helpers import dedup_list, encrypt_list, hash_list, hash_lookup
|
|
|
|
|
from core.views.helpers import (
|
|
|
|
|
SearchDenied,
|
|
|
|
|
dedup_list,
|
|
|
|
|
encrypt_list,
|
|
|
|
|
hash_list,
|
|
|
|
|
hash_lookup,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
# from json import dumps
|
|
|
|
|
# pp = lambda x: print(dumps(x, indent=2))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def initialise_opensearch():
|
|
|
|
@ -141,47 +150,66 @@ def filter_blacklisted(user, response):
|
|
|
|
|
response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def construct_query(query, size, use_query_string=True, tokens=False):
    """
    Accept some query parameters and construct an OpenSearch query.

    The returned body always has the shape
    ``{"size": ..., "query": {"bool": {"must": [...]}}}`` so callers can
    append further clauses to ``["query"]["bool"]["must"]``.

    :param query: the search text placed in the generated clause's
        ``"query"`` key; unused when no clause is generated
    :param size: value for the body's ``"size"`` key; any falsy value
        (``None``, ``0``) falls back to 5
    :param use_query_string: when true (and ``tokens`` is false) add a
        ``query_string`` clause for ``query``
    :param tokens: when true add a ``simple_query_string`` clause restricted
        to the ``tokens`` field; takes precedence over ``use_query_string``
    :return: a new dict holding the query body; with both flags false the
        ``must`` list is empty
    """
    if not size:
        size = 5
    query_base = {
        "size": size,
        "query": {"bool": {"must": []}},
    }
    query_string = {
        "query_string": {
            "query": query,
            # "fields": fields,
            # "default_field": "msg",
            # "type": "best_fields",
            "fuzziness": "AUTO",
            "fuzzy_transpositions": True,
            "fuzzy_max_expansions": 50,
            "fuzzy_prefix_length": 0,
            # "minimum_should_match": 1,
            "default_operator": "or",
            "analyzer": "standard",
            "lenient": True,
            "boost": 1,
            "allow_leading_wildcard": True,
            # "enable_position_increments": False,
            "phrase_slop": 3,
            # "max_determinized_states": 10000,
            "quote_field_suffix": "",
            "quote_analyzer": "standard",
            "analyze_wildcard": False,
            "auto_generate_synonyms_phrase_query": True,
        }
    }
    query_tokens = {
        "simple_query_string": {
            "query": query,
            "fields": ["tokens"],
            "flags": "ALL",
            "fuzzy_transpositions": True,
            "fuzzy_max_expansions": 50,
            "fuzzy_prefix_length": 0,
            "default_operator": "and",
            "analyzer": "standard",
            "lenient": True,
            "boost": 1,
            "quote_field_suffix": "",
            "analyze_wildcard": False,
            "auto_generate_synonyms_phrase_query": False,
        }
    }
    # At most one text clause is added; the token search wins when both
    # flags are set. With neither flag the caller gets a blank bool/must
    # body to fill in itself.
    if tokens:
        query_base["query"]["bool"]["must"].append(query_tokens)
    elif use_query_string:
        query_base["query"]["bool"]["must"].append(query_string)
    return query_base
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def run_main_query(client, user, query, custom_query=False, index=None, size=None):
|
|
|
|
@ -261,6 +289,7 @@ def query_results(
|
|
|
|
|
dedup=False,
|
|
|
|
|
dedup_fields=None,
|
|
|
|
|
lookup_hashes=True,
|
|
|
|
|
tags=None,
|
|
|
|
|
):
|
|
|
|
|
"""
|
|
|
|
|
API helper to alter the OpenSearch return format into something
|
|
|
|
@ -276,12 +305,15 @@ def query_results(
|
|
|
|
|
add_top = []
|
|
|
|
|
add_top_negative = []
|
|
|
|
|
sort = None
|
|
|
|
|
query_created = False
|
|
|
|
|
|
|
|
|
|
# Lookup the hash values but don't disclose them to the user
|
|
|
|
|
if lookup_hashes:
|
|
|
|
|
if settings.HASHING:
|
|
|
|
|
query_params = deepcopy(query_params)
|
|
|
|
|
hash_lookup(query_params)
|
|
|
|
|
hash_lookup(request.user, query_params)
|
|
|
|
|
if tags:
|
|
|
|
|
hash_lookup(request.user, tags)
|
|
|
|
|
|
|
|
|
|
if request.user.is_anonymous:
|
|
|
|
|
sizes = settings.OPENSEARCH_MAIN_SIZES_ANON
|
|
|
|
@ -366,15 +398,53 @@ def query_results(
|
|
|
|
|
range_query_precise["match"]["sentiment"] = 0
|
|
|
|
|
add_top_negative.append(range_query_precise)
|
|
|
|
|
|
|
|
|
|
# Only one of query or query_full can be active at once
|
|
|
|
|
# We prefer query because it's simpler
|
|
|
|
|
if "query" in query_params:
|
|
|
|
|
query = query_params["query"]
|
|
|
|
|
search_query = construct_query(query, size)
|
|
|
|
|
search_query = construct_query(query, size, tokens=True)
|
|
|
|
|
query_created = True
|
|
|
|
|
elif "query_full" in query_params:
|
|
|
|
|
query_full = query_params["query_full"]
|
|
|
|
|
if request.user.has_perm("query_search"):
|
|
|
|
|
search_query = construct_query(query_full, size)
|
|
|
|
|
query_created = True
|
|
|
|
|
else:
|
|
|
|
|
message = "You cannot search by query string"
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
else:
|
|
|
|
|
if custom_query:
|
|
|
|
|
search_query = custom_query
|
|
|
|
|
|
|
|
|
|
if tags:
|
|
|
|
|
# Get a blank search query
|
|
|
|
|
if not query_created:
|
|
|
|
|
search_query = construct_query(None, size, use_query_string=False)
|
|
|
|
|
query_created = True
|
|
|
|
|
for tagname, tagvalue in tags.items():
|
|
|
|
|
add_bool.append({tagname: tagvalue})
|
|
|
|
|
|
|
|
|
|
required_any = ["query_full", "query", "tags"]
|
|
|
|
|
if not any([field in query_params.keys() for field in required_any]):
|
|
|
|
|
if not custom_query:
|
|
|
|
|
message = "Empty query!"
|
|
|
|
|
message_class = "warning"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
|
|
|
|
|
if add_bool:
|
|
|
|
|
# if "bool" not in search_query["query"]:
|
|
|
|
|
# search_query["query"]["bool"] = {}
|
|
|
|
|
# if "must" not in search_query["query"]["bool"]:
|
|
|
|
|
# search_query["query"]["bool"] = {"must": []}
|
|
|
|
|
|
|
|
|
|
for item in add_bool:
|
|
|
|
|
search_query["query"]["bool"]["must"].append({"match": item})
|
|
|
|
|
k, v = list(item.items())[0]
|
|
|
|
|
if isinstance(v, SearchDenied):
|
|
|
|
|
message = f"Access denied: search by protected field {k}: {v.value}"
|
|
|
|
|
message_class = "danger"
|
|
|
|
|
return {"message": message, "class": message_class}
|
|
|
|
|
search_query["query"]["bool"]["must"].append({"match_phrase": item})
|
|
|
|
|
if add_top:
|
|
|
|
|
for item in add_top:
|
|
|
|
|
search_query["query"]["bool"]["must"].append(item)
|
|
|
|
@ -398,7 +468,6 @@ def query_results(
|
|
|
|
|
return {
|
|
|
|
|
"message": message,
|
|
|
|
|
"class": message_class,
|
|
|
|
|
"params": query_params,
|
|
|
|
|
}
|
|
|
|
|
if index == "meta":
|
|
|
|
|
index = settings.OPENSEARCH_INDEX_META
|
|
|
|
@ -410,7 +479,6 @@ def query_results(
|
|
|
|
|
return {
|
|
|
|
|
"message": message,
|
|
|
|
|
"class": message_class,
|
|
|
|
|
"params": query_params,
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
else:
|
|
|
|
@ -461,7 +529,6 @@ def query_results(
|
|
|
|
|
if not request.user.has_perm("view_plain"):
|
|
|
|
|
if settings.HASHING:
|
|
|
|
|
hash_list(request.user, results_parsed)
|
|
|
|
|
|
|
|
|
|
# process_list(reqults)
|
|
|
|
|
|
|
|
|
|
# IMPORTANT! - DO NOT PASS query_params to the user!
|
|
|
|
|