Improve data security by mandating token search
This commit is contained in:
@@ -5,7 +5,16 @@ from opensearchpy import OpenSearch
|
||||
from opensearchpy.exceptions import NotFoundError, RequestError
|
||||
|
||||
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
|
||||
from core.views.helpers import dedup_list, encrypt_list, hash_list, hash_lookup
|
||||
from core.views.helpers import (
|
||||
SearchDenied,
|
||||
dedup_list,
|
||||
encrypt_list,
|
||||
hash_list,
|
||||
hash_lookup,
|
||||
)
|
||||
|
||||
# from json import dumps
|
||||
# pp = lambda x: print(dumps(x, indent=2))
|
||||
|
||||
|
||||
def initialise_opensearch():
|
||||
@@ -141,47 +150,66 @@ def filter_blacklisted(user, response):
|
||||
response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit]
|
||||
|
||||
|
||||
def construct_query(query, size):
|
||||
def construct_query(query, size, use_query_string=True, tokens=False):
|
||||
"""
|
||||
Accept some query parameters and construct an OpenSearch query.
|
||||
"""
|
||||
if not size:
|
||||
size = 5
|
||||
query = {
|
||||
query_base = {
|
||||
"size": size,
|
||||
"query": {
|
||||
"bool": {
|
||||
"must": [
|
||||
{
|
||||
"query_string": {
|
||||
"query": query,
|
||||
# "fields": fields,
|
||||
# "default_field": "msg",
|
||||
# "type": "best_fields",
|
||||
"fuzziness": "AUTO",
|
||||
"fuzzy_transpositions": True,
|
||||
"fuzzy_max_expansions": 50,
|
||||
"fuzzy_prefix_length": 0,
|
||||
# "minimum_should_match": 1,
|
||||
"default_operator": "or",
|
||||
"analyzer": "standard",
|
||||
"lenient": True,
|
||||
"boost": 1,
|
||||
"allow_leading_wildcard": True,
|
||||
# "enable_position_increments": False,
|
||||
"phrase_slop": 3,
|
||||
# "max_determinized_states": 10000,
|
||||
"quote_field_suffix": "",
|
||||
"quote_analyzer": "standard",
|
||||
"analyze_wildcard": False,
|
||||
"auto_generate_synonyms_phrase_query": True,
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"query": {"bool": {"must": []}},
|
||||
}
|
||||
return query
|
||||
query_string = {
|
||||
"query_string": {
|
||||
"query": query,
|
||||
# "fields": fields,
|
||||
# "default_field": "msg",
|
||||
# "type": "best_fields",
|
||||
"fuzziness": "AUTO",
|
||||
"fuzzy_transpositions": True,
|
||||
"fuzzy_max_expansions": 50,
|
||||
"fuzzy_prefix_length": 0,
|
||||
# "minimum_should_match": 1,
|
||||
"default_operator": "or",
|
||||
"analyzer": "standard",
|
||||
"lenient": True,
|
||||
"boost": 1,
|
||||
"allow_leading_wildcard": True,
|
||||
# "enable_position_increments": False,
|
||||
"phrase_slop": 3,
|
||||
# "max_determinized_states": 10000,
|
||||
"quote_field_suffix": "",
|
||||
"quote_analyzer": "standard",
|
||||
"analyze_wildcard": False,
|
||||
"auto_generate_synonyms_phrase_query": True,
|
||||
}
|
||||
}
|
||||
query_tokens = {
|
||||
"simple_query_string": {
|
||||
# "tokens": query,
|
||||
"query": query,
|
||||
"fields": ["tokens"],
|
||||
"flags": "ALL",
|
||||
"fuzzy_transpositions": True,
|
||||
"fuzzy_max_expansions": 50,
|
||||
"fuzzy_prefix_length": 0,
|
||||
"default_operator": "and",
|
||||
"analyzer": "standard",
|
||||
"lenient": True,
|
||||
"boost": 1,
|
||||
"quote_field_suffix": "",
|
||||
"analyze_wildcard": False,
|
||||
"auto_generate_synonyms_phrase_query": False,
|
||||
}
|
||||
}
|
||||
if tokens:
|
||||
query_base["query"]["bool"]["must"].append(query_tokens)
|
||||
# query["query"]["bool"]["must"].append(query_string)
|
||||
# query["query"]["bool"]["must"][0]["query_string"]["fields"] = ["tokens"]
|
||||
elif use_query_string:
|
||||
query_base["query"]["bool"]["must"].append(query_string)
|
||||
return query_base
|
||||
|
||||
|
||||
def run_main_query(client, user, query, custom_query=False, index=None, size=None):
|
||||
@@ -261,6 +289,7 @@ def query_results(
|
||||
dedup=False,
|
||||
dedup_fields=None,
|
||||
lookup_hashes=True,
|
||||
tags=None,
|
||||
):
|
||||
"""
|
||||
API helper to alter the OpenSearch return format into something
|
||||
@@ -276,12 +305,15 @@ def query_results(
|
||||
add_top = []
|
||||
add_top_negative = []
|
||||
sort = None
|
||||
query_created = False
|
||||
|
||||
# Lookup the hash values but don't disclose them to the user
|
||||
if lookup_hashes:
|
||||
if settings.HASHING:
|
||||
query_params = deepcopy(query_params)
|
||||
hash_lookup(query_params)
|
||||
hash_lookup(request.user, query_params)
|
||||
if tags:
|
||||
hash_lookup(request.user, tags)
|
||||
|
||||
if request.user.is_anonymous:
|
||||
sizes = settings.OPENSEARCH_MAIN_SIZES_ANON
|
||||
@@ -366,15 +398,53 @@ def query_results(
|
||||
range_query_precise["match"]["sentiment"] = 0
|
||||
add_top_negative.append(range_query_precise)
|
||||
|
||||
# Only one of query or query_full can be active at once
|
||||
# We prefer query because it's simpler
|
||||
if "query" in query_params:
|
||||
query = query_params["query"]
|
||||
search_query = construct_query(query, size)
|
||||
search_query = construct_query(query, size, tokens=True)
|
||||
query_created = True
|
||||
elif "query_full" in query_params:
|
||||
query_full = query_params["query_full"]
|
||||
if request.user.has_perm("query_search"):
|
||||
search_query = construct_query(query_full, size)
|
||||
query_created = True
|
||||
else:
|
||||
message = "You cannot search by query string"
|
||||
message_class = "danger"
|
||||
return {"message": message, "class": message_class}
|
||||
else:
|
||||
if custom_query:
|
||||
search_query = custom_query
|
||||
|
||||
if tags:
|
||||
# Get a blank search query
|
||||
if not query_created:
|
||||
search_query = construct_query(None, size, use_query_string=False)
|
||||
query_created = True
|
||||
for tagname, tagvalue in tags.items():
|
||||
add_bool.append({tagname: tagvalue})
|
||||
|
||||
required_any = ["query_full", "query", "tags"]
|
||||
if not any([field in query_params.keys() for field in required_any]):
|
||||
if not custom_query:
|
||||
message = "Empty query!"
|
||||
message_class = "warning"
|
||||
return {"message": message, "class": message_class}
|
||||
|
||||
if add_bool:
|
||||
# if "bool" not in search_query["query"]:
|
||||
# search_query["query"]["bool"] = {}
|
||||
# if "must" not in search_query["query"]["bool"]:
|
||||
# search_query["query"]["bool"] = {"must": []}
|
||||
|
||||
for item in add_bool:
|
||||
search_query["query"]["bool"]["must"].append({"match": item})
|
||||
k, v = list(item.items())[0]
|
||||
if isinstance(v, SearchDenied):
|
||||
message = f"Access denied: search by protected field {k}: {v.value}"
|
||||
message_class = "danger"
|
||||
return {"message": message, "class": message_class}
|
||||
search_query["query"]["bool"]["must"].append({"match_phrase": item})
|
||||
if add_top:
|
||||
for item in add_top:
|
||||
search_query["query"]["bool"]["must"].append(item)
|
||||
@@ -398,7 +468,6 @@ def query_results(
|
||||
return {
|
||||
"message": message,
|
||||
"class": message_class,
|
||||
"params": query_params,
|
||||
}
|
||||
if index == "meta":
|
||||
index = settings.OPENSEARCH_INDEX_META
|
||||
@@ -410,7 +479,6 @@ def query_results(
|
||||
return {
|
||||
"message": message,
|
||||
"class": message_class,
|
||||
"params": query_params,
|
||||
}
|
||||
|
||||
else:
|
||||
@@ -461,7 +529,6 @@ def query_results(
|
||||
if not request.user.has_perm("view_plain"):
|
||||
if settings.HASHING:
|
||||
hash_list(request.user, results_parsed)
|
||||
|
||||
# process_list(reqults)
|
||||
|
||||
# IMPORTANT! - DO NOT PASS query_params to the user!
|
||||
|
||||
Reference in New Issue
Block a user