Improve data security by mandating token search

This commit is contained in:
2022-08-26 17:16:55 +01:00
parent e85fa910aa
commit 3f02c61463
9 changed files with 309 additions and 143 deletions

View File

@@ -5,7 +5,16 @@ from opensearchpy import OpenSearch
from opensearchpy.exceptions import NotFoundError, RequestError
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
from core.views.helpers import dedup_list, encrypt_list, hash_list, hash_lookup
from core.views.helpers import (
SearchDenied,
dedup_list,
encrypt_list,
hash_list,
hash_lookup,
)
# from json import dumps
# pp = lambda x: print(dumps(x, indent=2))
def initialise_opensearch():
@@ -141,47 +150,66 @@ def filter_blacklisted(user, response):
response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit]
def construct_query(query, size, use_query_string=True, tokens=False):
    """
    Accept some query parameters and construct an OpenSearch query.

    The returned body always has the shape
    ``{"size": ..., "query": {"bool": {"must": [...]}}}``. The ``must``
    list holds at most one clause, chosen as follows:

    * ``tokens=True``: a ``simple_query_string`` clause restricted to the
      ``tokens`` field, with ``default_operator`` set to ``"and"``.
      This takes precedence over ``use_query_string``.
    * ``use_query_string=True`` (the default): a fuzzy ``query_string``
      clause with ``default_operator`` set to ``"or"``.
    * neither: ``must`` is left empty, giving a blank query body that the
      caller can append its own clauses to.

    :param query: the user-supplied search string; unused when neither
                  clause is selected, so ``None`` is acceptable then
    :param size: number of hits to request; any falsy value becomes 5
    :param use_query_string: add the ``query_string`` clause (ignored when
                             ``tokens`` is true)
    :param tokens: add the ``simple_query_string`` clause on ``tokens``
    :return: a new dict, not shared between calls
    """
    if not size:
        size = 5

    must = []
    if tokens:
        must.append(
            {
                "simple_query_string": {
                    "query": query,
                    "fields": ["tokens"],
                    "flags": "ALL",
                    "fuzzy_transpositions": True,
                    "fuzzy_max_expansions": 50,
                    "fuzzy_prefix_length": 0,
                    "default_operator": "and",
                    "analyzer": "standard",
                    "lenient": True,
                    "boost": 1,
                    "quote_field_suffix": "",
                    "analyze_wildcard": False,
                    "auto_generate_synonyms_phrase_query": False,
                }
            }
        )
    elif use_query_string:
        must.append(
            {
                "query_string": {
                    "query": query,
                    # "fields": fields,
                    # "default_field": "msg",
                    # "type": "best_fields",
                    "fuzziness": "AUTO",
                    "fuzzy_transpositions": True,
                    "fuzzy_max_expansions": 50,
                    "fuzzy_prefix_length": 0,
                    # "minimum_should_match": 1,
                    "default_operator": "or",
                    "analyzer": "standard",
                    "lenient": True,
                    "boost": 1,
                    "allow_leading_wildcard": True,
                    # "enable_position_increments": False,
                    "phrase_slop": 3,
                    # "max_determinized_states": 10000,
                    "quote_field_suffix": "",
                    "quote_analyzer": "standard",
                    "analyze_wildcard": False,
                    "auto_generate_synonyms_phrase_query": True,
                }
            }
        )

    return {
        "size": size,
        "query": {"bool": {"must": must}},
    }
def run_main_query(client, user, query, custom_query=False, index=None, size=None):
@@ -261,6 +289,7 @@ def query_results(
dedup=False,
dedup_fields=None,
lookup_hashes=True,
tags=None,
):
"""
API helper to alter the OpenSearch return format into something
@@ -276,12 +305,15 @@ def query_results(
add_top = []
add_top_negative = []
sort = None
query_created = False
# Lookup the hash values but don't disclose them to the user
if lookup_hashes:
if settings.HASHING:
query_params = deepcopy(query_params)
hash_lookup(query_params)
hash_lookup(request.user, query_params)
if tags:
hash_lookup(request.user, tags)
if request.user.is_anonymous:
sizes = settings.OPENSEARCH_MAIN_SIZES_ANON
@@ -366,15 +398,53 @@ def query_results(
range_query_precise["match"]["sentiment"] = 0
add_top_negative.append(range_query_precise)
# Only one of query or query_full can be active at once
# We prefer query because it's simpler
if "query" in query_params:
query = query_params["query"]
search_query = construct_query(query, size)
search_query = construct_query(query, size, tokens=True)
query_created = True
elif "query_full" in query_params:
query_full = query_params["query_full"]
if request.user.has_perm("query_search"):
search_query = construct_query(query_full, size)
query_created = True
else:
message = "You cannot search by query string"
message_class = "danger"
return {"message": message, "class": message_class}
else:
if custom_query:
search_query = custom_query
if tags:
# Get a blank search query
if not query_created:
search_query = construct_query(None, size, use_query_string=False)
query_created = True
for tagname, tagvalue in tags.items():
add_bool.append({tagname: tagvalue})
required_any = ["query_full", "query", "tags"]
if not any([field in query_params.keys() for field in required_any]):
if not custom_query:
message = "Empty query!"
message_class = "warning"
return {"message": message, "class": message_class}
if add_bool:
# if "bool" not in search_query["query"]:
# search_query["query"]["bool"] = {}
# if "must" not in search_query["query"]["bool"]:
# search_query["query"]["bool"] = {"must": []}
for item in add_bool:
search_query["query"]["bool"]["must"].append({"match": item})
k, v = list(item.items())[0]
if isinstance(v, SearchDenied):
message = f"Access denied: search by protected field {k}: {v.value}"
message_class = "danger"
return {"message": message, "class": message_class}
search_query["query"]["bool"]["must"].append({"match_phrase": item})
if add_top:
for item in add_top:
search_query["query"]["bool"]["must"].append(item)
@@ -398,7 +468,6 @@ def query_results(
return {
"message": message,
"class": message_class,
"params": query_params,
}
if index == "meta":
index = settings.OPENSEARCH_INDEX_META
@@ -410,7 +479,6 @@ def query_results(
return {
"message": message,
"class": message_class,
"params": query_params,
}
else:
@@ -461,7 +529,6 @@ def query_results(
if not request.user.has_perm("view_plain"):
if settings.HASHING:
hash_list(request.user, results_parsed)
# process_list(reqults)
# IMPORTANT! - DO NOT PASS query_params to the user!