diff --git a/core/lib/context.py b/core/lib/context.py index 40b560d..ccff818 100644 --- a/core/lib/context.py +++ b/core/lib/context.py @@ -4,9 +4,9 @@ def construct_query(index, net, channel, src, num, size, type=None, nicks=None): extra_should = [] extra_should2 = [] if num: - extra_must.append({"match": {"num": num}}) + extra_must.append({"equals": {"num": num}}) if net: - extra_must.append({"match": {"net": net}}) + extra_must.append({"match_phrase": {"net": net}}) if channel: extra_must.append({"match": {"channel": channel}}) if nicks: @@ -52,31 +52,36 @@ def construct_query(index, net, channel, src, num, size, type=None, nicks=None): extra_should.append({"match": {"nick": channel}}) else: for ctype in types: - extra_should.append({"match": {"mtype": ctype}}) + extra_should.append({"equals": {"mtype": ctype}}) else: for ctype in types: extra_should.append({"match": {"type": ctype}}) query = { - "size": size, + "index": index, + "limit": size, "query": { "bool": { "must": [ - {"match": {"src": src}}, - { - "bool": { - "should": [*extra_should], - } - }, - { - "bool": { - "should": [*extra_should2], - } - }, + # {"equals": {"src": src}}, + # { + # "bool": { + # "should": [*extra_should], + # } + # }, + # { + # "bool": { + # "should": [*extra_should2], + # } + # }, *extra_must, ] } }, "fields": fields, - "_source": False, + # "_source": False, } + if extra_should: + query["query"]["bool"]["must"].append({"bool": {"should": [*extra_should]}}) + if extra_should2: + query["query"]["bool"]["must"].append({"bool": {"should": [*extra_should2]}}) return query diff --git a/core/lib/manticore.py b/core/lib/manticore.py index 6d2a13a..9d56104 100644 --- a/core/lib/manticore.py +++ b/core/lib/manticore.py @@ -1,10 +1,13 @@ -from re import search -from django.conf import settings -from core.lib.opensearch import annotate_results, filter_blacklisted, parse_results -import manticoresearch -from core.views.helpers import dedup_list +from datetime import datetime from pprint import pprint +import manticoresearch +from django.conf import settings + +from core.lib.processing import annotate_results, filter_blacklisted, parse_results +from core.views.helpers import dedup_list + + def initialise_manticore(): """ Initialise the Manticore client @@ -15,8 +18,10 @@ def initialise_manticore(): return (api_client, api_instance) + api_client, client = initialise_manticore() + def construct_query(query, size, index, blank=False): """ Accept some query parameters and construct an OpenSearch query. @@ -35,12 +40,14 @@ def construct_query(query, size, index, blank=False): query_base["query"]["bool"]["must"].append(query_string) return query_base + def run_query(client, user, search_query): response = client.search(search_query) response = response.to_dict() filter_blacklisted(user, response) return response + def query_results( request, query_params, @@ -110,6 +117,9 @@ def query_results( query = query_params["query"] search_query = construct_query(query, size, index) query_created = True + else: + if custom_query: + search_query = custom_query if tags: # Get a blank search query @@ -159,13 +169,16 @@ def query_results( add_top.append(add_top_tmp) print("AFTER", add_top) - # Date/time range if set({"from_date", "to_date", "from_time", "to_time"}).issubset( query_params.keys() ): from_ts = f"{query_params['from_date']}T{query_params['from_time']}Z" to_ts = f"{query_params['to_date']}T{query_params['to_time']}Z" + from_ts = datetime.strptime(from_ts, "%Y-%m-%dT%H:%MZ") + to_ts = datetime.strptime(to_ts, "%Y-%m-%dT%H:%MZ") + from_ts = int(from_ts.timestamp()) + to_ts = int(to_ts.timestamp()) range_query = { "range": { "ts": { @@ -247,7 +260,6 @@ def query_results( if sort: search_query["sort"] = sort - pprint(search_query) results = run_query( client, @@ -256,7 +268,7 @@ def query_results( ) if not results: return False - #results = results.to_dict() + # results = results.to_dict() results_parsed = parse_results(results) if annotate: annotate_results(results_parsed) @@ -280,4 +292,5 @@ def query_results( "card": results["hits"]["total"], "took": results["took"], } - return context \ No newline at end of file + print("RTRN", context) + return context diff --git a/core/lib/opensearch.py b/core/lib/opensearch.py index 6a1c5a4..032cb1a 100644 --- a/core/lib/opensearch.py +++ b/core/lib/opensearch.py @@ -5,11 +5,10 @@ from django.conf import settings from opensearchpy import OpenSearch from opensearchpy.exceptions import NotFoundError, RequestError -from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online -from core.views.helpers import dedup_list -from datetime import datetime # from json import dumps # pp = lambda x: print(dumps(x, indent=2)) +from core.lib.processing import annotate_results, filter_blacklisted, parse_results +from core.views.helpers import dedup_list def initialise_opensearch(): @@ -37,114 +36,6 @@ def initialise_opensearch(): client = initialise_opensearch() -def annotate_results(results_parsed): - """ - Accept a list of dict objects, search for the number of channels and users. - Add them to the object. - Mutate it in place. Does not return anything. - """ - # Figure out items with net (not discord) - nets = set() - for x in results_parsed: - if "net" in x: - nets.add(x["net"]) - - for net in nets: - # Annotate the online attribute from Threshold - nicks = list( - set( - [ - x["nick"] - for x in results_parsed - if {"nick", "src", "net"}.issubset(x) - and x["src"] == "irc" - and x["net"] == net - ] - ) - ) - channels = list( - set( - [ - x["channel"] - for x in results_parsed - if {"channel", "src", "net"}.issubset(x) - and x["src"] == "irc" - and x["net"] == net - ] - ) - ) - online_info = annotate_online(net, nicks) - # Annotate the number of users in the channel - num_users = annotate_num_users(net, channels) - # Annotate the number channels the user is on - num_chans = annotate_num_chans(net, nicks) - for item in results_parsed: - if "net" in item: - if item["net"] == net: - if "nick" in item: - if item["nick"] in online_info: - item["online"] = online_info[item["nick"]] - if "channel" in item: - if item["channel"] in num_users: - item["num_users"] = num_users[item["channel"]] - if "nick" in item: - if item["nick"] in num_chans: - item["num_chans"] = num_chans[item["nick"]] - - -def filter_blacklisted(user, response): - """ - Low level filter to take the raw OpenSearch response and remove - objects from it we want to keep secret. - Does not return, the object is mutated in place. - """ - response["redacted"] = 0 - response["exemption"] = None - if user.is_superuser: - response["exemption"] = True - # is_anonymous = isinstance(user, AnonymousUser) - # For every hit from ES - for index, item in enumerate(list(response["hits"]["hits"])): - # For every blacklisted type - for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys(): - # Check this field we are matching exists - if "_source" in item.keys(): - data_index = "_source" - elif "fields" in item.keys(): - data_index = "fields" - else: - return False - if blacklisted_type in item[data_index].keys(): - content = item[data_index][blacklisted_type] - # For every item in the blacklisted array for the type - for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[ - blacklisted_type - ]: - if blacklisted_item == str(content): - # Remove the item - if item in response["hits"]["hits"]: - # Let the UI know something was redacted - if ( - "exemption" - not in response["hits"]["hits"][index][data_index] - ): - response["redacted"] += 1 - # Anonymous - if user.is_anonymous: - # Just set it to none so the index is not off - response["hits"]["hits"][index] = None - else: - if not user.has_perm("core.bypass_blacklist"): - response["hits"]["hits"][index] = None - else: - response["hits"]["hits"][index][data_index][ - "exemption" - ] = True - - # Actually get rid of all the things we set to None - response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit] - - def construct_query(query, size, use_query_string=True, tokens=False): """ Accept some query parameters and construct an OpenSearch query. @@ -233,54 +124,6 @@ def run_main_query(client, user, query, custom_query=False, index=None, size=Non return response -def parse_results(results): - results_parsed = [] - stringify = ["host", "channel"] - if "hits" in results.keys(): - if "hits" in results["hits"]: - for item in results["hits"]["hits"]: - if "_source" in item.keys(): - data_index = "_source" - elif "fields" in item.keys(): - data_index = "fields" - else: - return False - element = item[data_index] - for field in stringify: - if field in element: - element[field] = str(element[field]) - # Why are fields in lists... - if data_index == "fields": - element = {k: v[0] for k, v in element.items() if len(v)} - element["id"] = item["_id"] - - # Split the timestamp into date and time - if "ts" not in element: - if "time" in element: # will fix data later - ts = element["time"] - del element["time"] - element["ts"] = ts - if "ts" in element: - if isinstance(element["ts"], str): - ts = element["ts"] - else: - ts = datetime.utcfromtimestamp(element["ts"]).strftime('%Y-%m-%dT%H:%M:%S') - ts_spl = ts.split("T") - date = ts_spl[0] - time = ts_spl[1] - element["date"] = date - if "." in time: - time_spl = time.split(".") - if len(time_spl) == 2: - element["time"] = time.split(".")[0] - else: - element["time"] = time - else: - element["time"] = time - results_parsed.append(element) - return results_parsed - - def query_results( request, query_params, diff --git a/core/lib/processing.py b/core/lib/processing.py new file mode 100644 index 0000000..d3607dc --- /dev/null +++ b/core/lib/processing.py @@ -0,0 +1,164 @@ +from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online +from django.conf import settings +from datetime import datetime + + +def annotate_results(results_parsed): + """ + Accept a list of dict objects, search for the number of channels and users. + Add them to the object. + Mutate it in place. Does not return anything. + """ + # Figure out items with net (not discord) + nets = set() + for x in results_parsed: + if "net" in x: + nets.add(x["net"]) + + for net in nets: + # Annotate the online attribute from Threshold + nicks = list( + set( + [ + x["nick"] + for x in results_parsed + if {"nick", "src", "net"}.issubset(x) + and x["src"] == "irc" + and x["net"] == net + ] + ) + ) + channels = list( + set( + [ + x["channel"] + for x in results_parsed + if {"channel", "src", "net"}.issubset(x) + and x["src"] == "irc" + and x["net"] == net + ] + ) + ) + online_info = annotate_online(net, nicks) + # Annotate the number of users in the channel + num_users = annotate_num_users(net, channels) + # Annotate the number channels the user is on + num_chans = annotate_num_chans(net, nicks) + for item in results_parsed: + if "net" in item: + if item["net"] == net: + if "nick" in item: + if item["nick"] in online_info: + item["online"] = online_info[item["nick"]] + if "channel" in item: + if item["channel"] in num_users: + item["num_users"] = num_users[item["channel"]] + if "nick" in item: + if item["nick"] in num_chans: + item["num_chans"] = num_chans[item["nick"]] + + +def filter_blacklisted(user, response): + """ + Low level filter to take the raw OpenSearch response and remove + objects from it we want to keep secret. + Does not return, the object is mutated in place. + """ + response["redacted"] = 0 + response["exemption"] = None + if user.is_superuser: + response["exemption"] = True + # is_anonymous = isinstance(user, AnonymousUser) + # For every hit from ES + for index, item in enumerate(list(response["hits"]["hits"])): + # For every blacklisted type + for blacklisted_type in settings.OPENSEARCH_BLACKLISTED.keys(): + # Check this field we are matching exists + if "_source" in item.keys(): + data_index = "_source" + elif "fields" in item.keys(): + data_index = "fields" + else: + return False + if blacklisted_type in item[data_index].keys(): + content = item[data_index][blacklisted_type] + # For every item in the blacklisted array for the type + for blacklisted_item in settings.OPENSEARCH_BLACKLISTED[ + blacklisted_type + ]: + if blacklisted_item == str(content): + # Remove the item + if item in response["hits"]["hits"]: + # Let the UI know something was redacted + if ( + "exemption" + not in response["hits"]["hits"][index][data_index] + ): + response["redacted"] += 1 + # Anonymous + if user.is_anonymous: + # Just set it to none so the index is not off + response["hits"]["hits"][index] = None + else: + if not user.has_perm("core.bypass_blacklist"): + response["hits"]["hits"][index] = None + else: + response["hits"]["hits"][index][data_index][ + "exemption" + ] = True + + # Actually get rid of all the things we set to None + response["hits"]["hits"] = [hit for hit in response["hits"]["hits"] if hit] + + +def parse_results(results): + results_parsed = [] + stringify = ["host", "channel"] + if "hits" in results.keys(): + if "hits" in results["hits"]: + for item in results["hits"]["hits"]: + if "_source" in item.keys(): + data_index = "_source" + elif "fields" in item.keys(): + data_index = "fields" + else: + return False + element = item[data_index] + for field in stringify: + if field in element: + element[field] = str(element[field]) + # Why are fields in lists... + if data_index == "fields": + element = {k: v[0] for k, v in element.items() if len(v)} + element["id"] = item["_id"] + + # Remove empty values + for field in list(element.keys()): + if element[field] == "": + del element[field] + + # Split the timestamp into date and time + if "ts" not in element: + if "time" in element: # will fix data later + ts = element["time"] + del element["time"] + element["ts"] = ts + if "ts" in element: + if isinstance(element["ts"], str): + ts = element["ts"] + else: + ts = datetime.utcfromtimestamp(element["ts"]).strftime('%Y-%m-%dT%H:%M:%S') + ts_spl = ts.split("T") + date = ts_spl[0] + time = ts_spl[1] + element["date"] = date + if "." in time: + time_spl = time.split(".") + if len(time_spl) == 2: + element["time"] = time.split(".")[0] + else: + element["time"] = time + else: + element["time"] = time + results_parsed.append(element) + return results_parsed \ No newline at end of file diff --git a/core/templates/ui/drilldown/search_partial.html b/core/templates/ui/drilldown/search_partial.html index e67bec5..a6da2fd 100644 --- a/core/templates/ui/drilldown/search_partial.html +++ b/core/templates/ui/drilldown/search_partial.html @@ -17,7 +17,7 @@ value="{{ params.query }}" class="input" type="text" - placeholder="Token search: (science | tech | art) + (interest) -hello"> + placeholder="Search something"> @@ -76,28 +76,6 @@
-