Implement Druid DB fetching

This commit is contained in:
2022-09-30 07:22:22 +01:00
parent 202a13cccb
commit bb00475029
5 changed files with 234 additions and 90 deletions

View File

@@ -1,17 +1,9 @@
import logging
import random
import string
import time
from datetime import datetime
from math import floor, log10
from pprint import pprint
import orjson
import requests
from django.conf import settings
from siphashc import siphash
from core import r
from core.db import StorageBackend
from core.db.processing import parse_druid
from core.views import helpers
@@ -32,30 +24,26 @@ class DruidBackend(StorageBackend):
"limit": size,
"queryType": "scan",
"dataSource": index,
"filter": {
"type": "and",
"fields": [
],
},
# "resultFormat": "list",
# "columns":[],
"intervals": ["1000-01-01/3000-01-01"],
# "batchSize": 20480,
"intervals": ["1999-01-01/2999-01-01"],
}
to_add = {
"type": "search",
"dimension": "msg",
"query": {
"type": "insensitive_contains",
"value": query,
},
},
base_filter = {
"type": "and",
"fields": [],
}
to_add = {
"type": "search",
"dimension": "msg",
"query": {
"type": "insensitive_contains",
"value": query,
},
}
if blank:
return search_query
else:
search_query["filter"] = base_filter
search_query["filter"]["fields"].append(to_add)
return search_query
@@ -65,12 +53,15 @@ class DruidBackend(StorageBackend):
return parsed
def run_query(self, user, search_query, timeout=(5, 300)):
    """
    POST a native query to the Druid broker and return the decoded reply.

    :param user: the requesting user; not used by this method
    :param search_query: JSON-serialisable dict holding the native query
    :param timeout: ``requests`` timeout, either a number of seconds or a
        ``(connect, read)`` tuple. Without one, an unresponsive broker
        would block the caller indefinitely.
    :return: the broker's JSON body as decoded by ``orjson``
    :raises requests.exceptions.RequestException: on connection failure
        or when the timeout expires
    :raises orjson.JSONDecodeError: if the body is not valid JSON
    """
    # Imported locally so this method does not rely on the module's import list.
    import logging

    log = logging.getLogger(__name__)

    # Only pay for pretty-printing the query when debug output is wanted.
    if log.isEnabledFor(logging.DEBUG):
        pretty_query = orjson.dumps(search_query, option=orjson.OPT_INDENT_2)
        log.debug("Druid query: %s", pretty_query.decode())

    reply = requests.post(
        "http://broker:8082/druid/v2",
        json=search_query,
        timeout=timeout,
    )
    # Hand orjson the raw bytes so no text-encoding guess happens first.
    decoded = orjson.loads(reply.content)
    log.debug("Druid response length: %d", len(decoded))
    return decoded
def filter_blacklisted(self, user, response):
@@ -89,12 +80,24 @@ class DruidBackend(StorageBackend):
tags=None,
):
add_bool = []
add_top = []
add_in = {}
helpers.add_defaults(query_params)
# Check size
# Now, run the helpers for SIQTSRSS/ADR
# S - Size
# I - Index
# Q - Query
# T - Tags
# S - Source
# R - Ranges
# S - Sort
# S - Sentiment
# A - Annotate
# D - Dedup
# R - Reverse
# S - Size
if request.user.is_anonymous:
sizes = settings.MAIN_SIZES_ANON
else:
@@ -104,37 +107,80 @@ class DruidBackend(StorageBackend):
if isinstance(size, dict):
return size
# Check index
# I - Index
index = self.parse_index(request.user, query_params)
if isinstance(index, dict):
return index
# Create the search query
search_query = self.parse_query(query_params, tags, size, index, custom_query, add_bool)
if isinstance(search_query, dict):
# Q/T - Query/Tags
search_query = self.parse_query(
query_params, tags, size, index, custom_query, add_bool
)
# Query should be a dict, so check if it contains message here
if "message" in search_query:
return search_query
# S - Sources
sources = self.parse_source(request.user, query_params)
# TODO
add_top_tmp = {"bool": {"should": []}}
total_count = 0
for source_iter in sources:
add_top_tmp["bool"]["should"].append({"equals": {"src": source_iter}})
total_count += 1
total_sources = len(settings.MAIN_SOURCES) + len(
settings.SOURCES_RESTRICTED
)
if not total_count == total_sources:
add_top.append(add_top_tmp)
if isinstance(sources, dict):
return sources
total_count = len(sources)
total_sources = len(settings.MAIN_SOURCES) + len(settings.SOURCES_RESTRICTED)
if total_count != total_sources:
add_in["src"] = sources
print("SIZE IS", size)
# R - Ranges
from_ts, to_ts = self.parse_date_time(query_params)
if from_ts:
addendum = f"{from_ts}/{to_ts}"
search_query["intervals"] = [addendum]
# S - Sort
sort = self.parse_sort(query_params)
if isinstance(sort, dict):
return sort
if sort:
search_query["order"] = sort
# S - Sentiment
sentiment_r = self.parse_sentiment(query_params)
if isinstance(sentiment_r, dict):
return sentiment_r
if sentiment_r:
sentiment_method, sentiment = sentiment_r
sentiment_query = {"type": "bound", "dimension": "sentiment"}
if sentiment_method == "below":
sentiment_query["upper"] = sentiment
elif sentiment_method == "above":
sentiment_query["lower"] = sentiment
elif sentiment_method == "exact":
sentiment_query["lower"] = sentiment
sentiment_query["upper"] = sentiment
elif sentiment_method == "nonzero":
sentiment_query["lower"] = -0.0001
sentiment_query["upper"] = 0.0001
sentiment_query["lowerStrict"] = True
sentiment_query["upperStrict"] = True
# add_bool.append(sentiment_query)
self.add_filter(search_query)
search_query["filter"]["fields"].append(sentiment_query)
# Add in the additional information we already populated
if add_bool:
self.add_bool(search_query, add_bool)
self.add_type("and", search_query, add_bool)
if add_in:
self.add_in(search_query, add_in)
response = self.query(request.user, search_query)
# print("RESP", response)
# A/D/R - Annotate/Dedup/Reverse
self.process_results(
response,
annotate=annotate,
dedup=dedup,
dedup_fields=dedup_fields,
reverse=reverse,
)
# ss = orjson.dumps(list(response), option=orjson.OPT_INDENT_2)
# ss = ss.decode()
# print(ss)
@@ -143,11 +189,29 @@ class DruidBackend(StorageBackend):
context = response
return context
def add_bool(self, search_query, add_bool):
    """
    Attach a ``{"bool": {"should": add_bool}}`` clause to the query's filter.

    The query is modified in place:
    - with no "filter" key, the clause itself becomes the filter;
    - with a filter that has no "fields" key, "fields" is created holding
      only the clause;
    - otherwise the clause is appended to the existing "fields" list.

    :param search_query: query dict to modify
    :param add_bool: the entries to place under "should"
    """
    clause = {"bool": {"should": add_bool}}

    # No filter yet: the clause is used as the whole filter.
    if "filter" not in search_query:
        search_query["filter"] = clause
        return

    current = search_query["filter"]
    if "fields" not in current:
        current["fields"] = [clause]
    else:
        current["fields"].append(clause)
def add_filter(self, search_query):
    """
    Make sure the query has a "filter" entry.

    When "filter" is missing, an empty "and" filter
    (``{"type": "and", "fields": []}``) is stored under it. An existing
    filter is left exactly as it is. The query is modified in place and
    nothing is returned.

    :param search_query: query dict to modify
    """
    # A fresh dict is built per call, so queries never share a "fields" list.
    search_query.setdefault("filter", {"type": "and", "fields": []})
def add_in(self, search_query, add_in):
    """
    Append one "in" filter per entry of ``add_in`` to the query's filter.

    Each ``dimension: values`` pair becomes
    ``{"type": "in", "dimension": dimension, "values": values}`` and is
    added to ``search_query["filter"]["fields"]``, in the mapping's
    iteration order. ``add_filter`` is called first so that a missing
    filter is created.

    :param search_query: query dict to modify in place
    :param add_in: mapping of dimension name to the values to match
    """
    self.add_filter(search_query)

    in_filters = [
        {"type": "in", "dimension": dimension, "values": allowed}
        for dimension, allowed in add_in.items()
    ]
    # With nothing to add, the "fields" list is not touched at all.
    if in_filters:
        search_query["filter"]["fields"].extend(in_filters)
def add_type(self, gate, search_query, add_bool):
    """
    Append a grouped set of "selector" filters to the query's filter.

    Every ``key: value`` pair found in the dicts of ``add_bool`` becomes
    ``{"type": "selector", "dimension": key, "value": value}``. The
    selectors are wrapped in ``{"type": gate, "fields": [...]}`` and that
    wrapper is appended to ``search_query["filter"]["fields"]``.
    ``add_filter`` is called first so that a missing filter is created.

    :param gate: value stored as the wrapper's "type" (the caller visible
        in this file passes "and")
    :param search_query: query dict to modify in place
    :param add_bool: iterable of dicts mapping dimension name to value
    """
    self.add_filter(search_query)

    selectors = [
        {"type": "selector", "dimension": dimension, "value": wanted}
        for clause in add_bool
        for dimension, wanted in clause.items()
    ]
    search_query["filter"]["fields"].append({"type": gate, "fields": selectors})
def check_valid_query(self, query_params, custom_query):
    """
    Accept every query without validation.

    Blank queries are allowed with this data source, so there is nothing
    to check and both arguments are ignored.

    :param query_params: ignored
    :param custom_query: ignored
    :return: always ``None``
    """
    return None