149 lines
5.4 KiB
Python
149 lines
5.4 KiB
Python
from datetime import datetime
|
|
import ast
|
|
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
|
|
|
|
|
|
def annotate_results(results):
    """
    Annotate results in place with online status and user/channel counts.

    Accept a list of dict objects. For every network (the ``net`` key), collect
    the nicks and channels of its IRC entries (``src == "irc"``), query
    Threshold for them and copy the answers onto the entries of that network:

    * ``online``    - value from ``annotate_online(net, nicks)`` for the nick
    * ``num_users`` - value from ``annotate_num_users(net, channels)`` for the
      channel
    * ``num_chans`` - value from ``annotate_num_chans(net, nicks)`` for the nick

    Each helper's return value is used as a mapping keyed by nick or channel;
    a falsy return value or a missing key simply leaves the entry unannotated.
    Entries without a ``net`` key (the original comment names Discord) are
    never touched. Any entry of a network is annotated when its nick/channel
    appears in the lookup, not only the IRC ones that produced the query.

    Mutate it in place. Does not return anything.
    """
    # Bucket the entries by network once instead of re-scanning the whole
    # result list for every network.
    by_net = {}
    for entry in results:
        if "net" in entry:
            by_net.setdefault(entry["net"], []).append(entry)

    for net, entries in by_net.items():
        irc_entries = [x for x in entries if "src" in x and x["src"] == "irc"]
        nicks = list({x["nick"] for x in irc_entries if "nick" in x})
        channels = list({x["channel"] for x in irc_entries if "channel" in x})

        # Only query Threshold when there is something to ask about.
        # Annotate the online attribute from Threshold
        online_info = annotate_online(net, nicks) if nicks else None
        # Annotate the number of users in the channel
        num_users = annotate_num_users(net, channels) if channels else None
        # Annotate the number of channels the user is on
        num_chans = annotate_num_chans(net, nicks) if nicks else None

        for entry in entries:
            if "nick" in entry and online_info and entry["nick"] in online_info:
                entry["online"] = online_info[entry["nick"]]
            if "channel" in entry and num_users and entry["channel"] in num_users:
                entry["num_users"] = num_users[entry["channel"]]
            if "nick" in entry and num_chans and entry["nick"] in num_chans:
                entry["num_chans"] = num_chans[entry["nick"]]
|
|
|
|
|
|
def _normalise_hit(item):
    """
    Return the cleaned-up document carried by one search hit.

    The document is taken from ``item["_source"]`` (modified in place) or,
    failing that, rebuilt from ``item["fields"]``. Returns None when the hit
    has neither key. Raises KeyError if the hit has no ``_id``.
    """
    if "_source" in item:
        element = item["_source"]
    elif "fields" in item:
        # "fields" wraps every value in a list: keep the first entry and drop
        # empty ones. This must happen BEFORE stringifying, otherwise
        # str(["#chan"])[0] would leave just "[" behind.
        element = {k: v[0] for k, v in item["fields"].items() if len(v)}
    else:
        return None

    for field in ("host", "channel"):
        if field in element:
            element[field] = str(element[field])
    element["id"] = item["_id"]

    # Remove empty values
    for field in [k for k, v in element.items() if v == ""]:
        del element[field]

    # Unfold the tokens: they may arrive as the string form of a list.
    # Values that are already decoded (not a str) are left alone.
    tokens = element.get("tokens")
    if isinstance(tokens, str) and tokens.startswith('["'):
        element["tokens"] = ast.literal_eval(tokens)
    return element


def _split_timestamp(element):
    """
    Add ``date`` and ``time`` keys derived from the element's timestamp.

    A missing ``ts`` is taken from ``time`` when present. Non-string
    timestamps are treated as seconds since the epoch and rendered in UTC.
    Fractional seconds are dropped from ``time``. Elements with neither
    ``ts`` nor ``time`` are left unchanged. Raises IndexError if a string
    timestamp contains no "T" separator.
    """
    # Local import: the module only imports ``datetime`` from ``datetime``.
    from datetime import timezone

    if "ts" not in element:
        if "time" not in element:
            return
        # Older data keeps the timestamp under "time"
        element["ts"] = element.pop("time")

    ts = element["ts"]
    if not isinstance(ts, str):
        # Same rendering as the deprecated datetime.utcfromtimestamp()
        ts = datetime.fromtimestamp(ts, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S"
        )

    pieces = ts.split("T")
    date = pieces[0]
    clock = pieces[1]
    element["date"] = date
    # Strip the fraction only when there is exactly one "." in the time part
    clock_parts = clock.split(".")
    element["time"] = clock_parts[0] if len(clock_parts) == 2 else clock


def parse_results(results, meta=None):
    """
    Flatten a search response into a list of result dicts.

    ``results`` is a dict laid out as
    ``{"hits": {"hits": [...], "total": {"value": N}}, "aggregations": {...}}``.
    Each hit contributes its ``_source`` (or unwrapped ``fields``) document
    with:

    * ``host`` and ``channel`` converted to strings
    * ``id`` set from the hit's ``_id``
    * keys whose value is the empty string removed
    * ``tokens`` decoded when it is the string form of a list
    * ``ts`` (falling back to ``time``) split into ``date`` and ``time``

    ``_source`` documents are modified in place.

    Returns the list of parsed elements, or ``(meta, elements)`` when ``meta``
    is truthy, where ``meta`` holds ``aggs`` (selected aggregations) and
    ``total_hits``. Returns False as soon as a hit has neither ``_source`` nor
    ``fields``. A response without ``hits`` yields an empty list.

    Raises KeyError when ``meta`` is requested but ``results`` has no
    ``hits.total.value``.
    """
    results_parsed = []
    if "hits" in results and "hits" in results["hits"]:
        for item in results["hits"]["hits"]:
            element = _normalise_hit(item)
            if element is None:
                return False
            _split_timestamp(element)
            results_parsed.append(element)

    if not meta:
        return results_parsed

    meta = {"aggs": {}}
    if "aggregations" in results:
        for field in ["avg_sentiment"]:  # Add other number fields here
            if field in results["aggregations"]:
                meta["aggs"][field] = results["aggregations"][field]
    meta["total_hits"] = results["hits"]["total"]["value"]
    return (meta, results_parsed)
|
|
|
|
|
|
def parse_druid(response):
    """
    Flatten a Druid response into a single list of events.

    ``response`` is an iterable of items, each of which must contain an
    ``events`` key holding an iterable of events. The events of all items are
    returned in order as one list.

    Raises ValueError (a subclass of Exception, so existing broad handlers
    still catch it) for the first item that has no ``events`` key.
    """
    results_parsed = []
    for item in response:
        if "events" not in item:
            raise ValueError(f"events not in item {item}")
        results_parsed.extend(item["events"])
    return results_parsed
|