neptune/core/db/processing.py

133 lines
4.8 KiB
Python
Raw Normal View History

2022-09-06 10:53:32 +00:00
from datetime import datetime
2022-09-06 11:18:58 +00:00
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
2022-09-06 10:53:32 +00:00
def annotate_results(results):
    """
    Annotate search results with presence and channel statistics.

    Accept a list of dict objects. For every network ("net") found in them,
    look up the online status of the nicks, the number of users in each
    channel and the number of channels each nick is on, then store those
    values on the matching items as "online", "num_users" and "num_chans".

    Only items whose "src" is "irc" contribute nicks and channels to the
    lookups, but every item on the network that has a matching nick or
    channel is annotated.

    Mutate the items in place. Does not return anything.
    """
    # Group everything by network in a single pass instead of rescanning the
    # whole result list several times for every network.
    by_net = {}
    for item in results:
        # Items without a net (e.g. Discord) are never annotated
        if "net" not in item:
            continue
        group = by_net.setdefault(
            item["net"], {"items": [], "nicks": set(), "channels": set()}
        )
        group["items"].append(item)
        if "src" in item and item["src"] == "irc":
            if "nick" in item:
                group["nicks"].add(item["nick"])
            if "channel" in item:
                group["channels"].add(item["channel"])

    for net, group in by_net.items():
        nicks = list(group["nicks"])
        channels = list(group["channels"])
        # The three lookups are used below as mappings keyed by nick or
        # channel (membership test + indexing); presumably dicts - confirm
        # against core.lib.threshold.
        # Annotate the online attribute from Threshold
        online_info = annotate_online(net, nicks)
        # Annotate the number of users in the channel
        num_users = annotate_num_users(net, channels)
        # Annotate the number of channels the user is on
        num_chans = annotate_num_chans(net, nicks)

        for item in group["items"]:
            has_nick = "nick" in item
            # Keys are added in the order online, num_users, num_chans so the
            # resulting dicts keep a stable key order.
            if has_nick and item["nick"] in online_info:
                item["online"] = online_info[item["nick"]]
            if "channel" in item and item["channel"] in num_users:
                item["num_users"] = num_users[item["channel"]]
            if has_nick and item["nick"] in num_chans:
                item["num_chans"] = num_chans[item["nick"]]
def _split_timestamp(element):
    """
    Make sure the element has a "ts" key and derive "date" and "time" from it.

    A "time" key is used as the timestamp when "ts" is missing. String
    timestamps are split on "T" (or on a space when there is no "T"); any
    other value is treated as a UNIX timestamp and formatted in UTC.
    Fractional seconds are dropped from "time". Mutates element in place.
    """
    # Function-scope import so that the module-level imports stay untouched
    from datetime import timezone

    if "ts" not in element and "time" in element:  # will fix data later
        element["ts"] = element.pop("time")
    if "ts" not in element:
        return

    raw = element["ts"]
    if isinstance(raw, str):
        stamp = raw
    else:
        # Same output as the deprecated datetime.utcfromtimestamp()
        stamp = datetime.fromtimestamp(raw, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S"
        )

    date_part, sep, time_part = stamp.partition("T")
    if not sep:
        # Tolerate "YYYY-MM-DD HH:MM:SS" instead of raising IndexError
        date_part, sep, time_part = stamp.partition(" ")
    element["date"] = date_part
    if sep:
        # Keep only what precedes the fractional seconds, if there are any
        element["time"] = time_part.partition(".")[0]


def parse_results(results, aggs=None):
    """
    Flatten a search response into a list of plain dicts.

    Each hit under results["hits"]["hits"] is read from its "_source" or,
    failing that, its "fields" entry. For "fields" hits every list value is
    replaced by its first element and empty lists are dropped. The "host"
    and "channel" values are converted to strings, the hit's "_id" is stored
    as "id", empty-string values are removed and the timestamp is split into
    "date" and "time".

    The response passed in is not modified.

    Return the list of parsed elements, or a tuple of
    (aggregations, parsed elements) when aggs is truthy. Return False as soon
    as a hit has neither "_source" nor "fields".
    """
    stringify = ("host", "channel")
    results_parsed = []

    hits = []
    if "hits" in results and "hits" in results["hits"]:
        hits = results["hits"]["hits"]

    for item in hits:
        if "_source" in item:
            # Copy so that the caller's response is left untouched
            element = dict(item["_source"])
        elif "fields" in item:
            # Why are fields in lists...
            # Unwrap them *before* stringifying: str(["x"]) is "['x']" and
            # taking its first element would leave just "[".
            element = {}
            for key, value in item["fields"].items():
                if not isinstance(value, list):
                    element[key] = value
                elif value:
                    element[key] = value[0]
        else:
            return False

        for field in stringify:
            if field in element:
                element[field] = str(element[field])
        element["id"] = item["_id"]

        # Remove empty values
        for field in list(element):
            if element[field] == "":
                del element[field]

        # Split the timestamp into date and time
        _split_timestamp(element)
        results_parsed.append(element)

    if aggs:
        aggregations = {}
        if "aggregations" in results:
            for field in ["avg_sentiment"]:  # Add other number fields here
                if field in results["aggregations"]:
                    aggregations[field] = results["aggregations"][field]
        return (aggregations, results_parsed)

    return results_parsed
2022-09-27 14:15:08 +00:00
def parse_druid(response):
    """
    Collect the events of every item in a Druid response into one flat list.

    Each item of response must contain an "events" key holding an iterable
    of events; the events are returned in the order they are encountered.
    Raise an Exception for the first item that has no "events" key.
    """
    events = []
    for entry in response:
        if "events" not in entry:
            raise Exception(f"events not in item {entry}")
        events.extend(entry["events"])
    return events