# neptune/core/lib/processing.py

from datetime import datetime, timezone

from django.conf import settings

from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online


def annotate_results(results_parsed):
    """
    Enrich parsed result dicts with data looked up per network.

    For every distinct "net" value found in the results, the nicks and
    channels of the IRC entries on that network are collected and passed to
    the Threshold helpers. Whatever those helpers return is treated as a
    mapping and used to set "online", "num_users" and "num_chans" on every
    entry of that network whose nick/channel appears in it.

    The dicts are mutated in place. Nothing is returned.
    """
    # Only entries carrying a "net" key take part (e.g. not Discord ones).
    networks = {entry["net"] for entry in results_parsed if "net" in entry}

    def irc_values(key, network):
        # Unique values of `key` among the IRC entries of `network`.
        required = {key, "src", "net"}
        return list(
            {
                entry[key]
                for entry in results_parsed
                if required.issubset(entry)
                and entry["src"] == "irc"
                and entry["net"] == network
            }
        )

    for network in networks:
        nicks = irc_values("nick", network)
        channels = irc_values("channel", network)

        # Online attribute from Threshold, looked up by nick
        online_by_nick = annotate_online(network, nicks)
        # Number of users in each channel
        users_by_channel = annotate_num_users(network, channels)
        # Number of channels each nick is on
        chans_by_nick = annotate_num_chans(network, nicks)

        for entry in results_parsed:
            if not ("net" in entry and entry["net"] == network):
                continue
            has_nick = "nick" in entry
            if has_nick and entry["nick"] in online_by_nick:
                entry["online"] = online_by_nick[entry["nick"]]
            if "channel" in entry and entry["channel"] in users_by_channel:
                entry["num_users"] = users_by_channel[entry["channel"]]
            if has_nick and entry["nick"] in chans_by_nick:
                entry["num_chans"] = chans_by_nick[entry["nick"]]


def filter_blacklisted(user, response):
    """
    Low level filter to take the raw OpenSearch response and remove
    objects from it we want to keep secret.

    The response is mutated in place:

    * ``response["redacted"]`` counts the hits that matched the blacklist.
    * ``response["exemption"]`` is True for superusers, otherwise None.
    * Matching hits are dropped for anonymous users and for users without
      the ``core.bypass_blacklist`` permission. Users holding the permission
      keep the hit, flagged with ``exemption = True`` in its data.

    A hit matches when the string form of one of its fields equals an entry
    listed for that field name in ``settings.OPENSEARCH_BLACKLISTED``.

    Returns False if a hit with neither "_source" nor "fields" was seen while
    a blacklist is configured (such hits are left untouched), otherwise None.
    All other hits are still filtered in that case, so a malformed hit can no
    longer leave blacklisted hits or ``None`` placeholders in the response.
    """
    response["redacted"] = 0
    response["exemption"] = None
    if user.is_superuser:
        response["exemption"] = True

    blacklist = settings.OPENSEARCH_BLACKLISTED
    hits = response["hits"]["hits"]
    malformed = False

    # For every hit from ES
    for index, item in enumerate(hits):
        if "_source" in item:
            data_index = "_source"
        elif "fields" in item:
            data_index = "fields"
        else:
            # Nothing we can inspect; remember it for the return value but
            # keep filtering the remaining hits.
            if blacklist:
                malformed = True
            continue
        data = item[data_index]

        # For every blacklisted type
        for blacklisted_type, blacklisted_values in blacklist.items():
            if hits[index] is None:
                # Already removed by an earlier blacklisted type
                break
            if blacklisted_type not in data:
                continue
            content = str(data[blacklisted_type])
            if not any(value == content for value in blacklisted_values):
                continue

            # Let the UI know something was redacted (count each hit once)
            if "exemption" not in data:
                response["redacted"] += 1
            if user.is_anonymous or not user.has_perm("core.bypass_blacklist"):
                # Just set it to None so the indexes are not off
                hits[index] = None
            else:
                data["exemption"] = True

    # Actually get rid of all the things we set to None
    response["hits"]["hits"] = [hit for hit in hits if hit]
    return False if malformed else None


def _split_timestamp(element):
    """Derive the "date" and "time" keys of *element* from its timestamp, in place."""
    # Some documents carry the timestamp under "time" instead of "ts".
    if "ts" not in element and "time" in element:
        element["ts"] = element.pop("time")
    if "ts" not in element:
        return

    ts = element["ts"]
    if not isinstance(ts, str):
        # Non-string timestamps are handed to fromtimestamp() and rendered
        # in UTC; element["ts"] itself is left unchanged.
        ts = datetime.fromtimestamp(ts, tz=timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%S"
        )

    parts = ts.split("T")
    element["date"] = parts[0]
    if len(parts) < 2:
        # No time component to split off
        return
    time = parts[1]
    # Drop the fractional seconds when there is exactly one "."
    if time.count(".") == 1:
        time = time.split(".")[0]
    element["time"] = time


def parse_results(results):
    """
    Flatten a raw OpenSearch response into a list of plain dicts.

    For every hit the data is taken from "_source" or, failing that, from
    "fields". Each resulting dict:

    * has list-wrapped "fields" values replaced by their first item (empty
      lists are dropped),
    * has "host" and "channel" converted to strings,
    * gets the hit's "_id" stored under "id",
    * loses keys whose value is the empty string,
    * gets "date" and "time" split out of "ts" (or a legacy "time" key).

    For "_source" hits the returned dict is the same object as the hit's
    "_source" dict, so the response is modified in place.

    Returns the list of dicts (empty when the response has no hits), or
    False as soon as a hit with neither "_source" nor "fields" is found.
    """
    results_parsed = []
    stringify = ["host", "channel"]
    if "hits" not in results or "hits" not in results["hits"]:
        return results_parsed

    for item in results["hits"]["hits"]:
        if "_source" in item:
            element = item["_source"]
        elif "fields" in item:
            # Unwrap the list-wrapped values *before* stringifying below;
            # otherwise str(["x"])[0] would turn every value into "[".
            element = {}
            for key, value in item["fields"].items():
                if isinstance(value, (list, tuple)):
                    if not value:
                        continue
                    value = value[0]
                element[key] = value
        else:
            return False

        for field in stringify:
            if field in element:
                element[field] = str(element[field])
        element["id"] = item["_id"]

        # Remove empty values
        for field in [key for key, value in element.items() if value == ""]:
            del element[field]

        # Split the timestamp into date and time
        _split_timestamp(element)
        results_parsed.append(element)
    return results_parsed