Implement hashing fields

2022-08-18 07:20:30 +01:00
parent 3d8519154b
commit c984e70689
14 changed files with 261 additions and 38 deletions
--- a/core/views/helpers.py
+++ b/core/views/helpers.py
@@ -1,3 +1,15 @@
+import re
+from base64 import b64encode
+
+from cryptography.hazmat.primitives.ciphers import Cipher, algorithms
+from cryptography.hazmat.primitives.ciphers.modes import ECB
+from django.conf import settings
+from siphashc import siphash
+from sortedcontainers import SortedSet
+
+from core import r
+
+
 def dedup_list(data, check_keys):
    """
    Remove duplicate dictionaries from list.
@@ -35,3 +47,124 @@ def dedup_list(data, check_keys):

 # # sh-5.1$ python helpers.py
 # # 1.0805372429895215
+
+
+def base36encode(number, alphabet="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
+    """
+    Render an integer as a string of digits drawn from ``alphabet``.
+
+    The base is ``len(alphabet)`` (36 by default). Negative input is
+    prefixed with "-". Raises TypeError if ``number`` is not an int.
+    """
+    if not isinstance(number, int):
+        raise TypeError("number must be an integer")
+    prefix = "-" if number < 0 else ""
+    remaining = abs(number)
+    base = len(alphabet)
+    if remaining < base:
+        # Single digit; this is also the only path that yields "0"
+        return prefix + alphabet[remaining]
+    digits = []
+    while remaining:
+        remaining, idx = divmod(remaining, base)
+        digits.append(alphabet[idx])
+    return prefix + "".join(reversed(digits))
+
+
+def base36decode(number):
+    return int(number, base=36)  # parse base-36 text (either case) to an int
+
+
+def hash_list(data, hash_keys=False):
+    """
+    Hash a dict, a list of dicts or a list of strings in place with SipHash.
+
+    Fields named in settings.WHITELIST_FIELDS are skipped. Otherwise the
+    value is replaced by its base36 hash (ints as str; bool/None untouched),
+    or with hash_keys the key is renamed to its hash whatever the value is.
+    The hash -> original mapping is saved to "cache.hash" via r.hmset so
+    hash_lookup() can reverse it.
+    """
+    cache = "cache.hash"
+    hash_table = {}
+    is_dict = isinstance(data, dict)
+    # Iterate over a snapshot because data is modified as we go
+    snapshot = [{k: v} for k, v in data.items()] if is_dict else list(data)
+    for index, item in enumerate(snapshot):
+        if isinstance(item, str):
+            encoded = base36encode(siphash(settings.HASHING_KEY, item))
+            hash_table.setdefault(encoded, item)
+            data[index] = encoded
+            continue
+        if not isinstance(item, dict):
+            continue
+        # For dict input the item is a throwaway pair; write to data itself
+        target = data if is_dict else item
+        for key, value in list(item.items()):
+            if key in settings.WHITELIST_FIELDS:
+                continue
+            if hash_keys:
+                plain = key
+            elif value is None or isinstance(value, bool):
+                # bool must be tested before int: bool is an int subclass
+                continue
+            else:
+                plain = str(value) if isinstance(value, int) else value
+            encoded = base36encode(siphash(settings.HASHING_KEY, plain))
+            hash_table.setdefault(encoded, plain)
+            if hash_keys:
+                target[encoded] = target.pop(key)
+            else:
+                target[key] = encoded
+    if hash_table:
+        r.hmset(cache, hash_table)
+
+
+def hash_lookup(data_dict):
+    """
+    Replace |hash| tokens in the values of data_dict with their originals.
+
+    Tokens are resolved in place from the "cache.hash" hash behind ``r``; a
+    token with no cache entry becomes "ERR". Non-string values are untouched.
+    """
+    cache = "cache.hash"
+    tokens = SortedSet()
+    for value in data_dict.values():
+        if value and isinstance(value, str):
+            tokens.update(re.findall(r"\|([^\|]*)\|", value))
+
+    values = r.hmget(cache, *tokens) if tokens else None
+    if not values:
+        return
+
+    # Unknown fields come back empty; only bytes are decoded so the "ERR"
+    # placeholder (or an already-decoded str) never reaches .decode()
+    plain = [
+        "ERR" if not v else v.decode() if isinstance(v, bytes) else v
+        for v in values
+    ]
+    for key, value in data_dict.items():
+        if isinstance(value, str):
+            for token, original in zip(tokens, plain):
+                value = value.replace(f"|{token}|", original)
+            data_dict[key] = value
+
+
+def encrypt_list(data, secret):
+    """
+    Encrypt non-whitelisted dict values in data in place (AES-ECB, base64).
+
+    bool/None are skipped. NOTE(review): ECB leaks equal plaintexts; confirm.
+    """
+    cipher = Cipher(algorithms.AES(secret), ECB())
+    for item in data:
+        for key, value in item.items():
+            skip = value is None or isinstance(value, bool)  # bool before int
+            if skip or key in settings.WHITELIST_FIELDS:
+                continue
+            text = str(value) if isinstance(value, int) else value
+            raw = text.encode("utf8", "replace")
+            pad = 16 - len(raw) % 16  # PKCS#7: always 1..16 bytes of padding
+            encryptor = cipher.encryptor()
+            ct = encryptor.update(raw + bytes([pad]) * pad) + encryptor.finalize()
+            item[key] = b64encode(ct).decode("utf-8", "replace")
--- a/core/views/ui/drilldown.py
+++ b/core/views/ui/drilldown.py
@@ -1,5 +1,6 @@
 import json
 import urllib
+from copy import deepcopy

 from django.conf import settings
 from django.http import HttpResponse, JsonResponse
@@ -18,6 +19,7 @@ from core.lib.threshold import (
    get_chans,
    get_users,
 )
+from core.views.helpers import hash_list, hash_lookup
 from core.views.ui.tables import DrilldownTable


@@ -266,58 +268,65 @@ class DrilldownContextModal(APIView):
            if key not in query_params:
                query_params[key] = None

+        # Lookup the hash values but don't disclose them to the user
+        if settings.HASHING:
+            SAFE_PARAMS = deepcopy(query_params)
+            hash_lookup(SAFE_PARAMS)
+
        type = None
        # SUPERUSER BLOCK #
        if request.user.is_superuser:
-            if "type" in query_params:
-                type = query_params["type"]
+            if "type" in SAFE_PARAMS:
+                type = SAFE_PARAMS["type"]
                if type == "znc":
-                    query_params["channel"] = "*status"
+                    SAFE_PARAMS["channel"] = "*status"

            if type in ["query", "notice"]:
-                nicks = [query_params["channel"], query_params["nick"]]
+                nicks = [SAFE_PARAMS["channel"], SAFE_PARAMS["nick"]]
                query = True

            if (
-                query_params["index"] == "int"
-                and query_params["mtype"] == "msg"
+                SAFE_PARAMS["index"] == "int"
+                and SAFE_PARAMS["mtype"] == "msg"
                and not type == "query"
            ):
-                query_params["index"] = "main"
+                SAFE_PARAMS["index"] = "main"

-            if query_params["type"] in ["znc", "auth"]:
+            if SAFE_PARAMS["type"] in ["znc", "auth"]:
                query = True

        # SUPERUSER BLOCK #

        if not request.user.is_superuser:
-            if "index" in query_params:
-                query_params["index"] = "main"
+            if "index" in SAFE_PARAMS:
+                SAFE_PARAMS["index"] = "main"

-        query_params["sorting"] = "desc"
+        SAFE_PARAMS["sorting"] = "desc"

+        annotate = False
+        if SAFE_PARAMS["src"] == "irc":
+            if SAFE_PARAMS["type"] in ["query", "notice", "msg", "highlight"]:
+                annotate = True
        # Create the query with the context helper
        search_query = construct_query(
-            query_params["index"],
-            query_params["net"],
-            query_params["channel"],
-            query_params["src"],
-            query_params["num"],
+            SAFE_PARAMS["index"],
+            SAFE_PARAMS["net"],
+            SAFE_PARAMS["channel"],
+            SAFE_PARAMS["src"],
+            SAFE_PARAMS["num"],
            size,
            type=type,
            nicks=nicks,
        )
-        annotate = False
-        if query_params["src"] == "irc":
-            if query_params["type"] in ["query", "notice", "msg", "highlight"]:
-                annotate = True
+
        results = query_results(
            request,
-            query_params,
+            SAFE_PARAMS,
            annotate=annotate,
            custom_query=search_query,
            reverse=True,
            dedup_fields=["net", "type", "msg"],
+            lookup_hashes=False,
        )
        if "message" in results:
            return render(request, self.template_name, results)
@@ -362,21 +371,43 @@ class ThresholdInfoModal(APIView):
            return JsonResponse({"success": False})
        if "channel" not in request.data:
            return JsonResponse({"success": False})
+
        net = request.data["net"]
        nick = request.data["nick"]
        channel = request.data["channel"]
-        channels = get_chans(net, [nick])
-        users = get_users(net, [channel])
-        num_users = annotate_num_users(net, channels)
-        num_chans = annotate_num_chans(net, users)
+
+        # SAFE BLOCK #
+        # Lookup the hash values but don't disclose them to the user
+        if settings.HASHING:
+            SAFE_PARAMS = request.data.dict()
+            hash_lookup(SAFE_PARAMS)
+        safe_net = SAFE_PARAMS["net"]
+        safe_nick = SAFE_PARAMS["nick"]
+        safe_channel = SAFE_PARAMS["channel"]
+        channels = get_chans(safe_net, [safe_nick])
+        users = get_users(safe_net, [safe_channel])
+        num_users = annotate_num_users(safe_net, channels)
+        num_chans = annotate_num_chans(safe_net, users)
        if channels:
-            inter_users = get_users(net, channels)
+            inter_users = get_users(safe_net, channels)
        else:
            inter_users = []
        if users:
-            inter_chans = get_chans(net, users)
+            inter_chans = get_chans(safe_net, users)
        else:
            inter_chans = []
+        hash_list(inter_chans)
+        hash_list(inter_users)
+
+        hash_list(num_chans, hash_keys=True)
+        hash_list(num_users, hash_keys=True)
+
+        hash_list(channels)
+        hash_list(users)
+
+        # SAFE BLOCK END #
+        nick = nick.replace("|", "")
+        channel = channel.replace("|", "")
        context = {
            "net": net,
            "nick": nick,
--- a/core/views/ui/tables.py
+++ b/core/views/ui/tables.py
@@ -56,7 +56,7 @@ class DrilldownTable(Table):
    sentiment = Column()
    status = Column()
    user = Column()
-    version_sentiment = Column()
+    # version_sentiment = Column()
    exemption = Column()
    num_chans = Column()
    num_users = Column()