Implement hashing fields

This commit is contained in:
2022-08-18 07:20:30 +01:00
parent 3d8519154b
commit c984e70689
14 changed files with 261 additions and 38 deletions

View File

@@ -1,3 +1,15 @@
import re
from base64 import b64encode
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms
from cryptography.hazmat.primitives.ciphers.modes import ECB
from django.conf import settings
from siphashc import siphash
from sortedcontainers import SortedSet
from core import r
def dedup_list(data, check_keys):
"""
Remove duplicate dictionaries from list.
@@ -35,3 +47,124 @@ def dedup_list(data, check_keys):
# # sh-5.1$ python helpers.py
# # 1.0805372429895215
def base36encode(number, alphabet="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """
    Convert an integer to a string of digits taken from ``alphabet``.

    The length of ``alphabet`` is the base; with the default 36-character
    alphabet the result is a base36 string.

    :param number: integer to encode; negative values get a leading "-"
    :param alphabet: digit characters ordered from value 0 upwards
    :return: the encoded string
    :raises TypeError: if ``number`` is not an integer
    :raises ValueError: if ``alphabet`` has fewer than two characters
    """
    if not isinstance(number, int):
        raise TypeError("number must be an integer")

    base = len(alphabet)
    if base < 2:
        # With a single digit divmod() never shrinks the number (endless
        # loop); with no digits at all it divides by zero.
        raise ValueError("alphabet must contain at least two characters")

    sign = "-" if number < 0 else ""
    number = abs(number)

    # Single-digit values (including zero, which the loop below would
    # render as an empty string).
    if number < base:
        return sign + alphabet[number]

    # Collect digits least-significant first, then reverse once.
    digits = []
    while number:
        number, remainder = divmod(number, base)
        digits.append(alphabet[remainder])
    return sign + "".join(reversed(digits))
def base36decode(number):
    """
    Convert a base36 string back into an integer.

    :param number: the base36 text, parsed with ``int(..., 36)``
    :return: the decoded integer
    """
    return int(number, base=36)
def hash_list(data, hash_keys=False):
    """
    Hash a list of dicts or a list with SipHash42.

    ``data`` is modified in place and nothing is returned. Accepted shapes:

    * a dict: its values are replaced by hashes, or its keys are renamed to
      hashes when ``hash_keys`` is true;
    * a list whose items are dicts (each treated like the dict case) and/or
      strings (each replaced by its hash). Other item types are left alone.

    Dict entries are skipped when the key is in ``settings.WHITELIST_FIELDS``
    or the value is ``None`` or a bool. Integer values are hashed in their
    string form.

    Every ``hash -> original`` pair produced is written to the
    ``cache.hash`` hash on ``r`` so it can be reversed later.

    :param data: dict or list to hash in place
    :param hash_keys: hash (rename) dict keys instead of replacing values
    """
    cache = "cache.hash"
    hash_table = {}

    def _encode(plain):
        # Hash one string and remember how to reverse it. The first
        # plaintext seen for a given digest is the one that is stored.
        encoded = base36encode(siphash(settings.HASHING_KEY, plain))
        hash_table.setdefault(encoded, plain)
        return encoded

    def _hash_mapping(mapping):
        # Hash a single dict in place. Iterate over a snapshot because keys
        # may be renamed while walking the entries.
        for key, value in list(mapping.items()):
            if key in settings.WHITELIST_FIELDS:
                continue
            # bool is a subclass of int, so it must be ruled out before the
            # integer conversion below or booleans would end up hashed.
            if value is None or isinstance(value, bool):
                continue
            if isinstance(value, int):
                value = str(value)
            if hash_keys:
                # Rename the dict key
                encoded = _encode(key)
                mapping[encoded] = mapping.pop(key)
            else:
                mapping[key] = _encode(value)

    if isinstance(data, dict):
        _hash_mapping(data)
    else:
        for index, item in enumerate(data):
            if isinstance(item, dict):
                _hash_mapping(item)
            elif isinstance(item, str):
                data[index] = _encode(item)

    if hash_table:
        # NOTE(review): if ``r`` is a redis-py client, hmset() is deprecated
        # in favour of hset(name, mapping=...) — confirm before switching.
        r.hmset(cache, hash_table)
def hash_lookup(data_dict):
    """
    Replace ``|hash|`` tokens in the values of ``data_dict`` with the text
    stored for them in the ``cache.hash`` hash on ``r``.

    ``data_dict`` is modified in place and nothing is returned. Only
    non-empty string values are inspected; anything else is left untouched.
    Tokens with no cache entry are replaced by ``"ERR"``.

    :param data_dict: mapping whose string values may contain ``|hash|``
    """
    cache = "cache.hash"

    # Collect every distinct token that appears between a pair of pipes.
    tokens = set()
    for value in data_dict.values():
        if not value or not isinstance(value, str):
            continue
        tokens.update(re.findall(r"\|([^\|]*)\|", value))
    if not tokens:
        return

    # Fixed ordering so the returned values line up with the tokens.
    ordered = sorted(tokens)
    values = r.hmget(cache, *ordered)
    if not values:
        return

    total = {}
    for token, raw in zip(ordered, values):
        if not raw:
            # No cache entry for this token.
            total[token] = "ERR"
        elif isinstance(raw, bytes):
            total[token] = raw.decode()
        else:
            # Already text, e.g. if the client decodes responses itself.
            total[token] = raw

    for key, value in data_dict.items():
        if not value or not isinstance(value, str):
            continue
        for token, plain in total.items():
            value = value.replace(f"|{token}|", plain)
        data_dict[key] = value
def encrypt_list(data, secret):
    """
    Encrypt, in place, the values of every dict in ``data``.

    Entries are skipped when the key is in ``settings.WHITELIST_FIELDS`` or
    the value is ``None`` or a bool. Integer values are encrypted in their
    string form. Each value is UTF-8 encoded, padded to a multiple of 16
    bytes, encrypted with AES in ECB mode and stored back as base64 text.

    :param data: list of dicts to encrypt in place
    :param secret: AES key passed to ``algorithms.AES``
    """
    # NOTE(review): ECB is deterministic — equal plaintexts give equal
    # ciphertexts. Presumably intentional so encrypted fields stay
    # comparable; confirm before relying on it for confidentiality.
    cipher = Cipher(algorithms.AES(secret), ECB())
    for item in data:
        for key, value in item.items():
            if key in settings.WHITELIST_FIELDS:
                continue
            # bool is a subclass of int, so it must be ruled out before the
            # integer conversion below or booleans would end up encrypted.
            if value is None or isinstance(value, bool):
                continue
            if isinstance(value, int):
                value = str(value)

            plaintext = value.encode("utf8", "replace")
            # Pad with N bytes of value N up to the next 16-byte boundary
            # (a full extra block when the length is already a multiple).
            pad = 16 - (len(plaintext) % 16)
            plaintext += bytes([pad]) * pad

            # An encryptor cannot be reused after finalize(); make a fresh
            # one only for values that are actually encrypted.
            encryptor = cipher.encryptor()
            ciphertext = encryptor.update(plaintext) + encryptor.finalize()
            # Only the value of an existing key changes, so the dict size is
            # stable while it is being iterated.
            item[key] = b64encode(ciphertext).decode("utf-8", "replace")

View File

@@ -1,5 +1,6 @@
import json
import urllib
from copy import deepcopy
from django.conf import settings
from django.http import HttpResponse, JsonResponse
@@ -18,6 +19,7 @@ from core.lib.threshold import (
get_chans,
get_users,
)
from core.views.helpers import hash_list, hash_lookup
from core.views.ui.tables import DrilldownTable
@@ -266,58 +268,65 @@ class DrilldownContextModal(APIView):
if key not in query_params:
query_params[key] = None
# Lookup the hash values but don't disclose them to the user
if settings.HASHING:
SAFE_PARAMS = deepcopy(query_params)
hash_lookup(SAFE_PARAMS)
type = None
# SUPERUSER BLOCK #
if request.user.is_superuser:
if "type" in query_params:
type = query_params["type"]
if "type" in SAFE_PARAMS:
type = SAFE_PARAMS["type"]
if type == "znc":
query_params["channel"] = "*status"
SAFE_PARAMS["channel"] = "*status"
if type in ["query", "notice"]:
nicks = [query_params["channel"], query_params["nick"]]
nicks = [SAFE_PARAMS["channel"], SAFE_PARAMS["nick"]]
query = True
if (
query_params["index"] == "int"
and query_params["mtype"] == "msg"
SAFE_PARAMS["index"] == "int"
and SAFE_PARAMS["mtype"] == "msg"
and not type == "query"
):
query_params["index"] = "main"
SAFE_PARAMS["index"] = "main"
if query_params["type"] in ["znc", "auth"]:
if SAFE_PARAMS["type"] in ["znc", "auth"]:
query = True
# SUPERUSER BLOCK #
if not request.user.is_superuser:
if "index" in query_params:
query_params["index"] = "main"
if "index" in SAFE_PARAMS:
SAFE_PARAMS["index"] = "main"
query_params["sorting"] = "desc"
SAFE_PARAMS["sorting"] = "desc"
annotate = False
if SAFE_PARAMS["src"] == "irc":
if SAFE_PARAMS["type"] in ["query", "notice", "msg", "highlight"]:
annotate = True
# Create the query with the context helper
search_query = construct_query(
query_params["index"],
query_params["net"],
query_params["channel"],
query_params["src"],
query_params["num"],
SAFE_PARAMS["index"],
SAFE_PARAMS["net"],
SAFE_PARAMS["channel"],
SAFE_PARAMS["src"],
SAFE_PARAMS["num"],
size,
type=type,
nicks=nicks,
)
annotate = False
if query_params["src"] == "irc":
if query_params["type"] in ["query", "notice", "msg", "highlight"]:
annotate = True
results = query_results(
request,
query_params,
SAFE_PARAMS,
annotate=annotate,
custom_query=search_query,
reverse=True,
dedup_fields=["net", "type", "msg"],
lookup_hashes=False,
)
if "message" in results:
return render(request, self.template_name, results)
@@ -362,21 +371,43 @@ class ThresholdInfoModal(APIView):
return JsonResponse({"success": False})
if "channel" not in request.data:
return JsonResponse({"success": False})
net = request.data["net"]
nick = request.data["nick"]
channel = request.data["channel"]
channels = get_chans(net, [nick])
users = get_users(net, [channel])
num_users = annotate_num_users(net, channels)
num_chans = annotate_num_chans(net, users)
# SAFE BLOCK #
# Lookup the hash values but don't disclose them to the user
if settings.HASHING:
SAFE_PARAMS = request.data.dict()
hash_lookup(SAFE_PARAMS)
safe_net = SAFE_PARAMS["net"]
safe_nick = SAFE_PARAMS["nick"]
safe_channel = SAFE_PARAMS["channel"]
channels = get_chans(safe_net, [safe_nick])
users = get_users(safe_net, [safe_channel])
num_users = annotate_num_users(safe_net, channels)
num_chans = annotate_num_chans(safe_net, users)
if channels:
inter_users = get_users(net, channels)
inter_users = get_users(safe_net, channels)
else:
inter_users = []
if users:
inter_chans = get_chans(net, users)
inter_chans = get_chans(safe_net, users)
else:
inter_chans = []
hash_list(inter_chans)
hash_list(inter_users)
hash_list(num_chans, hash_keys=True)
hash_list(num_users, hash_keys=True)
hash_list(channels)
hash_list(users)
# SAFE BLOCK END #
nick = nick.replace("|", "")
channel = channel.replace("|", "")
context = {
"net": net,
"nick": nick,

View File

@@ -56,7 +56,7 @@ class DrilldownTable(Table):
sentiment = Column()
status = Column()
user = Column()
version_sentiment = Column()
# version_sentiment = Column()
exemption = Column()
num_chans = Column()
num_users = Column()