Implement hashing fields

This commit is contained in:
Mark Veidemanis 2022-08-18 07:20:30 +01:00
parent 3d8519154b
commit c984e70689
Signed by: m
GPG Key ID: 5ACFCEED46C0904F
14 changed files with 261 additions and 38 deletions

View File

@ -253,7 +253,7 @@ urlpatterns = [
name="threshold_irc_network_list", name="threshold_irc_network_list",
), ),
path( path(
"manage/threshold/irc/msg/<str:net>/<int:num>/", "manage/threshold/irc/msg/<str:net>/<str:num>/",
ThresholdIRCSendMessage.as_view(), ThresholdIRCSendMessage.as_view(),
name="threshold_irc_msg", name="threshold_irc_msg",
), ),

View File

@ -1,5 +1,8 @@
import stripe import stripe
from django.conf import settings from django.conf import settings
from redis import StrictRedis
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
if settings.STRIPE_TEST: if settings.STRIPE_TEST:
stripe.api_key = settings.STRIPE_API_KEY_TEST stripe.api_key = settings.STRIPE_API_KEY_TEST

View File

@ -1,9 +1,11 @@
from copy import deepcopy
from django.conf import settings from django.conf import settings
from opensearchpy import OpenSearch from opensearchpy import OpenSearch
from opensearchpy.exceptions import NotFoundError, RequestError from opensearchpy.exceptions import NotFoundError, RequestError
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
from core.views.helpers import dedup_list from core.views.helpers import dedup_list, encrypt_list, hash_list, hash_lookup
def initialise_opensearch(): def initialise_opensearch():
@ -258,6 +260,7 @@ def query_results(
reverse=False, reverse=False,
dedup=False, dedup=False,
dedup_fields=None, dedup_fields=None,
lookup_hashes=True,
): ):
""" """
API helper to alter the OpenSearch return format into something API helper to alter the OpenSearch return format into something
@ -273,6 +276,13 @@ def query_results(
add_top = [] add_top = []
add_top_negative = [] add_top_negative = []
sort = None sort = None
# Lookup the hash values but don't disclose them to the user
if lookup_hashes:
if settings.HASHING:
query_params = deepcopy(query_params)
hash_lookup(query_params)
if request.user.is_anonymous: if request.user.is_anonymous:
sizes = settings.OPENSEARCH_MAIN_SIZES_ANON sizes = settings.OPENSEARCH_MAIN_SIZES_ANON
else: else:
@ -397,6 +407,7 @@ def query_results(
return {"message": message, "class": message_class} return {"message": message, "class": message_class}
else: else:
index = settings.OPENSEARCH_INDEX_MAIN index = settings.OPENSEARCH_INDEX_MAIN
results = run_main_query( results = run_main_query(
client, client,
request.user, # passed through run_main_query to filter_blacklisted request.user, # passed through run_main_query to filter_blacklisted
@ -436,6 +447,15 @@ def query_results(
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"] dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
results_parsed = dedup_list(results_parsed, dedup_fields) results_parsed = dedup_list(results_parsed, dedup_fields)
if settings.ENCRYPTION:
encrypt_list(results_parsed, settings.ENCRYPTION_KEY)
if settings.HASHING:
hash_list(results_parsed)
# process_list(reqults)
# IMPORTANT! - DO NOT PASS query_params to the user!
context = { context = {
"object_list": results_parsed, "object_list": results_parsed,
"card": results["hits"]["total"]["value"], "card": results["hits"]["total"]["value"],

View File

@ -44,9 +44,8 @@ $(document).ready(function(){
"num_users": "off", "num_users": "off",
"num_chans": "off", "num_chans": "off",
"exemption": "off", "exemption": "off",
"version_sentiment": "off", // "version_sentiment": "off",
"num": "off", "num": "off",
"exemption": "off",
"online": "off", "online": "off",
"mtype": "off", "mtype": "off",
"realname": "off", "realname": "off",

View File

@ -49,8 +49,15 @@
populateSearch(field, value); populateSearch(field, value);
}); });
} }
var plain_fields = ["ts", "date", "time", "sentiment", "version_sentiment", "tokens", "num_chans", "num_users", "tokens", "src", "exemption", "hidden"];
function populateSearch(field, value) { function populateSearch(field, value) {
var queryElement = document.getElementById('query'); var queryElement = document.getElementById('query');
if (!plain_fields.includes(field)) {
if (!value.startsWith("|") && !value.endsWith("|")) {
value = `|${value}|`;
}
}
var present = true; var present = true;
if (present == true) { if (present == true) {
var combinations = [`${field}: "${value}"`, var combinations = [`${field}: "${value}"`,

View File

@ -238,16 +238,16 @@
class="has-text-grey is-underlined" class="has-text-grey is-underlined"
hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}' hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}'
hx-post="{% url 'modal_context' %}" hx-post="{% url 'modal_context' %}"
hx-vals='{"net": "{{ row.cells.net|escapejs }}", hx-vals='{"net": "|{{ row.cells.net|escapejs }}|",
"num": "{{ row.cells.num|escapejs }}", "num": "|{{ row.cells.num|escapejs }}|",
"src": "{{ row.cells.src|escapejs }}", "src": "{{ row.cells.src|escapejs }}",
"channel": "{{ row.cells.channel|escapejs }}", "channel": "|{{ row.cells.channel|escapejs }}|",
"time": "{{ row.cells.time|escapejs }}", "time": "{{ row.cells.time|escapejs }}",
"date": "{{ row.cells.date|escapejs }}", "date": "{{ row.cells.date|escapejs }}",
"index": "{{ params.index }}", "index": "{{ params.index }}",
"type": "{{ row.cells.type }}", "type": "|{{ row.cells.type }}|",
"mtype": "{{ row.cells.mtype }}", "mtype": "{{ row.cells.mtype }}",
"nick": "{{ row.cells.nick|escapejs }}", "nick": "|{{ row.cells.nick|escapejs }}|",
"dedup": "{{ params.dedup }}"}' "dedup": "{{ params.dedup }}"}'
hx-target="#modals-here" hx-target="#modals-here"
hx-trigger="click" hx-trigger="click"
@ -281,7 +281,7 @@
<button <button
hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}' hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}'
hx-post="{% url 'modal_drilldown' %}" hx-post="{% url 'modal_drilldown' %}"
hx-vals='{"net": "{{ row.cells.net }}", "nick": "{{ row.cells.nick }}", "channel": "{{ row.cells.channel }}"}' hx-vals='{"net": "|{{ row.cells.net }}|", "nick": "|{{ row.cells.nick }}|", "channel": "|{{ row.cells.channel }}|"}'
hx-target="#modals-here" hx-target="#modals-here"
hx-trigger="click" hx-trigger="click"
class="button is-small"> class="button is-small">

View File

@ -1,3 +1,15 @@
import re
from base64 import b64encode
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms
from cryptography.hazmat.primitives.ciphers.modes import ECB
from django.conf import settings
from siphashc import siphash
from sortedcontainers import SortedSet
from core import r
def dedup_list(data, check_keys): def dedup_list(data, check_keys):
""" """
Remove duplicate dictionaries from list. Remove duplicate dictionaries from list.
@ -35,3 +47,124 @@ def dedup_list(data, check_keys):
# # sh-5.1$ python helpers.py # # sh-5.1$ python helpers.py
# # 1.0805372429895215 # # 1.0805372429895215
def base36encode(number, alphabet="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """Convert an integer to a string of digits drawn from ``alphabet``.

    The numeric base is ``len(alphabet)``; with the default alphabet that is
    base 36. Negative numbers are rendered with a leading ``-``.

    :param number: the integer to encode
    :param alphabet: the digit symbols, lowest value first
    :returns: the encoded string
    :raises TypeError: if ``number`` is not an integer
    :raises ValueError: if ``alphabet`` has fewer than two symbols
    """
    if not isinstance(number, int):
        raise TypeError("number must be an integer")

    base = len(alphabet)
    # A one-symbol alphabet would never shrink the number in the loop below
    # (infinite loop) and an empty one would divide by zero.
    if base < 2:
        raise ValueError("alphabet must contain at least two symbols")

    sign = "-" if number < 0 else ""
    number = abs(number)

    # Single-digit values (including zero) map straight onto the alphabet.
    if number < base:
        return sign + alphabet[number]

    # Collect digits least-significant first, then reverse once at the end.
    digits = []
    while number:
        number, remainder = divmod(number, base)
        digits.append(alphabet[remainder])

    return sign + "".join(reversed(digits))
def base36decode(number):
    """Parse a base-36 string back into an integer.

    :param number: the base-36 text to parse
    :returns: the decoded integer
    """
    return int(number, base=36)
def hash_list(data, hash_keys=False):
    """
    Replace values (or keys) in ``data`` with keyed SipHash digests, in place.

    ``data`` may be a list of dicts, a list of strings, or a single dict.
    Each digest is computed with ``siphash`` using ``settings.HASHING_KEY``
    and rendered with ``base36encode``. The digest -> plaintext pairs are
    written to the ``cache.hash`` Redis hash so that ``hash_lookup`` can
    reverse them later.

    Dict entries whose key is in ``settings.WHITELIST_FIELDS`` are left
    untouched, as are entries whose value is a bool or ``None``.

    :param data: the container to rewrite in place
    :param hash_keys: when true, dict keys are hashed (and renamed) instead
        of dict values
    :returns: None
    """
    cache = "cache.hash"
    hash_table = {}
    if isinstance(data, dict):
        # Present a lone dict as one single-pair dict per entry so the loop
        # below can treat it like a list of rows.
        data_copy = [{x: data[x]} for x in data]
    else:
        # Shallow snapshot: iterate the copy while writing into ``data``.
        data_copy = type(data)(data)

    for index, item in enumerate(data_copy):
        if isinstance(item, dict):
            # The dict that really owns this entry and receives the
            # write-back: ``data`` itself when a single dict was passed,
            # otherwise the row at this position.
            target = data if isinstance(data, dict) else data[index]
            for key, value in list(item.items()):
                if key in settings.WHITELIST_FIELDS:
                    continue
                # bool must be tested before int: bool is a subclass of int,
                # so the reverse order would stringify and hash booleans.
                if isinstance(value, bool) or value is None:
                    continue
                if isinstance(value, int):
                    value = str(value)

                plain = key if hash_keys else value
                encoded = base36encode(siphash(settings.HASHING_KEY, plain))
                hash_table.setdefault(encoded, plain)

                if hash_keys:
                    # Rename the dict key
                    target[encoded] = target.pop(key)
                else:
                    target[key] = encoded
        elif isinstance(item, str):
            encoded = base36encode(siphash(settings.HASHING_KEY, item))
            hash_table.setdefault(encoded, item)
            data[index] = encoded

    if hash_table:
        # hmset() is deprecated in redis-py; hset(mapping=...) replaces it.
        r.hset(cache, mapping=hash_table)
def hash_lookup(data_dict):
    """
    Expand ``|hash|`` tokens in the string values of ``data_dict``, in place.

    Every ``|...|`` token found in a value is looked up in the
    ``cache.hash`` Redis hash and replaced by the stored plaintext. Tokens
    with no cache entry are replaced by the literal ``ERR``. Values that are
    empty or not strings are left untouched.

    :param data_dict: mapping whose string values may contain hash tokens
    :returns: None
    """
    cache = "cache.hash"

    # Sorted, de-duplicated tokens: the order must be stable because the
    # Redis reply is matched back to the tokens by position.
    tokens = SortedSet()
    for value in data_dict.values():
        # Only non-empty strings can carry tokens; findall() would raise
        # TypeError on numbers, lists and the like.
        if not value or not isinstance(value, str):
            continue
        for token in re.findall(r"\|([^\|]*)\|", value):
            tokens.add(token)

    if not tokens:
        return

    values = r.hmget(cache, *tokens)
    if not values:
        return

    resolved = {}
    for token, raw in zip(tokens, values):
        if not raw:
            # Unknown hash: substitute a marker rather than failing.
            resolved[token] = "ERR"
        elif isinstance(raw, bytes):
            resolved[token] = raw.decode()
        else:
            # Already text, e.g. a client created with decode_responses=True
            resolved[token] = raw

    for key, value in data_dict.items():
        if not value or not isinstance(value, str):
            continue
        for token, plain in resolved.items():
            value = value.replace(f"|{token}|", plain)
        # Reassigning an existing key is safe while iterating the dict.
        data_dict[key] = value
def encrypt_list(data, secret):
    """
    Encrypt the values of each dict in ``data`` with AES, in place.

    Each encrypted value is replaced by the base64 text of its ciphertext.
    Entries whose key is in ``settings.WHITELIST_FIELDS`` are left
    untouched, as are entries whose value is a bool or ``None``. Integers
    are converted to their decimal string before encryption.

    :param data: iterable of dicts to rewrite in place
    :param secret: the AES key, passed straight to ``algorithms.AES``
    :returns: None
    """
    # NOTE(review): ECB encrypts equal plaintext blocks to equal ciphertext
    # blocks, so repeated values are recognisable. Presumably deliberate
    # here so equal values stay comparable - confirm before changing mode.
    cipher = Cipher(algorithms.AES(secret), ECB())
    for item in data:
        for key, value in item.items():
            if key in settings.WHITELIST_FIELDS:
                continue
            # bool must be tested before int: bool is a subclass of int, so
            # the reverse order would encrypt "True"/"False" instead of
            # skipping the value.
            if isinstance(value, bool) or value is None:
                continue
            if isinstance(value, int):
                value = str(value)

            plaintext = value.encode("utf8", "replace")
            # Pad to the 16-byte AES block size with N bytes of value N;
            # already-aligned input gains one full block of padding.
            pad = 16 - (len(plaintext) % 16)
            plaintext += bytes([pad]) * pad

            # An encryptor is single-use once finalized, so make one per value.
            encryptor = cipher.encryptor()
            ciphertext = encryptor.update(plaintext) + encryptor.finalize()
            # Only the value of an existing key changes, which is safe while
            # iterating the dict.
            item[key] = b64encode(ciphertext).decode("utf-8", "replace")

View File

@ -1,5 +1,6 @@
import json import json
import urllib import urllib
from copy import deepcopy
from django.conf import settings from django.conf import settings
from django.http import HttpResponse, JsonResponse from django.http import HttpResponse, JsonResponse
@ -18,6 +19,7 @@ from core.lib.threshold import (
get_chans, get_chans,
get_users, get_users,
) )
from core.views.helpers import hash_list, hash_lookup
from core.views.ui.tables import DrilldownTable from core.views.ui.tables import DrilldownTable
@ -266,58 +268,65 @@ class DrilldownContextModal(APIView):
if key not in query_params: if key not in query_params:
query_params[key] = None query_params[key] = None
# Lookup the hash values but don't disclose them to the user
if settings.HASHING:
SAFE_PARAMS = deepcopy(query_params)
hash_lookup(SAFE_PARAMS)
type = None type = None
# SUPERUSER BLOCK # # SUPERUSER BLOCK #
if request.user.is_superuser: if request.user.is_superuser:
if "type" in query_params: if "type" in SAFE_PARAMS:
type = query_params["type"] type = SAFE_PARAMS["type"]
if type == "znc": if type == "znc":
query_params["channel"] = "*status" SAFE_PARAMS["channel"] = "*status"
if type in ["query", "notice"]: if type in ["query", "notice"]:
nicks = [query_params["channel"], query_params["nick"]] nicks = [SAFE_PARAMS["channel"], SAFE_PARAMS["nick"]]
query = True query = True
if ( if (
query_params["index"] == "int" SAFE_PARAMS["index"] == "int"
and query_params["mtype"] == "msg" and SAFE_PARAMS["mtype"] == "msg"
and not type == "query" and not type == "query"
): ):
query_params["index"] = "main" SAFE_PARAMS["index"] = "main"
if query_params["type"] in ["znc", "auth"]: if SAFE_PARAMS["type"] in ["znc", "auth"]:
query = True query = True
# SUPERUSER BLOCK # # SUPERUSER BLOCK #
if not request.user.is_superuser: if not request.user.is_superuser:
if "index" in query_params: if "index" in SAFE_PARAMS:
query_params["index"] = "main" SAFE_PARAMS["index"] = "main"
query_params["sorting"] = "desc" SAFE_PARAMS["sorting"] = "desc"
annotate = False
if SAFE_PARAMS["src"] == "irc":
if SAFE_PARAMS["type"] in ["query", "notice", "msg", "highlight"]:
annotate = True
# Create the query with the context helper # Create the query with the context helper
search_query = construct_query( search_query = construct_query(
query_params["index"], SAFE_PARAMS["index"],
query_params["net"], SAFE_PARAMS["net"],
query_params["channel"], SAFE_PARAMS["channel"],
query_params["src"], SAFE_PARAMS["src"],
query_params["num"], SAFE_PARAMS["num"],
size, size,
type=type, type=type,
nicks=nicks, nicks=nicks,
) )
annotate = False
if query_params["src"] == "irc":
if query_params["type"] in ["query", "notice", "msg", "highlight"]:
annotate = True
results = query_results( results = query_results(
request, request,
query_params, SAFE_PARAMS,
annotate=annotate, annotate=annotate,
custom_query=search_query, custom_query=search_query,
reverse=True, reverse=True,
dedup_fields=["net", "type", "msg"], dedup_fields=["net", "type", "msg"],
lookup_hashes=False,
) )
if "message" in results: if "message" in results:
return render(request, self.template_name, results) return render(request, self.template_name, results)
@ -362,21 +371,43 @@ class ThresholdInfoModal(APIView):
return JsonResponse({"success": False}) return JsonResponse({"success": False})
if "channel" not in request.data: if "channel" not in request.data:
return JsonResponse({"success": False}) return JsonResponse({"success": False})
net = request.data["net"] net = request.data["net"]
nick = request.data["nick"] nick = request.data["nick"]
channel = request.data["channel"] channel = request.data["channel"]
channels = get_chans(net, [nick])
users = get_users(net, [channel]) # SAFE BLOCK #
num_users = annotate_num_users(net, channels) # Lookup the hash values but don't disclose them to the user
num_chans = annotate_num_chans(net, users) if settings.HASHING:
SAFE_PARAMS = request.data.dict()
hash_lookup(SAFE_PARAMS)
safe_net = SAFE_PARAMS["net"]
safe_nick = SAFE_PARAMS["nick"]
safe_channel = SAFE_PARAMS["channel"]
channels = get_chans(safe_net, [safe_nick])
users = get_users(safe_net, [safe_channel])
num_users = annotate_num_users(safe_net, channels)
num_chans = annotate_num_chans(safe_net, users)
if channels: if channels:
inter_users = get_users(net, channels) inter_users = get_users(safe_net, channels)
else: else:
inter_users = [] inter_users = []
if users: if users:
inter_chans = get_chans(net, users) inter_chans = get_chans(safe_net, users)
else: else:
inter_chans = [] inter_chans = []
hash_list(inter_chans)
hash_list(inter_users)
hash_list(num_chans, hash_keys=True)
hash_list(num_users, hash_keys=True)
hash_list(channels)
hash_list(users)
# SAFE BLOCK END #
nick = nick.replace("|", "")
channel = channel.replace("|", "")
context = { context = {
"net": net, "net": net,
"nick": nick, "nick": nick,

View File

@ -56,7 +56,7 @@ class DrilldownTable(Table):
sentiment = Column() sentiment = Column()
status = Column() status = Column()
user = Column() user = Column()
version_sentiment = Column() # version_sentiment = Column()
exemption = Column() exemption = Column()
num_chans = Column() num_chans = Column()
num_users = Column() num_users = Column()

View File

@ -12,6 +12,8 @@ services:
- "${NEPTUNE_PORT}:8000" - "${NEPTUNE_PORT}:8000"
env_file: env_file:
- .env - .env
volumes_from:
- tmp
# pyroscope: # pyroscope:
# image: pyroscope/pyroscope # image: pyroscope/pyroscope
@ -22,6 +24,20 @@ services:
# command: # command:
# - 'server' # - 'server'
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
volumes_from:
- tmp
networks: networks:
default: default:
external: external:

View File

@ -10,3 +10,7 @@ uwsgi
django-tables2 django-tables2
django-tables2-bulma-template django-tables2-bulma-template
django-htmx django-htmx
cryptography
siphashc
redis
sortedcontainers

2
docker/redis.conf Normal file
View File

@ -0,0 +1,2 @@
unixsocket /var/run/redis/redis.sock
unixsocketperm 777

View File

@ -9,3 +9,7 @@ numpy
django-tables2 django-tables2
django-tables2-bulma-template django-tables2-bulma-template
django-htmx django-htmx
cryptography
siphashc
redis
sortedcontainers

View File

@ -10,3 +10,7 @@ numpy
django-tables2 django-tables2
django-tables2-bulma-template django-tables2-bulma-template
django-htmx django-htmx
cryptography
siphashc
redis
sortedcontainers