Implement hashing fields
This commit is contained in:
parent
3d8519154b
commit
c984e70689
|
@ -253,7 +253,7 @@ urlpatterns = [
|
|||
name="threshold_irc_network_list",
|
||||
),
|
||||
path(
|
||||
"manage/threshold/irc/msg/<str:net>/<int:num>/",
|
||||
"manage/threshold/irc/msg/<str:net>/<str:num>/",
|
||||
ThresholdIRCSendMessage.as_view(),
|
||||
name="threshold_irc_msg",
|
||||
),
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
import stripe
|
||||
from django.conf import settings
|
||||
from redis import StrictRedis
|
||||
|
||||
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
|
||||
|
||||
if settings.STRIPE_TEST:
|
||||
stripe.api_key = settings.STRIPE_API_KEY_TEST
|
||||
|
|
|
@ -1,9 +1,11 @@
|
|||
from copy import deepcopy
|
||||
|
||||
from django.conf import settings
|
||||
from opensearchpy import OpenSearch
|
||||
from opensearchpy.exceptions import NotFoundError, RequestError
|
||||
|
||||
from core.lib.threshold import annotate_num_chans, annotate_num_users, annotate_online
|
||||
from core.views.helpers import dedup_list
|
||||
from core.views.helpers import dedup_list, encrypt_list, hash_list, hash_lookup
|
||||
|
||||
|
||||
def initialise_opensearch():
|
||||
|
@ -258,6 +260,7 @@ def query_results(
|
|||
reverse=False,
|
||||
dedup=False,
|
||||
dedup_fields=None,
|
||||
lookup_hashes=True,
|
||||
):
|
||||
"""
|
||||
API helper to alter the OpenSearch return format into something
|
||||
|
@ -273,6 +276,13 @@ def query_results(
|
|||
add_top = []
|
||||
add_top_negative = []
|
||||
sort = None
|
||||
|
||||
# Lookup the hash values but don't disclose them to the user
|
||||
if lookup_hashes:
|
||||
if settings.HASHING:
|
||||
query_params = deepcopy(query_params)
|
||||
hash_lookup(query_params)
|
||||
|
||||
if request.user.is_anonymous:
|
||||
sizes = settings.OPENSEARCH_MAIN_SIZES_ANON
|
||||
else:
|
||||
|
@ -397,6 +407,7 @@ def query_results(
|
|||
return {"message": message, "class": message_class}
|
||||
else:
|
||||
index = settings.OPENSEARCH_INDEX_MAIN
|
||||
|
||||
results = run_main_query(
|
||||
client,
|
||||
request.user, # passed through run_main_query to filter_blacklisted
|
||||
|
@ -436,6 +447,15 @@ def query_results(
|
|||
dedup_fields = ["msg", "nick", "ident", "host", "net", "channel"]
|
||||
results_parsed = dedup_list(results_parsed, dedup_fields)
|
||||
|
||||
if settings.ENCRYPTION:
|
||||
encrypt_list(results_parsed, settings.ENCRYPTION_KEY)
|
||||
|
||||
if settings.HASHING:
|
||||
hash_list(results_parsed)
|
||||
|
||||
# process_list(results)
|
||||
|
||||
# IMPORTANT! - DO NOT PASS query_params to the user!
|
||||
context = {
|
||||
"object_list": results_parsed,
|
||||
"card": results["hits"]["total"]["value"],
|
||||
|
|
|
@ -44,9 +44,8 @@ $(document).ready(function(){
|
|||
"num_users": "off",
|
||||
"num_chans": "off",
|
||||
"exemption": "off",
|
||||
"version_sentiment": "off",
|
||||
// "version_sentiment": "off",
|
||||
"num": "off",
|
||||
"exemption": "off",
|
||||
"online": "off",
|
||||
"mtype": "off",
|
||||
"realname": "off",
|
||||
|
|
|
@ -49,8 +49,15 @@
|
|||
populateSearch(field, value);
|
||||
});
|
||||
}
|
||||
var plain_fields = ["ts", "date", "time", "sentiment", "version_sentiment", "tokens", "num_chans", "num_users", "tokens", "src", "exemption", "hidden"];
|
||||
function populateSearch(field, value) {
|
||||
var queryElement = document.getElementById('query');
|
||||
|
||||
if (!plain_fields.includes(field)) {
|
||||
if (!value.startsWith("|") && !value.endsWith("|")) {
|
||||
value = `|${value}|`;
|
||||
}
|
||||
}
|
||||
var present = true;
|
||||
if (present == true) {
|
||||
var combinations = [`${field}: "${value}"`,
|
||||
|
|
|
@ -238,16 +238,16 @@
|
|||
class="has-text-grey is-underlined"
|
||||
hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}'
|
||||
hx-post="{% url 'modal_context' %}"
|
||||
hx-vals='{"net": "{{ row.cells.net|escapejs }}",
|
||||
"num": "{{ row.cells.num|escapejs }}",
|
||||
hx-vals='{"net": "|{{ row.cells.net|escapejs }}|",
|
||||
"num": "|{{ row.cells.num|escapejs }}|",
|
||||
"src": "{{ row.cells.src|escapejs }}",
|
||||
"channel": "{{ row.cells.channel|escapejs }}",
|
||||
"channel": "|{{ row.cells.channel|escapejs }}|",
|
||||
"time": "{{ row.cells.time|escapejs }}",
|
||||
"date": "{{ row.cells.date|escapejs }}",
|
||||
"index": "{{ params.index }}",
|
||||
"type": "{{ row.cells.type }}",
|
||||
"type": "|{{ row.cells.type }}|",
|
||||
"mtype": "{{ row.cells.mtype }}",
|
||||
"nick": "{{ row.cells.nick|escapejs }}",
|
||||
"nick": "|{{ row.cells.nick|escapejs }}|",
|
||||
"dedup": "{{ params.dedup }}"}'
|
||||
hx-target="#modals-here"
|
||||
hx-trigger="click"
|
||||
|
@ -281,7 +281,7 @@
|
|||
<button
|
||||
hx-headers='{"X-CSRFToken": "{{ csrf_token }}"}'
|
||||
hx-post="{% url 'modal_drilldown' %}"
|
||||
hx-vals='{"net": "{{ row.cells.net }}", "nick": "{{ row.cells.nick }}", "channel": "{{ row.cells.channel }}"}'
|
||||
hx-vals='{"net": "|{{ row.cells.net }}|", "nick": "|{{ row.cells.nick }}|", "channel": "|{{ row.cells.channel }}|"}'
|
||||
hx-target="#modals-here"
|
||||
hx-trigger="click"
|
||||
class="button is-small">
|
||||
|
|
|
@ -1,3 +1,15 @@
|
|||
import re
|
||||
from base64 import b64encode
|
||||
|
||||
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms
|
||||
from cryptography.hazmat.primitives.ciphers.modes import ECB
|
||||
from django.conf import settings
|
||||
from siphashc import siphash
|
||||
from sortedcontainers import SortedSet
|
||||
|
||||
from core import r
|
||||
|
||||
|
||||
def dedup_list(data, check_keys):
|
||||
"""
|
||||
Remove duplicate dictionaries from list.
|
||||
|
@ -35,3 +47,124 @@ def dedup_list(data, check_keys):
|
|||
|
||||
# # sh-5.1$ python helpers.py
|
||||
# # 1.0805372429895215
|
||||
|
||||
|
||||
def base36encode(number, alphabet="0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
    """
    Render an integer as a string of digits drawn from ``alphabet``.

    The radix is ``len(alphabet)`` (36 with the default alphabet). Negative
    numbers are rendered as their absolute value prefixed with "-".

    Raises TypeError if ``number`` is not an int.
    """
    if not isinstance(number, int):
        raise TypeError("number must be an integer")

    radix = len(alphabet)
    prefix = "-" if number < 0 else ""
    remaining = abs(number)

    # Single-digit values map straight onto the alphabet.
    if 0 <= remaining < radix:
        return prefix + alphabet[remaining]

    digits = []
    while remaining != 0:
        remaining, position = divmod(remaining, radix)
        digits.append(alphabet[position])

    # The loop yields the least-significant digit first.
    return prefix + "".join(reversed(digits))
|
||||
|
||||
|
||||
def base36decode(number):
    """
    Parse a base-36 string back into an integer.

    Parsing is delegated to ``int(number, 36)``.
    """
    radix = 36
    decoded = int(number, radix)
    return decoded
|
||||
|
||||
|
||||
def hash_list(data, hash_keys=False):
    """
    Replace values (or keys) in ``data`` with their base36-encoded SipHash.

    ``data`` is modified in place and nothing is returned. The shapes handled
    are:

    * a list of dicts - every value whose key is not in
      ``settings.WHITELIST_FIELDS`` is replaced by its hash;
    * a list of strings - every string is replaced by its hash;
    * a dict with ``hash_keys=True`` - every non-whitelisted key is renamed
      to its hash while the value is kept as it is.

    Entries whose value is a bool or ``None`` are skipped; integers are hashed
    via their string form. Every hash -> original pair produced is written to
    the ``cache.hash`` Redis hash, which is where ``hash_lookup`` reads from.
    """
    cache = "cache.hash"
    hash_table = {}
    if isinstance(data, dict):
        # Present each entry as its own one-item dict so the loop below can
        # treat a dict the same way as a list of dicts.
        items = [{key: data[key]} for key in data]
    else:
        # Iterate over a shallow copy because ``data`` is mutated in the loop.
        items = list(data)

    for index, item in enumerate(items):
        if isinstance(item, dict):
            for key, value in list(item.items()):
                if key in settings.WHITELIST_FIELDS:
                    continue
                # bool must be tested before int: bool is a subclass of int,
                # so the int branch would otherwise stringify and hash it.
                if value is None or isinstance(value, bool):
                    continue
                if isinstance(value, int):
                    value = str(value)

                original = key if hash_keys else value
                encoded = base36encode(siphash(settings.HASHING_KEY, original))
                # Keep the first original seen for a given hash.
                hash_table.setdefault(encoded, original)

                if hash_keys:
                    # Rename the dict key
                    data[encoded] = data.pop(key)
                else:
                    data[index][key] = encoded
        elif isinstance(item, str):
            encoded = base36encode(siphash(settings.HASHING_KEY, item))
            hash_table.setdefault(encoded, item)
            data[index] = encoded

    if hash_table:
        # hmset() is deprecated in redis-py; hset(mapping=...) is the
        # supported way to write several fields at once.
        r.hset(cache, mapping=hash_table)
|
||||
|
||||
|
||||
def hash_lookup(data_dict):
    """
    Resolve ``|hash|`` placeholders in the values of ``data_dict`` in place.

    Every string value is scanned for pipe-delimited tokens. The tokens are
    looked up in the ``cache.hash`` Redis hash and each ``|token|`` occurrence
    is replaced by the cached original. A token that is missing from the
    cache is replaced by the literal string "ERR". Falsy and non-string
    values are left untouched. Returns None.
    """
    cache = "cache.hash"
    tokens = SortedSet()
    for value in data_dict.values():
        # Only non-empty strings can carry placeholders.
        if not value or not isinstance(value, str):
            continue
        for token in re.findall(r"\|([^|]*)\|", value):
            tokens.add(token)

    if not tokens:
        return

    cached = r.hmget(cache, *tokens)
    if not cached:
        return

    # Map each token to its replacement text. Misses must become "ERR"
    # without calling .decode() on them: "ERR" is already a str.
    resolved = {}
    for token, raw in zip(tokens, cached):
        if not raw:
            resolved[token] = "ERR"
        elif isinstance(raw, bytes):
            resolved[token] = raw.decode()
        else:
            resolved[token] = raw

    for key, value in data_dict.items():
        if not value or not isinstance(value, str):
            continue
        for token, original in resolved.items():
            value = value.replace(f"|{token}|", original)
        # Re-assigning an existing key is safe while iterating the dict.
        data_dict[key] = value
|
||||
|
||||
|
||||
def encrypt_list(data, secret):
    """
    Encrypt the non-whitelisted values of a list of dicts in place.

    For every key that is not in ``settings.WHITELIST_FIELDS`` the value is
    UTF-8 encoded, padded to the 16-byte AES block size (each padding byte
    holds the padding length), encrypted with AES keyed by ``secret`` and
    stored back as base64 text. Booleans and ``None`` are left untouched;
    integers are encrypted via their string form. Returns None.

    NOTE(review): ECB mode encrypts equal plaintexts to equal ciphertexts.
    The mode is left unchanged here because whatever decrypts these values
    is not visible from this function - confirm before switching modes.
    """
    cipher = Cipher(algorithms.AES(secret), ECB())
    for index, item in enumerate(data):
        for key, value in item.items():
            if key in settings.WHITELIST_FIELDS:
                continue
            # bool must be tested before int: bool is a subclass of int, so
            # the int branch would otherwise stringify and encrypt it.
            if value is None or isinstance(value, bool):
                continue
            if isinstance(value, int):
                value = str(value)

            plaintext = value.encode("utf8", "replace")
            pad_length = 16 - (len(plaintext) % 16)
            plaintext += bytes([pad_length]) * pad_length

            # A fresh encryptor per value: finalize() ends its context.
            encryptor = cipher.encryptor()
            ciphertext = encryptor.update(plaintext) + encryptor.finalize()
            data[index][key] = b64encode(ciphertext).decode("utf-8", "replace")
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import json
|
||||
import urllib
|
||||
from copy import deepcopy
|
||||
|
||||
from django.conf import settings
|
||||
from django.http import HttpResponse, JsonResponse
|
||||
|
@ -18,6 +19,7 @@ from core.lib.threshold import (
|
|||
get_chans,
|
||||
get_users,
|
||||
)
|
||||
from core.views.helpers import hash_list, hash_lookup
|
||||
from core.views.ui.tables import DrilldownTable
|
||||
|
||||
|
||||
|
@ -266,58 +268,65 @@ class DrilldownContextModal(APIView):
|
|||
if key not in query_params:
|
||||
query_params[key] = None
|
||||
|
||||
# Lookup the hash values but don't disclose them to the user
|
||||
if settings.HASHING:
|
||||
SAFE_PARAMS = deepcopy(query_params)
|
||||
hash_lookup(SAFE_PARAMS)
|
||||
|
||||
type = None
|
||||
# SUPERUSER BLOCK #
|
||||
if request.user.is_superuser:
|
||||
if "type" in query_params:
|
||||
type = query_params["type"]
|
||||
if "type" in SAFE_PARAMS:
|
||||
type = SAFE_PARAMS["type"]
|
||||
if type == "znc":
|
||||
query_params["channel"] = "*status"
|
||||
SAFE_PARAMS["channel"] = "*status"
|
||||
|
||||
if type in ["query", "notice"]:
|
||||
nicks = [query_params["channel"], query_params["nick"]]
|
||||
nicks = [SAFE_PARAMS["channel"], SAFE_PARAMS["nick"]]
|
||||
query = True
|
||||
|
||||
if (
|
||||
query_params["index"] == "int"
|
||||
and query_params["mtype"] == "msg"
|
||||
SAFE_PARAMS["index"] == "int"
|
||||
and SAFE_PARAMS["mtype"] == "msg"
|
||||
and not type == "query"
|
||||
):
|
||||
query_params["index"] = "main"
|
||||
SAFE_PARAMS["index"] = "main"
|
||||
|
||||
if query_params["type"] in ["znc", "auth"]:
|
||||
if SAFE_PARAMS["type"] in ["znc", "auth"]:
|
||||
query = True
|
||||
|
||||
# SUPERUSER BLOCK #
|
||||
|
||||
if not request.user.is_superuser:
|
||||
if "index" in query_params:
|
||||
query_params["index"] = "main"
|
||||
if "index" in SAFE_PARAMS:
|
||||
SAFE_PARAMS["index"] = "main"
|
||||
|
||||
query_params["sorting"] = "desc"
|
||||
SAFE_PARAMS["sorting"] = "desc"
|
||||
|
||||
annotate = False
|
||||
if SAFE_PARAMS["src"] == "irc":
|
||||
if SAFE_PARAMS["type"] in ["query", "notice", "msg", "highlight"]:
|
||||
annotate = True
|
||||
# Create the query with the context helper
|
||||
search_query = construct_query(
|
||||
query_params["index"],
|
||||
query_params["net"],
|
||||
query_params["channel"],
|
||||
query_params["src"],
|
||||
query_params["num"],
|
||||
SAFE_PARAMS["index"],
|
||||
SAFE_PARAMS["net"],
|
||||
SAFE_PARAMS["channel"],
|
||||
SAFE_PARAMS["src"],
|
||||
SAFE_PARAMS["num"],
|
||||
size,
|
||||
type=type,
|
||||
nicks=nicks,
|
||||
)
|
||||
annotate = False
|
||||
if query_params["src"] == "irc":
|
||||
if query_params["type"] in ["query", "notice", "msg", "highlight"]:
|
||||
annotate = True
|
||||
|
||||
results = query_results(
|
||||
request,
|
||||
query_params,
|
||||
SAFE_PARAMS,
|
||||
annotate=annotate,
|
||||
custom_query=search_query,
|
||||
reverse=True,
|
||||
dedup_fields=["net", "type", "msg"],
|
||||
lookup_hashes=False,
|
||||
)
|
||||
if "message" in results:
|
||||
return render(request, self.template_name, results)
|
||||
|
@ -362,21 +371,43 @@ class ThresholdInfoModal(APIView):
|
|||
return JsonResponse({"success": False})
|
||||
if "channel" not in request.data:
|
||||
return JsonResponse({"success": False})
|
||||
|
||||
net = request.data["net"]
|
||||
nick = request.data["nick"]
|
||||
channel = request.data["channel"]
|
||||
channels = get_chans(net, [nick])
|
||||
users = get_users(net, [channel])
|
||||
num_users = annotate_num_users(net, channels)
|
||||
num_chans = annotate_num_chans(net, users)
|
||||
|
||||
# SAFE BLOCK #
|
||||
# Lookup the hash values but don't disclose them to the user
|
||||
if settings.HASHING:
|
||||
SAFE_PARAMS = request.data.dict()
|
||||
hash_lookup(SAFE_PARAMS)
|
||||
safe_net = SAFE_PARAMS["net"]
|
||||
safe_nick = SAFE_PARAMS["nick"]
|
||||
safe_channel = SAFE_PARAMS["channel"]
|
||||
channels = get_chans(safe_net, [safe_nick])
|
||||
users = get_users(safe_net, [safe_channel])
|
||||
num_users = annotate_num_users(safe_net, channels)
|
||||
num_chans = annotate_num_chans(safe_net, users)
|
||||
if channels:
|
||||
inter_users = get_users(net, channels)
|
||||
inter_users = get_users(safe_net, channels)
|
||||
else:
|
||||
inter_users = []
|
||||
if users:
|
||||
inter_chans = get_chans(net, users)
|
||||
inter_chans = get_chans(safe_net, users)
|
||||
else:
|
||||
inter_chans = []
|
||||
hash_list(inter_chans)
|
||||
hash_list(inter_users)
|
||||
|
||||
hash_list(num_chans, hash_keys=True)
|
||||
hash_list(num_users, hash_keys=True)
|
||||
|
||||
hash_list(channels)
|
||||
hash_list(users)
|
||||
|
||||
# SAFE BLOCK END #
|
||||
nick = nick.replace("|", "")
|
||||
channel = channel.replace("|", "")
|
||||
context = {
|
||||
"net": net,
|
||||
"nick": nick,
|
||||
|
|
|
@ -56,7 +56,7 @@ class DrilldownTable(Table):
|
|||
sentiment = Column()
|
||||
status = Column()
|
||||
user = Column()
|
||||
version_sentiment = Column()
|
||||
# version_sentiment = Column()
|
||||
exemption = Column()
|
||||
num_chans = Column()
|
||||
num_users = Column()
|
||||
|
|
|
@ -12,6 +12,8 @@ services:
|
|||
- "${NEPTUNE_PORT}:8000"
|
||||
env_file:
|
||||
- .env
|
||||
volumes_from:
|
||||
- tmp
|
||||
|
||||
# pyroscope:
|
||||
# image: pyroscope/pyroscope
|
||||
|
@ -22,6 +24,20 @@ services:
|
|||
# command:
|
||||
# - 'server'
|
||||
|
||||
tmp:
|
||||
image: busybox
|
||||
command: chmod -R 777 /var/run/redis
|
||||
volumes:
|
||||
- /var/run/redis
|
||||
|
||||
redis:
|
||||
image: redis
|
||||
command: redis-server /etc/redis.conf
|
||||
volumes:
|
||||
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
|
||||
volumes_from:
|
||||
- tmp
|
||||
|
||||
networks:
|
||||
default:
|
||||
external:
|
||||
|
|
|
@ -10,3 +10,7 @@ uwsgi
|
|||
django-tables2
|
||||
django-tables2-bulma-template
|
||||
django-htmx
|
||||
cryptography
|
||||
siphashc
|
||||
redis
|
||||
sortedcontainers
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
unixsocket /var/run/redis/redis.sock
|
||||
unixsocketperm 777
|
|
@ -9,3 +9,7 @@ numpy
|
|||
django-tables2
|
||||
django-tables2-bulma-template
|
||||
django-htmx
|
||||
cryptography
|
||||
siphashc
|
||||
redis
|
||||
sortedcontainers
|
||||
|
|
|
@ -10,3 +10,7 @@ numpy
|
|||
django-tables2
|
||||
django-tables2-bulma-template
|
||||
django-htmx
|
||||
cryptography
|
||||
siphashc
|
||||
redis
|
||||
sortedcontainers
|
||||
|
|
Loading…
Reference in New Issue