2022-09-14 17:32:32 +00:00
|
|
|
import random
|
2022-09-04 20:40:04 +00:00
|
|
|
|
2022-09-07 06:20:30 +00:00
|
|
|
import aioredis
|
2022-09-16 16:09:49 +00:00
|
|
|
import orjson
|
|
|
|
|
|
|
|
# Kafka
|
2022-09-14 17:32:32 +00:00
|
|
|
from aiokafka import AIOKafkaProducer
|
2022-09-02 21:30:45 +00:00
|
|
|
from redis import StrictRedis
|
|
|
|
|
|
|
|
import util
|
2022-09-13 21:17:46 +00:00
|
|
|
|
2022-09-16 16:09:49 +00:00
|
|
|
# Logger shared by everything in this module.
log = util.get_logger("db")

# Both Redis clients below connect to the same UNIX socket and database.
_REDIS_SOCKET = "/var/run/redis/redis.sock"
_REDIS_DB = 0

# Redis (legacy): synchronous client.
r = StrictRedis(unix_socket_path=_REDIS_SOCKET, db=_REDIS_DB)

# AIORedis: asynchronous client used by the queue_* coroutines.
ar = aioredis.from_url(f"unix://{_REDIS_SOCKET}", db=_REDIS_DB)
|
|
|
|
|
|
|
|
# Message types whose Kafka topic is chosen per source through MAIN_SRC_MAP
# (see store_kafka_batch).
TYPES_MAIN = [
    "msg", "notice", "action",
    "part", "join", "kick", "quit",
    "nick", "mode", "topic",
    "update",
]

# Source identifier (msg["src"]) -> Kafka topic for TYPES_MAIN messages.
MAIN_SRC_MAP = {"dis": "main", "irc": "restricted", "4ch": "main"}

# Message types sent to the "meta" topic.
TYPES_META = ["who"]

# Message types sent to the "internal" topic.
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]

# Prefix of the per-source Redis keys that buffer queued messages.
KEYPREFIX = "queue."
|
|
|
|
|
|
|
|
|
|
|
|
def _kafka_topic_for(msg):
    """
    Return the Kafka topic for a message, or None if its type is unknown.
    """
    msg_type = msg["type"]
    if msg_type in TYPES_MAIN:
        return MAIN_SRC_MAP[msg["src"]]
    if msg_type in TYPES_META:
        return "meta"
    if msg_type in TYPES_INT:
        return "internal"
    return None


async def _send_kafka_batch(producer, batch, topic):
    """
    Send a filled batch to a randomly chosen partition of the topic.
    """
    partitions = await producer.partitions_for(topic)
    partition = random.choice(tuple(partitions))
    await producer.send_batch(batch, topic, partition=partition)
    log.debug(
        (
            f"{batch.record_count()} messages sent to topic "
            f"{topic} partition {partition}"
        )
    )


async def store_kafka_batch(data):
    """
    Serialise a batch of messages and publish them to Kafka.

    Each message is routed to a topic based on its "type" (and, for the
    main types, its "src"). Keys whose value is None are removed from the
    message dicts in place before serialisation. Every record is sent with
    its own message's "ts" as the Kafka timestamp.

    Messages with an unrecognised "type" are logged and skipped rather than
    being routed to whatever topic the previous message used.

    :param data: iterable of message dicts; each needs "type" and "ts",
        and "src" when the type is one of TYPES_MAIN
    :raises ValueError: if a message has no "ts" (or its "ts" is None)
    :raises KeyError: if a main-type message has a "src" that is not in
        MAIN_SRC_MAP
    """
    log.debug(f"Storing Kafka batch of {len(data)} messages")

    # Validate and group everything first so that malformed input fails
    # before a producer connection is opened.
    topicmap = {}
    for msg in data:
        topic = _kafka_topic_for(msg)
        if topic is None:
            log.warning(f"Skipping message with unknown type {msg['type']}")
            continue

        # normalise fields: drop keys that carry no value
        for key in [k for k, value in msg.items() if value is None]:
            del msg[key]

        if "ts" not in msg:
            raise ValueError("No TS in msg")

        # Keep the timestamp next to the body so each record is sent with
        # the timestamp of the message it came from.
        topicmap.setdefault(topic, []).append((orjson.dumps(msg), msg["ts"]))

    if not topicmap:
        return

    producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
    await producer.start()
    try:
        for topic, records in topicmap.items():
            batch = producer.create_batch()
            for body, ts in records:
                if batch.append(key=None, value=body, timestamp=ts) is not None:
                    continue

                # append() returned None: the batch has no room left. Flush
                # it and retry the same record on a fresh batch so that the
                # record is not lost.
                if batch.record_count():
                    await _send_kafka_batch(producer, batch, topic)
                    batch = producer.create_batch()
                    if batch.append(key=None, value=body, timestamp=ts) is not None:
                        continue

                # The record does not fit even into an empty batch.
                log.error(
                    f"Dropping message too large for a Kafka batch on topic {topic}"
                )

            # Flush whatever is left, but never send an empty batch.
            if batch.record_count():
                await _send_kafka_batch(producer, batch, topic)
    finally:
        # Always release the producer, even when sending fails part-way.
        await producer.stop()
|
|
|
|
|
2022-09-14 17:32:32 +00:00
|
|
|
|
2022-09-13 21:17:46 +00:00
|
|
|
async def queue_message(msg):
    """
    Add a single message to the Redis buffer of its source.

    The message is serialised with orjson and added to the Redis set named
    KEYPREFIX followed by the message's "src".
    """
    buffer_key = f"{KEYPREFIX}{msg['src']}"
    payload = orjson.dumps(msg)
    await ar.sadd(buffer_key, payload)
|
2022-09-04 20:40:04 +00:00
|
|
|
|
2022-09-14 17:32:32 +00:00
|
|
|
|
2022-09-13 21:17:46 +00:00
|
|
|
async def queue_message_bulk(data):
    """
    Queue multiple messages on the Redis buffer.

    Each message is serialised with orjson and added to the Redis set named
    KEYPREFIX followed by the message's "src". Messages are grouped by key
    first so that only one SADD call is issued per source, instead of one
    round trip per message.

    :param data: iterable of message dicts, each with a "src" key
    """
    members_by_key = {}
    for msg in data:
        key = f"{KEYPREFIX}{msg['src']}"
        members_by_key.setdefault(key, []).append(orjson.dumps(msg))

    # SADD accepts any number of members, so one call per key is enough.
    for key, members in members_by_key.items():
        await ar.sadd(key, *members)
|