|
|
@ -1,20 +1,17 @@
|
|
|
|
import random
|
|
|
|
|
|
|
|
from os import getenv
|
|
|
|
from os import getenv
|
|
|
|
|
|
|
|
|
|
|
|
import aioredis
|
|
|
|
import aioredis
|
|
|
|
import orjson
|
|
|
|
import orjson
|
|
|
|
import redis
|
|
|
|
import redis
|
|
|
|
|
|
|
|
|
|
|
|
# Kafka
|
|
|
|
# Elasticsearch
|
|
|
|
from aiokafka import AIOKafkaProducer
|
|
|
|
from elasticsearch import AsyncElasticsearch
|
|
|
|
|
|
|
|
|
|
|
|
import util
|
|
|
|
import util
|
|
|
|
|
|
|
|
|
|
|
|
trues = ("true", "1", "t", True)
|
|
|
|
trues = ("true", "1", "t", True)
|
|
|
|
|
|
|
|
|
|
|
|
MONOLITH_KAFKA_ENABLED = getenv("MONOLITH_KAFKA_ENABLED", "false").lower() in trues
|
|
|
|
# INDEX = "msg"
|
|
|
|
|
|
|
|
|
|
|
|
# KAFKA_TOPIC = "msg"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
log = util.get_logger("db")
|
|
|
|
log = util.get_logger("db")
|
|
|
|
|
|
|
|
|
|
|
@ -47,15 +44,54 @@ TYPES_META = ["who"]
|
|
|
|
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
|
|
|
|
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
|
|
|
|
KEYNAME = "queue"
|
|
|
|
KEYNAME = "queue"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Elasticsearch connection settings, read from the environment at import time.
ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_PORT = int(getenv("ELASTICSEARCH_PORT", "9200"))
# Lower-case the value before the membership test: every string in `trues`
# is lower-case, so "True" or "TRUE" would otherwise be read as false.
ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false").lower() in trues

# Module-wide Elasticsearch client. Starts as None; store_batch() replaces it
# with the result of initialise_elasticsearch() the first time it runs.
client = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# These are sometimes numeric, sometimes strings.
# If they are seen to be numeric first, ES will erroneously
# index them as "long" and then subsequently fail to index messages
# with strings in the field.
keyword_fields = ["nick_id", "user_id", "net_id"]

# Explicit index mapping: the two timestamp fields are dates, and every
# field in keyword_fields is pinned to "text" so Elasticsearch never infers
# a numeric type for it. Built in one expression so no loop variable is left
# behind in the module namespace.
mapping = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
            **{name: {"type": "text"} for name in keyword_fields},
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _elasticsearch_url(host, port, tls):
    """Return a full Elasticsearch URL for the configured host, port and TLS flag."""
    if "://" in host:
        # Already a complete URL; use it exactly as configured.
        return host
    scheme = "https" if tls else "http"
    if ":" in host:
        # "host:port" form: keep the port that was given with the host.
        return f"{scheme}://{host}"
    return f"{scheme}://{host}:{port}"


async def initialise_elasticsearch():
    """
    Initialise the Elasticsearch client.

    Connects with the configured username and password, then makes sure the
    "main" and "restricted" indices carry the explicit field mapping: an
    index that already exists has its mapping updated, a missing one is
    created with it.

    The connection URL honours ELASTICSEARCH_PORT and ELASTICSEARCH_TLS when
    ELASTICSEARCH_HOST is a bare host name; a host that is already a full
    URL is passed through untouched.

    :return: the connected AsyncElasticsearch client
    :raises Exception: any error raised while checking, updating or creating
        an index is re-raised after the client has been closed
    """
    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
    url = _elasticsearch_url(
        ELASTICSEARCH_HOST, ELASTICSEARCH_PORT, ELASTICSEARCH_TLS
    )
    # NOTE(review): certificate verification is switched off unconditionally,
    # so TLS connections are not authenticated. Kept as-is to avoid breaking
    # deployments with self-signed certificates -- confirm this is intended.
    es = AsyncElasticsearch(url, http_auth=auth, verify_certs=False)
    properties = mapping["mappings"]["properties"]
    try:
        for index in ("main", "restricted"):
            if await es.indices.exists(index=index):
                # update index with mapping
                await es.indices.put_mapping(index=index, properties=properties)
            else:
                await es.indices.create(index=index, mappings=mapping["mappings"])
    except Exception:
        # Do not leak the open connection when index setup fails.
        await es.close()
        raise
    return es
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def store_kafka_batch(data):
|
|
|
|
async def store_batch(data):
|
|
|
|
if not MONOLITH_KAFKA_ENABLED:
|
|
|
|
global client
|
|
|
|
log.info(f"Not storing Kafka batch of length {len(data)}, Kafka is disabled.")
|
|
|
|
if not client:
|
|
|
|
return
|
|
|
|
client = await initialise_elasticsearch()
|
|
|
|
# log.debug(f"Storing Kafka batch of {len(data)} messages")
|
|
|
|
indexmap = {}
|
|
|
|
producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
|
|
|
|
|
|
|
|
await producer.start()
|
|
|
|
|
|
|
|
topicmap = {}
|
|
|
|
|
|
|
|
for msg in data:
|
|
|
|
for msg in data:
|
|
|
|
if msg["type"] in TYPES_MAIN:
|
|
|
|
if msg["type"] in TYPES_MAIN:
|
|
|
|
# index = "main"
|
|
|
|
# index = "main"
|
|
|
@ -68,7 +104,7 @@ async def store_kafka_batch(data):
|
|
|
|
index = "internal"
|
|
|
|
index = "internal"
|
|
|
|
# schema = mc_s.schema_int
|
|
|
|
# schema = mc_s.schema_int
|
|
|
|
|
|
|
|
|
|
|
|
KAFKA_TOPIC = index
|
|
|
|
INDEX = index
|
|
|
|
|
|
|
|
|
|
|
|
# if key in schema:
|
|
|
|
# if key in schema:
|
|
|
|
# if isinstance(value, int):
|
|
|
|
# if isinstance(value, int):
|
|
|
@ -76,45 +112,20 @@ async def store_kafka_batch(data):
|
|
|
|
# "text"
|
|
|
|
# "text"
|
|
|
|
# ):
|
|
|
|
# ):
|
|
|
|
# msg[key] = str(value)
|
|
|
|
# msg[key] = str(value)
|
|
|
|
body = orjson.dumps(msg)
|
|
|
|
# body = orjson.dumps(msg)
|
|
|
|
if "ts" not in msg:
|
|
|
|
if "ts" not in msg:
|
|
|
|
raise Exception("No TS in msg")
|
|
|
|
raise Exception("No TS in msg")
|
|
|
|
if KAFKA_TOPIC not in topicmap:
|
|
|
|
if INDEX not in indexmap:
|
|
|
|
topicmap[KAFKA_TOPIC] = [body]
|
|
|
|
indexmap[INDEX] = [msg]
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
topicmap[KAFKA_TOPIC].append(body)
|
|
|
|
indexmap[INDEX].append(msg)
|
|
|
|
|
|
|
|
|
|
|
|
for topic, messages in topicmap.items():
|
|
|
|
for index, index_messages in indexmap.items():
|
|
|
|
batch = producer.create_batch()
|
|
|
|
for message in index_messages:
|
|
|
|
for body in messages:
|
|
|
|
result = await client.index(index=index, body=message)
|
|
|
|
metadata = batch.append(key=None, value=body, timestamp=msg["ts"])
|
|
|
|
if not result["result"] == "created":
|
|
|
|
if metadata is None:
|
|
|
|
log.error(f"Indexing failed: {result}")
|
|
|
|
partitions = await producer.partitions_for(topic)
|
|
|
|
log.debug(f"Indexed {len(data)} messages in ES")
|
|
|
|
partition = random.choice(tuple(partitions))
|
|
|
|
|
|
|
|
await producer.send_batch(batch, topic, partition=partition)
|
|
|
|
|
|
|
|
# log.debug(
|
|
|
|
|
|
|
|
# (
|
|
|
|
|
|
|
|
# f"{batch.record_count()} messages sent to topic "
|
|
|
|
|
|
|
|
# f"{topic} partition {partition}"
|
|
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
batch = producer.create_batch()
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
partitions = await producer.partitions_for(topic)
|
|
|
|
|
|
|
|
partition = random.choice(tuple(partitions))
|
|
|
|
|
|
|
|
await producer.send_batch(batch, topic, partition=partition)
|
|
|
|
|
|
|
|
# log.debug(
|
|
|
|
|
|
|
|
# (
|
|
|
|
|
|
|
|
# f"{batch.record_count()} messages sent to topic "
|
|
|
|
|
|
|
|
# f"{topic} partition {partition}"
|
|
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
# )
|
|
|
|
|
|
|
|
log.debug(
|
|
|
|
|
|
|
|
"Kafka batches sent: "
|
|
|
|
|
|
|
|
+ ", ".join([tpc + ": " + str(len(topicmap[tpc])) for tpc in topicmap])
|
|
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
await producer.stop()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
async def queue_message(msg):
|
|
|
|
async def queue_message(msg):
|
|
|
|