# monolith/db.py

from math import ceil

import aioredis
import manticoresearch
import ujson
from manticoresearch.rest import ApiException
from numpy import array_split
from redis import StrictRedis

import util
import random
from aiokafka import AIOKafkaProducer

# Manticore schema
from schemas import mc_s

# Manticore
# REST client objects for the Manticore Search instance. In the code visible
# here, api_instance is only referenced from commented-out legacy functions.
configuration = manticoresearch.Configuration(host="http://monolith-db-1:9308")
api_client = manticoresearch.ApiClient(configuration)
api_instance = manticoresearch.IndexApi(api_client)

# Kafka
# NOTE(review): AIOKafkaProducer is already imported at the top of the file;
# this second import is redundant.
from aiokafka import AIOKafkaProducer
# Topic that store_kafka_batch publishes to.
KAFKA_TOPIC = "msg"

# Logger named "db", obtained from the project's util module.
log = util.get_logger("db")

# Redis (legacy)
# Synchronous client; used by queue_message_bulk_sync.
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)

# AIORedis
# asyncio client on the same socket and database; used by queue_message and
# queue_message_bulk.
ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)

# Message "type" values that store_kafka_batch normalises against
# mc_s.schema_main.
TYPES_MAIN = [
    "msg",
    "notice",
    "action",
    "part",
    "join",
    "kick",
    "quit",
    "nick",
    "mode",
    "topic",
    "update",
]
# Message "type" values normalised against mc_s.schema_meta.
TYPES_META = ["who"]
# Message "type" values normalised against mc_s.schema_int.
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
# Intended prefix for the Redis keys built by the queue_message* helpers
# (the key is the prefix followed by the message's "src" value).
KEYPREFIX = "queue."


def _schema_for(msg_type):
    """
    Return the Manticore schema used to normalise a message of this type.
    :param msg_type: value of the message's "type" field
    :return: schema mapping, or an empty dict for an unrecognised type
    """
    if msg_type in TYPES_MAIN:
        return mc_s.schema_main
    if msg_type in TYPES_META:
        return mc_s.schema_meta
    if msg_type in TYPES_INT:
        return mc_s.schema_int
    # Unknown type: no schema to normalise against. An empty mapping makes
    # the caller skip the int -> str conversion instead of hitting an
    # unbound (or stale) schema variable.
    print("Unknown message type %r, skipping schema normalisation" % (msg_type,))
    return {}


async def _send_kafka_batch(producer, batch):
    """
    Send one batch to a randomly chosen partition of KAFKA_TOPIC.
    :param producer: started AIOKafkaProducer
    :param batch: batch builder obtained from producer.create_batch()
    """
    partitions = await producer.partitions_for(KAFKA_TOPIC)
    partition = random.choice(tuple(partitions))
    await producer.send_batch(batch, KAFKA_TOPIC, partition=partition)
    print("%d messages sent to partition %d"
          % (batch.record_count(), partition))


async def store_kafka_batch(data):
    """
    Normalise a list of messages and publish them to Kafka in batches.

    Each message is modified in place: keys whose value is None are removed,
    and int values are converted to str when the schema declares the field
    as a string/text column. The message is then JSON-encoded and appended
    to a Kafka batch, using msg["ts"] as the record timestamp. Whenever a
    batch is full it is sent and a new one is started.

    :param data: iterable of message dicts; each needs "type" and "ts" keys
    :raises KeyError: if a message has no "type" or "ts" key
    """
    print("STORING KAFKA BATCH")
    producer = AIOKafkaProducer(bootstrap_servers='kafka:9092')
    await producer.start()
    # try/finally so the producer is always stopped, even if a message is
    # malformed or the broker call fails part-way through.
    try:
        batch = producer.create_batch()
        for msg in data:
            schema = _schema_for(msg["type"])
            # normalise fields
            for key, value in list(msg.items()):
                if value is None:
                    del msg[key]
                    continue
                if (
                    key in schema
                    and isinstance(value, int)
                    and schema[key].startswith(("string", "text"))
                ):
                    msg[key] = str(value)
            body = ujson.dumps(msg).encode()
            metadata = batch.append(key=None, value=body, timestamp=msg["ts"])
            if metadata is None:
                # Batch is full: flush it, then retry the SAME message on a
                # fresh batch so it is not lost.
                await _send_kafka_batch(producer, batch)
                batch = producer.create_batch()
                metadata = batch.append(
                    key=None, value=body, timestamp=msg["ts"]
                )
                if metadata is None:
                    # Not expected for the first record of a new batch;
                    # report it rather than dropping the message silently.
                    print("Message rejected by empty Kafka batch, dropped")

        # Flush whatever is left; nothing to send for empty input.
        if batch.record_count() > 0:
            await _send_kafka_batch(producer, batch)
    finally:
        await producer.stop()

# def store_message(msg):
#     """
#     Store a message into Manticore
#     :param msg: dict
#     """
#     store_kafka(msg)
    # # Duplicated to avoid extra function call
    # if msg["type"] in TYPES_MAIN:
    #     index = "main"
    #     schema = mc_s.schema_main
    # elif msg["type"] in TYPES_META:
    #     index = "meta"
    #     schema = mc_s.schema_meta
    # elif msg["type"] in TYPES_INT:
    #     index = "internal"
    #     schema = mc_s.schema_int
    # # normalise fields
    # for key, value in list(msg.items()):
    #     if value is None:
    #         del msg[key]
    #     if key in schema:
    #         if isinstance(value, int):
    #             if schema[key].startswith("string") or schema[key].startswith("text"):
    #                 msg[key] = str(value)

    # body = [{"insert": {"index": index, "doc": msg}}]
    # body_post = ""
    # for item in body:
    #     body_post += ujson.dumps(item)
    #     body_post += "\n"

    # # print(body_post)
    # try:
    #     # Bulk index operations
    #     print("FAKE POST")
    #     #api_response = api_instance.bulk(body_post)  # , async_req=True
    #     # print(api_response)
    # except ApiException as e:
    #     print("Exception when calling IndexApi->bulk: %s\n" % e)
    #     print("ATTEMPT", body_post)

async def queue_message(msg):
    """
    Queue a message on the Redis buffer.

    The message is JSON-serialised and added to the Redis set whose key is
    KEYPREFIX followed by the message's "src" value.
    :param msg: dict with at least a "src" key
    :raises KeyError: if msg has no "src" key
    """
    src = msg["src"]
    message = ujson.dumps(msg)

    # f-string: the key must be interpolated per source, not stored under
    # the literal text "{KEYPREFIX}{src}".
    key = f"{KEYPREFIX}{src}"
    await ar.sadd(key, message)

async def queue_message_bulk(data):
    """
    Queue multiple messages on the Redis buffer.

    Each message is JSON-serialised and added to the Redis set whose key is
    KEYPREFIX followed by that message's "src" value.
    :param data: iterable of dicts, each with at least a "src" key
    :raises KeyError: if a message has no "src" key
    """
    for msg in data:
        src = msg["src"]
        message = ujson.dumps(msg)

        # f-string: the key must be interpolated per source, not stored
        # under the literal text "{KEYPREFIX}{src}".
        key = f"{KEYPREFIX}{src}"
        await ar.sadd(key, message)


# For now, make a normal function until we go full async
def queue_message_bulk_sync(data):
    """
    Queue multiple messages on the Redis buffer (blocking variant).

    Each message is JSON-serialised and added, through the synchronous
    Redis client, to the set whose key is KEYPREFIX followed by that
    message's "src" value.
    :param data: iterable of dicts, each with at least a "src" key
    :raises KeyError: if a message has no "src" key
    """
    for msg in data:
        src = msg["src"]
        message = ujson.dumps(msg)

        # f-string: the key must be interpolated per source, not stored
        # under the literal text "{KEYPREFIX}{src}".
        key = f"{KEYPREFIX}{src}"
        r.sadd(key, message)


# def store_message_bulk(data):
#     """
#     Store a message into Manticore
#     :param msg: dict
#     """
#     if not data:
#         return
#     for msg in data:
#         store_kafka(msg)
    # # 10000: maximum inserts we can submit to
    # # Manticore as of Sept 2022
    # split_posts = array_split(data, ceil(len(data) / 10000))
    # for messages in split_posts:
    #     total = []
    #     for msg in messages:
    #         # Duplicated to avoid extra function call (see above)
    #         if msg["type"] in TYPES_MAIN:
    #             index = "main"
    #             schema = mc_s.schema_main
    #         elif msg["type"] in TYPES_META:
    #             index = "meta"
    #             schema = mc_s.schema_meta
    #         elif msg["type"] in TYPES_INT:
    #             index = "internal"
    #             schema = mc_s.schema_int
    #         # normalise fields
    #         for key, value in list(msg.items()):
    #             if value is None:
    #                 del msg[key]
    #             if key in schema:
    #                 if isinstance(value, int):
    #                     if schema[key].startswith("string") or schema[key].startswith(
    #                         "text"
    #                     ):
    #                         msg[key] = str(value)

    #         body = {"insert": {"index": index, "doc": msg}}
    #         total.append(body)

    #     body_post = ""
    #     for item in total:
    #         body_post += ujson.dumps(item)
    #         body_post += "\n"

    #     # print(body_post)
    #     try:
    #         # Bulk index operations
    #         print("FAKE POST")
    #         #api_response = api_instance.bulk(body_post)  # , async_req=True
    #         #print(api_response)
    #     except ApiException as e:
    #         print("Exception when calling IndexApi->bulk: %s\n" % e)
    #         print("ATTEMPT", body_post)


# def update_schema():
#     pass


# def create_index(api_client):
#     util_instance = manticoresearch.UtilsApi(api_client)
#     schemas = {
#         "main": mc_s.schema_main,
#         "meta": mc_s.schema_meta,
#         "internal": mc_s.schema_int,
#     }
#     for name, schema in schemas.items():
#         schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])

#         create_query = (
#             f"create table if not exists {name}({schema_types}) engine='columnar'"
#         )
#         print("Schema types", create_query)
#         util_instance.sql(create_query)


#create_index(api_client)
#update_schema()