from math import ceil
import random

import aioredis
import manticoresearch
import ujson
from aiokafka import AIOKafkaProducer
from manticoresearch.rest import ApiException
from numpy import array_split
from redis import StrictRedis

import util

# Manticore schema
from schemas import mc_s

# Manticore
configuration = manticoresearch.Configuration(host="http://monolith-db-1:9308")
api_client = manticoresearch.ApiClient(configuration)
api_instance = manticoresearch.IndexApi(api_client)

# Kafka
KAFKA_TOPIC = "msg"

log = util.get_logger("db")

# Redis (legacy)
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)

# AIORedis
ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)

TYPES_MAIN = [
    "msg",
    "notice",
    "action",
    "part",
    "join",
    "kick",
    "quit",
    "nick",
    "mode",
    "topic",
    "update",
]
TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYPREFIX = "queue."


async def store_kafka_batch(data):
    """
    Normalise a list of messages against the Manticore schemas and send
    them to Kafka in batches.
    :param data: list of dicts
    """
    print("STORING KAFKA BATCH")
    producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
    await producer.start()
    batch = producer.create_batch()
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            index = "main"
            schema = mc_s.schema_main
        elif msg["type"] in TYPES_META:
            index = "meta"
            schema = mc_s.schema_meta
        elif msg["type"] in TYPES_INT:
            index = "internal"
            schema = mc_s.schema_int
        else:
            # Unknown type: skip rather than silently reusing the schema
            # from the previous iteration
            continue
        # normalise fields
        for key, value in list(msg.items()):
            if value is None:
                del msg[key]
                continue
            if key in schema:
                if isinstance(value, int):
                    if schema[key].startswith("string") or schema[key].startswith(
                        "text"
                    ):
                        msg[key] = str(value)
        message = ujson.dumps(msg)
        body = str.encode(message)
        # append() returns None when the batch is full: flush it to a random
        # partition, then retry the append on a fresh batch so the message
        # is not dropped
        metadata = batch.append(key=None, value=body, timestamp=msg["ts"])
        if metadata is None:
            partitions = await producer.partitions_for(KAFKA_TOPIC)
            partition = random.choice(tuple(partitions))
            await producer.send_batch(batch, KAFKA_TOPIC, partition=partition)
            print("%d messages sent to partition %d" % (batch.record_count(), partition))
            batch = producer.create_batch()
            batch.append(key=None, value=body, timestamp=msg["ts"])
    # flush whatever is left in the final batch
    if batch.record_count() > 0:
        partitions = await producer.partitions_for(KAFKA_TOPIC)
        partition = random.choice(tuple(partitions))
        await producer.send_batch(batch, KAFKA_TOPIC, partition=partition)
        print("%d messages sent to partition %d" % (batch.record_count(), partition))
    await producer.stop()
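
# Usage sketch for store_kafka_batch (not called from this module). It is
# illustrative only: the message fields besides "type" and "ts" are made up,
# and it assumes the kafka:9092 broker configured above is reachable.
# Uncomment to run.
#
# import asyncio
#
# async def _kafka_demo():
#     await store_kafka_batch(
#         [
#             {"type": "msg", "src": "irc", "ts": 1662000000, "msg": "hello"},
#             {"type": "join", "src": "irc", "ts": 1662000001},
#         ]
#     )
#
# asyncio.run(_kafka_demo())
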
""" src = msg["src"] message = ujson.dumps(msg) key = "{KEYPREFIX}{src}" await ar.sadd(key, message) async def queue_message_bulk(data): """ Queue multiple messages on the Redis buffer. """ for msg in data: src = msg["src"] message = ujson.dumps(msg) key = "{KEYPREFIX}{src}" await ar.sadd(key, message) # For now, make a normal function until we go full async def queue_message_bulk_sync(data): """ Queue multiple messages on the Redis buffer. """ for msg in data: src = msg["src"] message = ujson.dumps(msg) key = "{KEYPREFIX}{src}" r.sadd(key, message) # def store_message_bulk(data): # """ # Store a message into Manticore # :param msg: dict # """ # if not data: # return # for msg in data: # store_kafka(msg) # # 10000: maximum inserts we can submit to # # Manticore as of Sept 2022 # split_posts = array_split(data, ceil(len(data) / 10000)) # for messages in split_posts: # total = [] # for msg in messages: # # Duplicated to avoid extra function call (see above) # if msg["type"] in TYPES_MAIN: # index = "main" # schema = mc_s.schema_main # elif msg["type"] in TYPES_META: # index = "meta" # schema = mc_s.schema_meta # elif msg["type"] in TYPES_INT: # index = "internal" # schema = mc_s.schema_int # # normalise fields # for key, value in list(msg.items()): # if value is None: # del msg[key] # if key in schema: # if isinstance(value, int): # if schema[key].startswith("string") or schema[key].startswith( # "text" # ): # msg[key] = str(value) # body = {"insert": {"index": index, "doc": msg}} # total.append(body) # body_post = "" # for item in total: # body_post += ujson.dumps(item) # body_post += "\n" # # print(body_post) # try: # # Bulk index operations # print("FAKE POST") # #api_response = api_instance.bulk(body_post) # , async_req=True # #print(api_response) # except ApiException as e: # print("Exception when calling IndexApi->bulk: %s\n" % e) # print("ATTEMPT", body_post) # def update_schema(): # pass # def create_index(api_client): # util_instance = manticoresearch.UtilsApi(api_client) # schemas = { # "main": mc_s.schema_main, # "meta": mc_s.schema_meta, # "internal": mc_s.schema_int, # } # for name, schema in schemas.items(): # schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()]) # create_query = ( # f"create table if not exists {name}({schema_types}) engine='columnar'" # ) # print("Schema types", create_query) # util_instance.sql(create_query) #create_index(api_client) #update_schema()