You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

154 lines
4.1 KiB
Python

from math import ceil
import aioredis
import manticoresearch
import ujson
from manticoresearch.rest import ApiException
from numpy import array_split
from redis import StrictRedis
import util
from schemas import mc_s
configuration = manticoresearch.Configuration(host="http://monolith-db-1:9308")
api_client = manticoresearch.ApiClient(configuration)
api_instance = manticoresearch.IndexApi(api_client)
log = util.get_logger("db")
# Redis (legacy)
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
# AIORedis
ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
TYPES_MAIN = [
"msg",
"notice",
"action",
"part",
"join",
"kick",
"quit",
"nick",
"mode",
"topic",
]
TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
def store_message(msg):
"""
Store a message into Manticore
:param msg: dict
"""
# Duplicated to avoid extra function call
if msg["type"] in TYPES_MAIN:
index = "main"
schema = mc_s.schema_main
elif msg["type"] in TYPES_META:
index = "meta"
schema = mc_s.schema_meta
elif msg["type"] in TYPES_INT:
index = "internal"
schema = mc_s.schema_int
# normalise fields
for key, value in list(msg.items()):
if value is None:
del msg[key]
if key in schema:
if isinstance(value, int):
if schema[key].startswith("string"):
msg[key] = str(value)
body = [{"insert": {"index": index, "doc": msg}}]
body_post = ""
for item in body:
body_post += ujson.dumps(item)
body_post += "\n"
# print(body_post)
try:
# Bulk index operations
api_response = api_instance.bulk(body_post) # , async_req=True
# print(api_response)
except ApiException as e:
print("Exception when calling IndexApi->bulk: %s\n" % e)
print("ATTEMPT", body_post)
def store_message_bulk(data):
"""
Store a message into Manticore
:param msg: dict
"""
if not data:
return
# 10000: maximum inserts we can submit to
# Manticore as of Sept 2022
split_posts = array_split(data, ceil(len(data) / 10000))
for messages in split_posts:
total = []
for msg in messages:
# Duplicated to avoid extra function call (see above)
if msg["type"] in TYPES_MAIN:
index = "main"
schema = mc_s.schema_main
elif msg["type"] in TYPES_META:
index = "meta"
schema = mc_s.schema_meta
elif msg["type"] in TYPES_INT:
index = "internal"
schema = mc_s.schema_int
# normalise fields
for key, value in list(msg.items()):
if value is None:
del msg[key]
if key in schema:
if isinstance(value, int):
if schema[key].startswith("string"):
msg[key] = str(value)
body = {"insert": {"index": index, "doc": msg}}
total.append(body)
body_post = ""
for item in total:
body_post += ujson.dumps(item)
body_post += "\n"
# print(body_post)
try:
# Bulk index operations
api_response = api_instance.bulk(body_post) # , async_req=True
print(api_response)
except ApiException as e:
print("Exception when calling IndexApi->bulk: %s\n" % e)
print("ATTEMPT", body_post)
def update_schema():
pass
def create_index(api_client):
util_instance = manticoresearch.UtilsApi(api_client)
schemas = {
"main": mc_s.schema_main,
"meta": mc_s.schema_meta,
"internal": mc_s.schema_int,
}
for name, schema in schemas.items():
schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])
create_query = (
f"create table if not exists {name}({schema_types}) engine='columnar'"
)
print("Schema types", create_query)
util_instance.sql(create_query)
create_index(api_client)
update_schema()