2022-09-04 20:40:04 +00:00
|
|
|
from math import ceil
|
|
|
|
|
2022-09-07 06:20:30 +00:00
|
|
|
import aioredis
|
2022-09-04 12:47:32 +00:00
|
|
|
import manticoresearch
|
2022-09-04 20:40:04 +00:00
|
|
|
import ujson
|
2022-09-04 12:47:32 +00:00
|
|
|
from manticoresearch.rest import ApiException
|
2022-09-04 20:40:04 +00:00
|
|
|
from numpy import array_split
|
2022-09-02 21:30:45 +00:00
|
|
|
from redis import StrictRedis
|
|
|
|
|
|
|
|
import util
|
2022-09-07 06:20:30 +00:00
|
|
|
from schemas import mc_s
|
|
|
|
|
2022-09-04 12:47:32 +00:00
|
|
|
# Module logger
log = util.get_logger("db")

# Manticore: a single configuration/client pair shared by the whole module.
# api_instance is the IndexApi the store_* functions submit bulk inserts to.
configuration = manticoresearch.Configuration(
    host="http://monolith-db-1:9308",
)
api_client = manticoresearch.ApiClient(configuration)
api_instance = manticoresearch.IndexApi(api_client)

# Redis (legacy)
r = StrictRedis(
    unix_socket_path="/var/run/redis/redis.sock",
    db=0,
)

# AIORedis
ar = aioredis.from_url(
    "unix:///var/run/redis/redis.sock",
    db=0,
)
|
|
|
|
|
|
|
|
# Message types routed to the "main" index
TYPES_MAIN = [
    "msg", "notice", "action", "part", "join", "kick",
    "quit", "nick", "mode", "topic", "update",
]  # fmt: skip

# Message types routed to the "meta" index
TYPES_META = ["who"]

# Message types routed to the "internal" index
TYPES_INT = [
    "conn",
    "highlight",
    "znc",
    "query",
    "self",
]
|
|
|
|
|
2022-09-04 20:40:04 +00:00
|
|
|
|
2022-09-02 21:30:45 +00:00
|
|
|
def store_message(msg):
    """
    Store a single message into Manticore

    The target index and schema are selected from ``msg["type"]``. Keys whose
    value is None are removed, and integer values are converted to strings
    for columns whose schema type starts with "string". The result is sent
    as a one-line bulk insert request.

    A message whose type is not listed in TYPES_MAIN, TYPES_META or TYPES_INT
    is logged and skipped instead of failing with an unbound index/schema.

    :param msg: dict (modified in place by the normalisation step)
    """
    # Duplicated to avoid extra function call
    msg_type = msg["type"]
    if msg_type in TYPES_MAIN:
        index = "main"
        schema = mc_s.schema_main
    elif msg_type in TYPES_META:
        index = "meta"
        schema = mc_s.schema_meta
    elif msg_type in TYPES_INT:
        index = "internal"
        schema = mc_s.schema_int
    else:
        # No index is defined for this type; without this branch the code
        # below would raise UnboundLocalError on `schema`.
        log.error("Not storing message with unknown type: %r", msg_type)
        return

    # normalise fields
    # Iterate over a snapshot because keys are deleted while looping.
    for key, value in list(msg.items()):
        if value is None:
            del msg[key]
            continue
        if (
            isinstance(value, int)
            and key in schema
            and schema[key].startswith("string")
        ):
            msg[key] = str(value)

    # The bulk endpoint takes newline-delimited JSON, one operation per line.
    body_post = ujson.dumps({"insert": {"index": index, "doc": msg}}) + "\n"

    # print(body_post)
    try:
        # Bulk index operations
        api_instance.bulk(body_post)  # , async_req=True
    except ApiException as e:
        print("Exception when calling IndexApi->bulk: %s\n" % e)
        print("ATTEMPT", body_post)
|
2022-09-04 18:44:25 +00:00
|
|
|
|
2022-09-04 20:40:04 +00:00
|
|
|
|
2022-09-04 20:29:00 +00:00
|
|
|
def store_message_bulk(data):
    """
    Store multiple messages into Manticore

    The messages are split into batches and each batch is submitted as one
    newline-delimited bulk insert request. Every message is routed and
    normalised exactly as a single message would be: the index and schema
    come from ``msg["type"]``, None values are dropped, and integers are
    stringified for columns whose schema type starts with "string".

    A message whose type is not listed in TYPES_MAIN, TYPES_META or TYPES_INT
    is logged and left out of the batch. Previously such a message inherited
    the index of the message before it (or raised UnboundLocalError when it
    came first).

    :param data: sized sequence of message dicts (each modified in place)
    """
    if not data:
        return

    # 10000: maximum inserts we can submit to
    # Manticore as of Sept 2022
    split_posts = array_split(data, ceil(len(data) / 10000))
    for messages in split_posts:
        total = []
        for msg in messages:
            # Duplicated to avoid extra function call (see above)
            msg_type = msg["type"]
            if msg_type in TYPES_MAIN:
                index = "main"
                schema = mc_s.schema_main
            elif msg_type in TYPES_META:
                index = "meta"
                schema = mc_s.schema_meta
            elif msg_type in TYPES_INT:
                index = "internal"
                schema = mc_s.schema_int
            else:
                # Never fall through with the previous message's index.
                log.error(
                    "Not storing message with unknown type: %r", msg_type
                )
                continue

            # normalise fields
            # Iterate over a snapshot because keys are deleted while looping.
            for key, value in list(msg.items()):
                if value is None:
                    del msg[key]
                    continue
                if (
                    isinstance(value, int)
                    and key in schema
                    and schema[key].startswith("string")
                ):
                    msg[key] = str(value)

            total.append({"insert": {"index": index, "doc": msg}})

        if not total:
            # Every message in this batch was skipped; nothing to submit.
            continue

        # One JSON document per line, each line newline-terminated.
        body_post = "".join(ujson.dumps(item) + "\n" for item in total)

        # print(body_post)
        try:
            # Bulk index operations
            api_response = api_instance.bulk(body_post)  # , async_req=True
            print(api_response)
        except ApiException as e:
            print("Exception when calling IndexApi->bulk: %s\n" % e)
            print("ATTEMPT", body_post)
|
2022-09-04 12:47:32 +00:00
|
|
|
|
2022-09-04 20:40:04 +00:00
|
|
|
|
2022-09-04 12:47:32 +00:00
|
|
|
def update_schema():
    """
    Placeholder for schema updates; currently does nothing.
    """
    return None
|
|
|
|
|
2022-09-04 20:40:04 +00:00
|
|
|
|
2022-09-04 12:47:32 +00:00
|
|
|
def create_index(api_client):
    """
    Create the "main", "meta" and "internal" tables if they do not exist

    For each table a ``create table if not exists`` statement is built from
    the matching schema in mc_s (column name followed by its type), printed,
    and executed through the Manticore UtilsApi.

    :param api_client: Manticore ApiClient used to build the UtilsApi
    """
    sql_api = manticoresearch.UtilsApi(api_client)
    tables = (
        ("main", mc_s.schema_main),
        ("meta", mc_s.schema_meta),
        ("internal", mc_s.schema_int),
    )
    for name, columns in tables:
        # "<column> <type>" pairs, comma separated
        schema_types = ", ".join(
            f"{column} {column_type}" for column, column_type in columns.items()
        )
        statement = (
            f"create table if not exists {name}({schema_types}) engine='columnar'"
        )
        print("Schema types", statement)
        sql_api.sql(statement)
|
2022-09-04 12:47:32 +00:00
|
|
|
|
|
|
|
|
|
|
|
# Runs at import time: make sure the tables exist, then call the schema
# update hook.
create_index(api_client)
update_schema()
|