Update to run with Podman
db.py
@@ -1,30 +1,43 @@
import asyncio
from copy import deepcopy
from math import ceil
from os import getenv
from time import sleep

import aioredis
import manticoresearch
import msgpack
import orjson
import redis

# Elasticsearch
from elasticsearch import AsyncElasticsearch
from manticoresearch.rest import ApiException
from numpy import array_split
from redis import StrictRedis

import util
from schemas import mc_s

trues = ("true", "1", "t", True)

# INDEX = "msg"
configuration = manticoresearch.Configuration(host="http://monolith_db:9308")
api_client = manticoresearch.ApiClient(configuration)
api_instance = manticoresearch.IndexApi(api_client)

log = util.get_logger("db")

# Redis (legacy)
# r = redis.from_url("redis://ssdb:1289", db=0)
# r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
r = StrictRedis(
    host="ssdb_monolith",  # Redis server hostname (Podman network alias)
    port=1289,  # Redis server port
    db=0,  # database number
)

# AIORedis
# ar = aioredis.from_url("redis://ssdb:1289", db=0)
# ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
ar = aioredis.from_url("redis://ssdb_monolith:1289", db=0)

# Neptune Redis for PubSub
pr = aioredis.from_url(
    "redis://redis_neptune:6379", db=10, password=getenv("REDIS_PASSWORD")
)

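# The hostnames above are assumed to be Podman network aliases for the SSDB
# and Neptune containers, with REDIS_PASSWORD supplied by the container
# environment, e.g. (hypothetical invocation):
#   podman run --network monolith -e REDIS_PASSWORD=changeme monolith
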
TYPES_MAIN = [
    "msg",
@@ -39,120 +52,146 @@ TYPES_MAIN = [
    "topic",
    "update",
]
MAIN_SRC_MAP = {
    "dis": "main",
    "irc": "restricted",
    "4ch": "main",
}

TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYNAME = "queue"
MESSAGE_KEY = "messages"

ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false") in trues

client = None

# These fields are sometimes numeric and sometimes strings.
# If they are first seen as numeric, ES will erroneously index
# them as "long" and then fail to index any later messages that
# carry strings in the same field, so force them to text.
keyword_fields = ["nick_id", "user_id", "net_id"]

mapping_int = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
        }
    }
}
# deepcopy, not dict(): a shallow copy would share the nested "mappings"
# dict, so the del below would strip file_tim from both mappings.
mapping = deepcopy(mapping_int)
for field in keyword_fields:
    mapping["mappings"]["properties"][field] = {"type": "text"}

del mapping_int["mappings"]["properties"]["file_tim"]
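
# Minimal illustration of the shallow-copy pitfall avoided above (hypothetical
# values, not part of the module):
#   a = {"x": {"y": 1}}
#   b = dict(a)      # shallow: b["x"] is a["x"]
#   del b["x"]["y"]  # would mutate a as well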

# Legacy Manticore single-message path, kept for reference:
# def store_message(msg):
#     """
#     Store a message into Manticore
#     :param msg: dict
#     """
#     # Duplicated to avoid extra function call
#     if msg["type"] in TYPES_MAIN:
#         index = "main"
#         schema = mc_s.schema_main
#     elif msg["type"] in TYPES_META:
#         index = "meta"
#         schema = mc_s.schema_meta
#     elif msg["type"] in TYPES_INT:
#         index = "internal"
#         schema = mc_s.schema_int
#     # normalise fields
#     for key, value in list(msg.items()):
#         if value is None:
#             del msg[key]
#         if key in schema:
#             if isinstance(value, int):
#                 if schema[key].startswith("string") or schema[key].startswith("text"):
#                     msg[key] = str(value)
#
#     body = [{"insert": {"index": index, "doc": msg}}]
#     body_post = ""
#     for item in body:
#         body_post += orjson.dumps(item)
#         body_post += "\n"
#
#     # print(body_post)
#     try:
#         # Bulk index operations
#         api_response = api_instance.bulk(body_post)  # , async_req=True
#         # print(api_response)
#     except ApiException as e:
#         print("Exception when calling IndexApi->bulk: %s\n" % e)
#         print("ATTEMPT", body_post)


async def initialise_elasticsearch():
    """
    Initialise the Elasticsearch client.
    """
    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
    client = AsyncElasticsearch(ELASTICSEARCH_HOST, http_auth=auth, verify_certs=False)
    for index in ("main", "meta", "restricted", "internal"):
        if index == "internal":
            map_dict = mapping_int
        else:
            map_dict = mapping
        if await client.indices.exists(index=index):
            # Update the existing index with the mapping
            await client.indices.put_mapping(
                index=index, properties=map_dict["mappings"]["properties"]
            )
        else:
            await client.indices.create(index=index, mappings=map_dict["mappings"])
    return client
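
# Hypothetical usage sketch (not part of this module); in practice the client
# is created lazily by store_batch() below:
#   client = await initialise_elasticsearch()
#   await client.index(index="main", body={"ts": 1662000000, "msg": "hi"})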


async def store_batch(data):
    """
    Route a batch of messages to their indices, publish the batch to
    Neptune over Redis PubSub and store it in Elasticsearch.
    :param data: list of message dicts
    """
    global client
    if not client:
        client = await initialise_elasticsearch()
    if not data:
        return
    indexmap = {}
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            index = MAIN_SRC_MAP[msg["src"]]
        elif msg["type"] in TYPES_META:
            index = "meta"
        elif msg["type"] in TYPES_INT:
            index = "internal"
        if "ts" not in msg:
            raise Exception("No TS in msg")
        if index not in indexmap:
            indexmap[index] = [msg]
        else:
            indexmap[index].append(msg)

    # Pack the indexmap with msgpack and publish it to Neptune
    packed_index = msgpack.packb(indexmap, use_bin_type=True)
    completed_publish = False
    for _ in range(10):
        try:
            await pr.publish(MESSAGE_KEY, packed_index)
            completed_publish = True
            break
        except aioredis.exceptions.ConnectionError:
            await asyncio.sleep(0.1)
    if not completed_publish:
        log.error("Failed to publish to Neptune")

    for index, index_messages in indexmap.items():
        for message in index_messages:
            result = await client.index(index=index, body=message)
            if result["result"] != "created":
                log.error(f"Indexing failed: {result}")
    log.debug(f"Indexed {len(data)} messages in ES")

    # Legacy Manticore bulk path, kept for reference. Manticore capped bulk
    # inserts at 10000 as of Sept 2022, hence the array_split batching:
    # split_posts = array_split(data, ceil(len(data) / 10000))
    # for messages in split_posts:
    #     total = []
    #     for msg in messages:
    #         if msg["type"] in TYPES_MAIN:
    #             index = "main"
    #             schema = mc_s.schema_main
    #         elif msg["type"] in TYPES_META:
    #             index = "meta"
    #             schema = mc_s.schema_meta
    #         elif msg["type"] in TYPES_INT:
    #             index = "internal"
    #             schema = mc_s.schema_int
    #         # normalise fields
    #         for key, value in list(msg.items()):
    #             if value is None:
    #                 del msg[key]
    #             if key in schema:
    #                 if isinstance(value, int):
    #                     if schema[key].startswith("string") or schema[key].startswith("text"):
    #                         msg[key] = str(value)
    #         total.append({"insert": {"index": index, "doc": msg}})
    #     body_post = ""
    #     for item in total:
    #         body_post += orjson.dumps(item).decode("utf-8")
    #         body_post += "\n"
    #     try:
    #         # Bulk index operations
    #         api_response = api_instance.bulk(body_post)  # , async_req=True
    #     except ApiException as e:
    #         print("Exception when calling IndexApi->bulk: %s\n" % e)
    #     print(f"Completed ingest to MC of length {len(total)}")
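
# Hypothetical sketch of the consuming side of the PubSub contract above
# (Neptune's subscriber lives outside this module):
#   pubsub = pr.pubsub()
#   await pubsub.subscribe(MESSAGE_KEY)
#   async for item in pubsub.listen():
#       if item["type"] == "message":
#           indexmap = msgpack.unpackb(item["data"], raw=False)
#           # indexmap maps index name -> list of message dicts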


def update_schema():
    pass


def create_index(api_client):
    util_instance = manticoresearch.UtilsApi(api_client)
    schemas = {
        "main": mc_s.schema_main,
        "meta": mc_s.schema_meta,
        "internal": mc_s.schema_int,
    }
    for name, schema in schemas.items():
        schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])
        create_query = (
            f"create table if not exists {name}({schema_types}) engine='columnar'"
        )
        print("Create query", create_query)
        util_instance.sql(create_query)
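
# For illustration, with a hypothetical schema {"ts": "timestamp", "msg": "text"}
# the loop above would issue:
#   create table if not exists main(ts timestamp, msg text) engine='columnar'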


async def queue_message(msg):
@@ -172,3 +211,14 @@ async def queue_message_bulk(data):
        # TODO: msgpack
        message = orjson.dumps(msg)
        await ar.lpush(KEYNAME, message)


# Block until the DB container is reachable, then ensure the tables exist
created = False
while not created:
    try:
        create_index(api_client)
        created = True
    except Exception as e:
        print(f"Error creating index: {e}")
        sleep(1)  # Block the thread, just wait for the DB
update_schema()