Update to run with Podman
parent 5ea4e5f460
commit 054e9caca0
(file name not shown in the capture)
@@ -13,7 +13,7 @@ ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 WORKDIR /code
 COPY requirements.txt /code/
-COPY discord-patched.tgz /code/
+COPY docker/discord-patched.tgz /code/
 
 RUN python -m venv /venv
 RUN . /venv/bin/activate && pip install -r requirements.txt
Makefile
@@ -1,20 +1,20 @@
 run:
-	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env up -d
+	docker-compose -f docker-compose.prod.yml --env-file=stack.env up -d
 
 build:
-	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env build
+	docker-compose -f docker-compose.prod.yml --env-file=stack.env build
 
 stop:
-	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env down
+	docker-compose -f docker-compose.prod.yml --env-file=stack.env down
 
 log:
-	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env logs -f
+	docker-compose -f docker-compose.prod.yml --env-file=stack.env logs -f
 
 run-infra:
-	docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env up -d
+	docker-compose -f docker-compose.infra.yml --env-file=stack.env up -d
 
 stop-infra:
-	docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env down
+	docker-compose -f docker-compose.infra.yml --env-file=stack.env down
 
 log-infra:
-	docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env logs -f
+	docker-compose -f docker-compose.infra.yml --env-file=stack.env logs -f
db.py
@@ -1,30 +1,43 @@
-import asyncio
-from os import getenv
+from math import ceil
 
 import aioredis
-import msgpack
+import manticoresearch
 import orjson
-import redis
-
-# Elasticsearch
-from elasticsearch import AsyncElasticsearch
+from manticoresearch.rest import ApiException
+from numpy import array_split
+from redis import StrictRedis
+import msgpack
+import asyncio
 
 import util
+from schemas import mc_s
+from os import getenv
+from time import sleep
 
-trues = ("true", "1", "t", True)
-# INDEX = "msg"
+configuration = manticoresearch.Configuration(host="http://monolith_db:9308")
+api_client = manticoresearch.ApiClient(configuration)
+api_instance = manticoresearch.IndexApi(api_client)
 
 log = util.get_logger("db")
 
 # Redis (legacy)
-r = redis.from_url("redis://ssdb:1289", db=0)
+# r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
+r = StrictRedis(
+    host="ssdb_monolith",  # Replace with your Redis server's IP address
+    port=1289,  # Replace with your Redis server's port
+    db=0,  # Database number
+)
 # AIORedis
-ar = aioredis.from_url("redis://ssdb:1289", db=0)
+# ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
+ar = aioredis.from_url(
+    "redis://ssdb_monolith:1289",
+    db=0,
+)
+pr = aioredis.from_url("redis://redis_neptune:6379", db=10, password=getenv("REDIS_PASSWORD"))
 
+KEYNAME = "queue"
+MESSAGE_KEY = "messages"
 
-# Neptune redis for PubSub
-pr = aioredis.from_url("redis://redis_neptune:6379", db=10)
 
 TYPES_MAIN = [
     "msg",
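db.py now talks to Manticore over its HTTP API on port 9308 instead of Elasticsearch, and reaches Redis/SSDB by container hostname rather than a unix socket. A quick way to sanity-check the new endpoint is the same UtilsApi the module uses further down for schema creation. A minimal sketch, assuming it runs from a container on the same compose network so that monolith_db resolves:

    import manticoresearch

    # Same endpoint db.py configures above; "monolith_db" is the DB service's container name.
    configuration = manticoresearch.Configuration(host="http://monolith_db:9308")
    api_client = manticoresearch.ApiClient(configuration)
    util_api = manticoresearch.UtilsApi(api_client)

    # Plain Manticore SQL goes through the UtilsApi; this lists the tables created by create_index().
    print(util_api.sql("show tables"))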
@@ -39,99 +52,93 @@ TYPES_MAIN = [
     "topic",
     "update",
 ]
-MAIN_SRC_MAP = {
-    "dis": "main",
-    "irc": "restricted",
-    "4ch": "main",
-}
 
 TYPES_META = ["who"]
 TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
-KEYNAME = "queue"
-MESSAGE_KEY = "messages"
 
-ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
-ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
-ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
-ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false") in trues
-
-client = None
-
-# These are sometimes numeric, sometimes strings.
-# If they are seen to be numeric first, ES will erroneously
-# index them as "long" and then subsequently fail to index messages
-# with strings in the field.
-keyword_fields = ["nick_id", "user_id", "net_id"]
-
-mapping_int = {
-    "mappings": {
-        "properties": {
-            "ts": {"type": "date", "format": "epoch_second"},
-            "file_tim": {"type": "date", "format": "epoch_millis"},
-        }
-    }
-}
-mapping = dict(mapping_int)
-for field in keyword_fields:
-    mapping["mappings"]["properties"][field] = {"type": "text"}
-
-
-del mapping_int["mappings"]["properties"]["file_tim"]
-
-
-async def initialise_elasticsearch():
-    """
-    Initialise the Elasticsearch client.
-    """
-    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
-    client = AsyncElasticsearch(ELASTICSEARCH_HOST, http_auth=auth, verify_certs=False)
-    for index in ("main", "meta", "restricted", "internal"):
-        if index == "internal":
-            map_dict = mapping_int
-        else:
-            map_dict = mapping
-        if await client.indices.exists(index=index):
-            # update index with mapping
-            await client.indices.put_mapping(
-                index=index, properties=map_dict["mappings"]["properties"]
-            )
-        else:
-            await client.indices.create(index=index, mappings=map_dict["mappings"])
-    return client
+# def store_message(msg):
+#     """
+#     Store a message into Manticore
+#     :param msg: dict
+#     """
+#     # Duplicated to avoid extra function call
+#     if msg["type"] in TYPES_MAIN:
+#         index = "main"
+#         schema = mc_s.schema_main
+#     elif msg["type"] in TYPES_META:
+#         index = "meta"
+#         schema = mc_s.schema_meta
+#     elif msg["type"] in TYPES_INT:
+#         index = "internal"
+#         schema = mc_s.schema_int
+#     # normalise fields
+#     for key, value in list(msg.items()):
+#         if value is None:
+#             del msg[key]
+#         if key in schema:
+#             if isinstance(value, int):
+#                 if schema[key].startswith("string") or schema[key].startswith("text"):
+#                     msg[key] = str(value)
+
+#     body = [{"insert": {"index": index, "doc": msg}}]
+#     body_post = ""
+#     for item in body:
+#         body_post += orjson.dumps(item)
+#         body_post += "\n"
+
+#     # print(body_post)
+#     try:
+#         # Bulk index operations
+#         api_response = api_instance.bulk(body_post)  # , async_req=True
+#         # print(api_response)
+#     except ApiException as e:
+#         print("Exception when calling IndexApi->bulk: %s\n" % e)
+#         print("ATTEMPT", body_post)
 
 
 async def store_batch(data):
-    global client
-    if not client:
-        client = await initialise_elasticsearch()
-    indexmap = {}
-    for msg in data:
-        if msg["type"] in TYPES_MAIN:
-            # index = "main"
-            index = MAIN_SRC_MAP[msg["src"]]
-            # schema = mc_s.schema_main
-        elif msg["type"] in TYPES_META:
-            index = "meta"
-            # schema = mc_s.schema_meta
-        elif msg["type"] in TYPES_INT:
-            index = "internal"
-            # schema = mc_s.schema_int
-
-        INDEX = index
-
-        # if key in schema:
-        #     if isinstance(value, int):
-        #         if schema[key].startswith("string") or schema[key].startswith(
-        #             "text"
-        #         ):
-        #             msg[key] = str(value)
-        # body = orjson.dumps(msg)
-        if "ts" not in msg:
-            raise Exception("No TS in msg")
-        if INDEX not in indexmap:
-            indexmap[INDEX] = [msg]
-        else:
-            indexmap[INDEX].append(msg)
-
-    # Pack the indexmap with msgpack and publish it to Neptune
-    packed_index = msgpack.packb(indexmap, use_bin_type=True)
+    """
+    Store a message into Manticore
+    :param msg: dict
+    """
+    if not data:
+        return
+    # 10000: maximum inserts we can submit to
+    # Manticore as of Sept 2022
+    split_posts = array_split(data, ceil(len(data) / 10000))
+    for messages in split_posts:
+        total = []
+        indexmap = {}
+        for msg in messages:
+            if msg["type"] in TYPES_MAIN:
+                index = "main"
+                schema = mc_s.schema_main
+            elif msg["type"] in TYPES_META:
+                index = "meta"
+                schema = mc_s.schema_meta
+            elif msg["type"] in TYPES_INT:
+                index = "internal"
+                schema = mc_s.schema_int
+            # normalise fields
+            for key, value in list(msg.items()):
+                if value is None:
+                    del msg[key]
+                if key in schema:
+                    if isinstance(value, int):
+                        if schema[key].startswith("string") or schema[key].startswith(
+                            "text"
+                        ):
+                            msg[key] = str(value)
+
+            body = {"insert": {"index": index, "doc": msg}}
+            total.append(body)
+            if "ts" not in msg:
+                raise Exception("No TS in msg")
+            if index not in indexmap:
+                indexmap[index] = [msg]
+            else:
+                indexmap[index].append(msg)
+        # END MSG IN MESSAGES
+
+        # Pack the indexmap with msgpack and publish it to Neptune
+        packed_index = msgpack.packb(indexmap, use_bin_type=True)
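store_batch() now chunks each incoming batch with numpy's array_split so that no single bulk request exceeds the 10 000-insert limit noted in the comment. A small worked example of that arithmetic, with a hypothetical batch size:

    from math import ceil

    from numpy import array_split

    data = list(range(25_000))  # hypothetical batch of 25 000 queued messages
    split_posts = array_split(data, ceil(len(data) / 10000))  # ceil(25000 / 10000) = 3 chunks
    print([len(chunk) for chunk in split_posts])  # [8334, 8333, 8333] - each under the limit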
@@ -142,17 +149,49 @@ async def store_batch(data):
             try:
                 await pr.publish(MESSAGE_KEY, packed_index)
                 completed_publish = True
-        except aioredis.exceptions.ConnectionError:
+            except aioredis.exceptions.ConnectionError as e:
+                raise e
                 await asyncio.sleep(0.1)
         if not completed_publish:
             log.error("Failed to publish to Neptune")
 
-    for index, index_messages in indexmap.items():
-        for message in index_messages:
-            result = await client.index(index=index, body=message)
-            if not result["result"] == "created":
-                log.error(f"Indexing failed: {result}")
-    log.debug(f"Indexed {len(data)} messages in ES")
+        body_post = ""
+        for item in total:
+            print("ITEM", item)
+            body_post += orjson.dumps(item).decode("utf-8")
+            body_post += "\n"
+
+        # print(body_post)
+
+        try:
+            # Bulk index operations
+            api_response = api_instance.bulk(body_post)  # , async_req=True
+        except ApiException as e:
+            print("Exception when calling IndexApi->bulk: %s\n" % e)
+        print(f"Completed ingest to MC of length {len(total)}")
+    # END MESSAGES IN SPLIT
+
+
+def update_schema():
+    pass
+
+
+def create_index(api_client):
+    util_instance = manticoresearch.UtilsApi(api_client)
+    schemas = {
+        "main": mc_s.schema_main,
+        "meta": mc_s.schema_meta,
+        "internal": mc_s.schema_int,
+    }
+    for name, schema in schemas.items():
+        schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])
+
+        create_query = (
+            f"create table if not exists {name}({schema_types}) engine='columnar'"
+        )
+        print("Schema types", create_query)
+        util_instance.sql(create_query)
 
 
 async def queue_message(msg):
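The body_post string handed to api_instance.bulk() is newline-delimited JSON, one insert action per line, which is why each orjson.dumps() result is decoded and followed by "\n". A minimal sketch of what two queued messages end up as (the field values are made up):

    import orjson

    docs = [
        {"type": "msg", "ts": 1663000000, "msg": "hello"},  # hypothetical documents
        {"type": "msg", "ts": 1663000001, "msg": "world"},
    ]
    body_post = ""
    for doc in docs:
        body_post += orjson.dumps({"insert": {"index": "main", "doc": doc}}).decode("utf-8")
        body_post += "\n"
    print(body_post)
    # {"insert":{"index":"main","doc":{"type":"msg","ts":1663000000,"msg":"hello"}}}
    # {"insert":{"index":"main","doc":{"type":"msg","ts":1663000001,"msg":"world"}}}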
@@ -172,3 +211,14 @@ async def queue_message_bulk(data):
         # TODO: msgpack
         message = orjson.dumps(msg)
         await ar.lpush(KEYNAME, message)
+
+
+created = False
+while not created:
+    try:
+        create_index(api_client)
+        created = True
+    except Exception as e:
+        print(f"Error creating index: {e}")
+        sleep(1)  # Block the thread, just wait for the DB
+update_schema()
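At import time the module now blocks until create_index() succeeds, so the Manticore tables exist before anything is ingested. create_index() turns each schema dict from mc_s into a single CREATE TABLE statement; a worked example with a cut-down, hypothetical schema:

    schema = {"ts": "timestamp", "msg": "text", "nick": "text"}  # hypothetical subset of mc_s.schema_main
    schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])
    create_query = f"create table if not exists {name}({schema_types}) engine='columnar'".format()  # name assumed "main" below
    create_query = f"create table if not exists main({schema_types}) engine='columnar'"
    print(create_query)
    # create table if not exists main(ts timestamp, msg text, nick text) engine='columnar'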
db_old_ref.py (new file)
@@ -0,0 +1,174 @@
import asyncio
from os import getenv

import aioredis
import msgpack
import orjson
import redis

# Elasticsearch
from elasticsearch import AsyncElasticsearch

import util

trues = ("true", "1", "t", True)

# INDEX = "msg"

log = util.get_logger("db")

# Redis (legacy)
# r = redis.from_url("redis://ssdb:1289", db=0)

# AIORedis
ar = aioredis.from_url("redis://ssdb:1289", db=0)

# Neptune redis for PubSub
pr = aioredis.from_url("redis://redis_neptune:6379", db=10)

TYPES_MAIN = [
    "msg",
    "notice",
    "action",
    "part",
    "join",
    "kick",
    "quit",
    "nick",
    "mode",
    "topic",
    "update",
]
MAIN_SRC_MAP = {
    "dis": "main",
    "irc": "restricted",
    "4ch": "main",
}

TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYNAME = "queue"
MESSAGE_KEY = "messages"

ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false") in trues

client = None

# These are sometimes numeric, sometimes strings.
# If they are seen to be numeric first, ES will erroneously
# index them as "long" and then subsequently fail to index messages
# with strings in the field.
keyword_fields = ["nick_id", "user_id", "net_id"]

mapping_int = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
        }
    }
}
mapping = dict(mapping_int)
for field in keyword_fields:
    mapping["mappings"]["properties"][field] = {"type": "text"}


del mapping_int["mappings"]["properties"]["file_tim"]


async def initialise_elasticsearch():
    """
    Initialise the Elasticsearch client.
    """
    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
    client = AsyncElasticsearch(ELASTICSEARCH_HOST, http_auth=auth, verify_certs=False)
    for index in ("main", "meta", "restricted", "internal"):
        if index == "internal":
            map_dict = mapping_int
        else:
            map_dict = mapping
        if await client.indices.exists(index=index):
            # update index with mapping
            await client.indices.put_mapping(
                index=index, properties=map_dict["mappings"]["properties"]
            )
        else:
            await client.indices.create(index=index, mappings=map_dict["mappings"])
    return client


async def store_batch(data):
    global client
    if not client:
        client = await initialise_elasticsearch()
    indexmap = {}
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            # index = "main"
            index = MAIN_SRC_MAP[msg["src"]]
            # schema = mc_s.schema_main
        elif msg["type"] in TYPES_META:
            index = "meta"
            # schema = mc_s.schema_meta
        elif msg["type"] in TYPES_INT:
            index = "internal"
            # schema = mc_s.schema_int

        INDEX = index

        # if key in schema:
        #     if isinstance(value, int):
        #         if schema[key].startswith("string") or schema[key].startswith(
        #             "text"
        #         ):
        #             msg[key] = str(value)
        # body = orjson.dumps(msg)
        if "ts" not in msg:
            raise Exception("No TS in msg")
        if INDEX not in indexmap:
            indexmap[INDEX] = [msg]
        else:
            indexmap[INDEX].append(msg)

    # Pack the indexmap with msgpack and publish it to Neptune
    packed_index = msgpack.packb(indexmap, use_bin_type=True)
    completed_publish = False
    for i in range(10):
        if completed_publish:
            break
        try:
            await pr.publish(MESSAGE_KEY, packed_index)
            completed_publish = True
        except aioredis.exceptions.ConnectionError:
            await asyncio.sleep(0.1)
    if not completed_publish:
        log.error("Failed to publish to Neptune")

    for index, index_messages in indexmap.items():
        for message in index_messages:
            result = await client.index(index=index, body=message)
            if not result["result"] == "created":
                log.error(f"Indexing failed: {result}")
    log.debug(f"Indexed {len(data)} messages in ES")


async def queue_message(msg):
    """
    Queue a message on the Redis buffer.
    """
    # TODO: msgpack
    message = orjson.dumps(msg)
    await ar.lpush(KEYNAME, message)


async def queue_message_bulk(data):
    """
    Queue multiple messages on the Redis buffer.
    """
    for msg in data:
        # TODO: msgpack
        message = orjson.dumps(msg)
        await ar.lpush(KEYNAME, message)
docker-compose.prod.yml (new file)
@@ -0,0 +1,195 @@
version: "2.2"

services:
  app:
    image: pathogen/monolith:latest
    container_name: monolith
    build: .
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
    # env_file:
    #   - stack.env
    networks:
      - default
      - xf
      - db
    environment:
      PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
      MODULES_ENABLED: "${MODULES_ENABLED}"
      DISCORD_TOKEN: "${DISCORD_TOKEN}"
      THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
      THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
      THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
      THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
      THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
      THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
      THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
      THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
      THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
      THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
      THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
      #THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
      THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
      # How many messages to ingest at once from Redis
      MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
      # Time to wait between polling Redis again
      MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
      # Number of 4chan threads to request at once
      MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
      # Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
      MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
      # Time to wait after finishing a crawl before starting again
      MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
      # Semaphore value
      MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
      # Threads to use for data processing
      # Leave uncommented to use all available threads
      MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
      # Enable performance metrics after message processing
      MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
      MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
      REDIS_PASSWORD: "${REDIS_PASSWORD}"

  db:
    #image: pathogen/manticore:kibana
    image: manticoresearch/manticore:dev
    container_name: monolith_db
    #build:
    #  context: ./docker/manticore
    #  args:
    #    DEV: 1
    restart: always
    ports:
      - 9308
      - 9312
      - 9306
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
      memlock:
        soft: -1
        hard: -1
    environment:
      # - MCL=1
      - EXTRA=1
    networks:
      - default
      - xf
      - db
    volumes:
      - ./docker/data:/var/lib/manticore
      # - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf

  # threshold:
  #   image: pathogen/threshold:latest
  #   container_name: threshold
  #   build: legacy/docker
  #   volumes:
  #     - ${PORTAINER_GIT_DIR}:/code
  #     - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
  #     #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
  #     - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
  #   volumes_from:
  #     - tmp
  #   ports:
  #     - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
  #     - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
  #     - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
  #   environment:
  #     PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
  #     MODULES_ENABLED: "${MODULES_ENABLED}"
  #     DISCORD_TOKEN: "${DISCORD_TOKEN}"
  #     THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
  #     THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
  #     THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
  #     THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
  #     THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
  #     THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
  #     THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
  #     THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
  #     THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
  #     THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
  #     THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
  #     #THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
  #     THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
  #     # How many messages to ingest at once from Redis
  #     MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
  #     # Time to wait between polling Redis again
  #     MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
  #     # Number of 4chan threads to request at once
  #     MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
  #     # Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
  #     MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
  #     # Time to wait after finishing a crawl before starting again
  #     MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
  #     # Semaphore value
  #     MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
  #     # Threads to use for data processing
  #     # Leave uncommented to use all available threads
  #     MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
  #     # Enable performance metrics after message processing
  #     MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
  #     MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
  #     REDIS_PASSWORD: "${REDIS_PASSWORD}"
  #   # for development
  #   extra_hosts:
  #     - "host.docker.internal:host-gateway"
  #   networks:
  #     - default
  #     - xf
  #     - db

  ssdb:
    image: tsl0922/ssdb
    container_name: ssdb_monolith
    ports:
      - "1289:1289"
    environment:
      - SSDB_PORT=1289
    networks:
      - default
      - db

  # tmp:
  #   image: busybox
  #   container_name: tmp_monolith
  #   command: chmod -R 777 /var/run/socks
  #   volumes:
  #     - /var/run/socks

  redis:
    image: redis
    container_name: redis_monolith
    command: redis-server /etc/redis.conf
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
      - monolith_redis_data:/data
    # volumes_from:
    #   - tmp
    healthcheck:
      test: "redis-cli ping"
      interval: 2s
      timeout: 2s
      retries: 15
    networks:
      - default
      - xf
      - db

networks:
  default:
    driver: bridge
  xf:
    external: true
  db:
    external: true

volumes:
  monolith_redis_data:
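All runtime tuning now flows from stack.env into these environment variables rather than an env_file mount; inside the app they would be read with os.getenv, as db.py already does for REDIS_PASSWORD. A sketch (the defaults and casts here are illustrative, not taken from the repo):

    from os import getenv

    REDIS_PASSWORD = getenv("REDIS_PASSWORD")  # used by db.py for the Neptune Redis connection
    CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "1000"))  # illustrative default
    ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5"))  # illustrative default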
(deleted file; name not shown in the capture)
@@ -1,87 +0,0 @@
version: "2.2"

services:
  app:
    image: pathogen/monolith:latest
    container_name: monolith
    build: ${PORTAINER_GIT_DIR}/docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
    env_file:
      - ../stack.env
    networks:
      - default
      - pathogen
      - elastic

  threshold:
    image: pathogen/threshold:latest
    container_name: threshold
    build: ../legacy/docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
    volumes_from:
      - tmp
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - ../stack.env
    # for development
    extra_hosts:
      - "host.docker.internal:host-gateway"
    networks:
      - default
      - pathogen

  ssdb:
    image: tsl0922/ssdb
    container_name: ssdb_monolith
    ports:
      - "1289:1289"
    environment:
      - SSDB_PORT=1289
    networks:
      - default

  tmp:
    image: busybox
    container_name: tmp_monolith
    command: chmod -R 777 /var/run/socks
    volumes:
      - /var/run/socks

  redis:
    image: redis
    container_name: redis_monolith
    command: redis-server /etc/redis.conf
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
      - redis_data:/data
    volumes_from:
      - tmp
    healthcheck:
      test: "redis-cli -s /var/run/socks/redis.sock ping"
      interval: 2s
      timeout: 2s
      retries: 15

networks:
  default:
    driver: bridge
  pathogen:
    external: true
  elastic:
    external: true

volumes:
  redis_data:
(redis config; file name not shown in the capture)
@@ -1,2 +1,4 @@
-unixsocket /var/run/socks/redis.sock
-unixsocketperm 777
+# unixsocket /var/run/socks/redis.sock
+# unixsocketperm 777
+port 6379
+requirepass changeme
(deleted requirements file; name not shown in the capture)
@@ -1,24 +0,0 @@
wheel
beautifulsoup4
redis
siphashc
aiohttp[speedups]
python-dotenv
#manticoresearch
numpy
aioredis[hiredis]
#aiokafka
vaderSentiment
polyglot
pyicu
pycld2
morfessor
six
nltk
#spacy
gensim
python-Levenshtein
orjson
uvloop
elasticsearch[async]
msgpack
(requirements file; name not shown in the capture)
@@ -1,11 +1,10 @@
 wheel
-pre-commit
 beautifulsoup4
 redis
 siphashc
 aiohttp[speedups]
 python-dotenv
-#manticoresearch
+manticoresearch
 numpy
 aioredis[hiredis]
 #aiokafka
(schema module; file name not shown in the capture)
@@ -129,6 +129,15 @@ schema_main = {
     "version_sentiment": "int",
     # 1, 2
     "version_tokens": "int",
+    # en, ru
+    "lang_code": "string indexed attribute",
+    "lang_name": "text",
+    "match_ts": "timestamp",
+    "batch_id": "bigint",
+    "rule_id": "bigint",
+    "index": "string indexed attribute",
+    "meta": "text",
+
 }
 
 schema_meta = {
(4chan source module; file name not shown in the capture)
@@ -60,7 +60,7 @@ class Chan4(object):
         self.log.debug(f"Decoded hash key: {self.hash_key}")
 
     async def run(self):
-        if not BOARDS:
+        if "ALL" in BOARDS:
             await self.get_board_list()
         else:
             self.boards = BOARDS
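Behaviour change: previously an empty BOARDS list meant "crawl every board"; now the full board list is only fetched when the configured list explicitly contains "ALL". A sketch of the gate, assuming BOARDS is parsed from MONOLITH_CH4_BOARDS elsewhere in the module (the parsing and helper name here are illustrative):

    from os import getenv

    BOARDS = getenv("MONOLITH_CH4_BOARDS", "").split(",")  # e.g. "ALL" or "g,tv,mu"

    if "ALL" in BOARDS:
        boards = fetch_full_board_list()  # placeholder for Chan4.get_board_list()
    else:
        boards = BOARDS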