Compare commits

365 Commits
prod ... master

Author SHA1 Message Date
81f05d4263 Begin implementing RTS 2026-02-17 12:14:29 +00:00
dc533f266f Add psutil to requirements 2025-01-24 12:18:17 +00:00
ea4b5e6321 Begin implementation of dynamic throttling framework 2025-01-24 12:17:43 +00:00
54ecfbae64 Add throttling for performance 2025-01-24 12:17:22 +00:00
352909bec0 Begin implementing RTS process 2025-01-23 11:30:01 +00:00
1cc2ef629e Automatic optimisation for ingest chunking based on processed messages 2025-01-23 11:29:48 +00:00
1aeadaf3b7 Stringify tokens and return message number from processing 2025-01-23 11:29:07 +00:00
ba8c33d8fc Update socket and database URLs 2025-01-23 11:27:51 +00:00
2ef2249be7 Update container files to work with Podman 2025-01-23 11:26:58 +00:00
054e9caca0 Update to run with Podman 2024-12-29 17:35:57 +00:00
5ea4e5f460 Bump versions in pre-commit config 2023-02-09 07:20:13 +00:00
210237b50a Update pre-commit versions 2023-02-09 07:20:13 +00:00
87b81ac236 Retry Redis ingest if it failed 2023-01-13 07:20:27 +00:00
02071758b5 Send messages to Neptune Redis via PubSub 2023-01-12 07:20:43 +00:00
0ab67becff Give option for only crawling some boards 2022-12-22 07:20:26 +00:00
ebaf8c765d Allow disabling modules in environment variables 2022-12-22 07:20:26 +00:00
ce2d7684bc Run ingest task first 2022-12-22 10:11:48 +00:00
508b00e471 Pre-create meta index 2022-11-23 19:02:31 +00:00
9d1e4b44e8 Add pathogen network 2022-11-23 18:23:20 +00:00
371bce1094 Remove print statements 2022-11-22 21:43:56 +00:00
be0cf231b4 Fix mapping and make Threshold talk to SSDB 2022-11-22 21:42:35 +00:00
1993c8f1d2 Use Portainer Git directory for Redis config 2022-11-22 21:19:13 +00:00
9d35930b3b Re-add Portainer Git dir 2022-11-22 21:15:19 +00:00
34346006ab Remove leftover Docker files in legacy 2022-11-22 21:11:46 +00:00
42a3f5da03 Remove tarred Docker definition 2022-11-22 07:20:52 +00:00
42657aeee0 Lower Python version due to gensim incompatibility 2022-11-22 21:07:34 +00:00
1c34aa4a01 Clean up legacy and debugging code 2022-11-22 07:20:27 +00:00
6f3db61532 Add debugging statements for Portainer 2022-11-22 20:37:58 +00:00
3c18858c48 Use relative path for code 2022-11-22 20:35:13 +00:00
78c6ef96d2 Use relative path for build directory 2022-11-22 20:30:41 +00:00
d2a174c1c4 Remove Portainer Git volume 2022-11-22 20:28:44 +00:00
c53438d07b Remove port variable 2022-11-22 20:17:51 +00:00
93353f34e7 Update env example file 2022-11-22 20:17:40 +00:00
6b1604b724 Remove old compose file 2022-11-22 20:17:28 +00:00
49f46c33ba Fully implement Elasticsearch indexing 2022-11-22 20:15:02 +00:00
052631c71f Remove infrastructure Docker definition 2022-11-22 18:41:29 +00:00
7edc231ea9 Remove Manticore and Superset stuff 2022-11-22 18:37:49 +00:00
44d6d90325 Update Druid spec 2022-11-21 18:59:53 +00:00
1c2ff41b56 Add ripsecrets to pre-commit hook 2022-11-03 07:20:30 +00:00
51a9b2af79 Improve memory usage and fix 4chan crawler 2022-10-21 07:20:30 +01:00
2d7b6268dd Don't shadow previous iterator variable 2022-10-21 07:20:30 +01:00
e5b5268f5c Add example Druid spec 2022-10-21 07:20:30 +01:00
dc1ed1fe10 Print the length of the flattened list in debug message 2022-10-21 07:20:30 +01:00
eaf9a3c937 Remove unused ssdb_data volume 2022-10-21 07:20:30 +01:00
054a7a3ccf Don't mount the template directory 2022-10-21 07:20:30 +01:00
f774f4c2d2 Add some environment variables to control debug output 2022-10-21 07:20:30 +01:00
e32b330ef4 Switch to SSDB for message queueing 2022-10-21 11:53:29 +01:00
8c596ec516 Update gitignore 2022-10-21 11:53:28 +01:00
ab5e85c5c6 Begin switching away from Redis 2022-10-21 11:14:51 +01:00
7482064aee Clean up docker environment 2022-10-19 16:45:18 +01:00
dccbc6b158 Remove dependencies on infra stuff 2022-10-11 11:16:24 +01:00
8cc1a48a25 Separate out infra in production 2022-10-11 11:04:03 +01:00
83e8fb0e38 Remove event log file 2022-10-05 12:52:30 +01:00
64cf7d0d4a Set Superset directory relative to Portainer Git root 2022-10-04 21:43:16 +01:00
ae12e37e9b Set Superset path properly 2022-10-04 21:41:22 +01:00
5bb9bd3998 Use local storage in production 2022-10-04 21:33:08 +01:00
d96dc573c5 Update production compose 2022-10-04 21:32:14 +01:00
aea1c7faf6 Use one image for all the Druid services 2022-10-04 21:30:17 +01:00
2d6b3bb090 Set Superset volume relative to docker folder 2022-10-04 20:54:38 +01:00
83ffd6517c Switch quickstart setting to nano 2022-10-04 20:37:02 +01:00
8465e8fb77 Set Superset env file relative to docker directory 2022-10-04 20:30:14 +01:00
d7d9958e54 Add persistent Redis data store and copy over Druid config to production 2022-10-04 20:26:58 +01:00
464c831686 Add Apache Superset and fix Druid resource usage 2022-10-04 20:17:04 +01:00
5ad6cd0354 Add postgres config to Metabase 2022-10-02 14:29:40 +01:00
06e80a9759 Time stuff and switch to gensim for tokenisation 2022-10-01 14:46:45 +01:00
5c91f1af87 Remove commented debug code 2022-09-30 07:22:22 +01:00
02ff44a6f5 Use only one Redis key for the queue to make chunk size more precise for thread allocation 2022-09-30 07:22:22 +01:00
a5d29606e9 Remove ujson 2022-09-30 15:30:34 +01:00
6b549dee6a Reformat 2022-09-30 15:23:00 +01:00
2dd2360b4f Add config file to Turnilo 2022-09-27 08:30:28 +01:00
a2f88e29e6 Implement uvloop 2022-09-23 07:20:30 +01:00
f0df3e80fd Print Ingest settings on start 2022-09-23 08:32:29 +01:00
09fc63d0ad Make debug output cleaner 2022-09-22 17:39:29 +01:00
e9ae499ce8 Fix indexer options 2022-09-22 17:39:18 +01:00
b6f8dabccd Fix Java variable in indexer parameters 2022-09-22 08:41:59 +01:00
395dfb1e7b Decrease memory requirements further and switch Kafka image 2022-09-21 21:11:13 +01:00
ee79762c73 Set Kafka max heap size 2022-09-21 20:26:05 +01:00
e58b9960b2 Set max memory for Metabase 2022-09-21 14:39:11 +01:00
4a60dec964 Remove debugging code and fix regex substitution 2022-09-21 12:48:54 +01:00
9ee55a720b Change dev container names 2022-09-21 12:09:18 +01:00
799286ca76 Change prod container names 2022-09-21 12:08:29 +01:00
0e62a5b4b8 Remove prod compose comment 2022-09-21 12:04:54 +01:00
5ebae02bf2 Remove commented code for debugging 2022-09-21 10:02:05 +01:00
ced3a251b2 Normalise fields in processing and remove invalid characters 2022-09-21 10:01:12 +01:00
740f93208b Make production volumes point to external storage 2022-09-21 10:00:48 +01:00
2763e52e6b Don't muddle up the topics when sending Kafka batches 2022-09-20 23:03:02 +01:00
869af451e5 Document new PROCESS_THREADS setting in example file 2022-09-20 22:43:04 +01:00
31c58dd85b Make CPU threads configurable 2022-09-20 22:29:13 +01:00
40a0c2d22e Make performance settings configurable 2022-09-20 22:22:13 +01:00
9f4d4784af Set memory size to 2.5GB 2022-09-08 07:20:30 +01:00
72c22ed91e Update DirectMemorySize to be 1.5GB 2022-09-19 21:51:07 +01:00
ce62a84cec Make MaxDirectMemory 0.5*cores 2022-09-19 19:15:57 +01:00
41b5ca6afd Make max memory size 512m 2022-09-19 19:10:33 +01:00
7db3504251 Further decrease Druid memory requirements 2022-09-19 17:07:15 +01:00
1284700e61 Bump production Kafka healthcheck timeout 2022-09-19 11:18:52 +01:00
a9803fc79c Decrease production Druid max memory size 2022-09-19 10:51:34 +01:00
d4861811e5 Increase Kafka retries 2022-09-19 10:48:29 +01:00
3c2e8e8e67 Change Metabase port 2022-09-18 13:15:10 +01:00
f60c08918e Add docker environment file 2022-09-18 13:05:08 +01:00
0d6b3763f9 Update production compose 2022-09-18 13:04:08 +01:00
d4b8e11525 Reformat comment 2022-09-18 13:02:06 +01:00
38d00f2c21 Implement restricted sources 2022-09-18 13:01:19 +01:00
cb11ce9b12 Fix merge conflict 2022-09-16 17:45:24 +01:00
a89b5a8b6f Implement sentiment/NLP annotation and optimise processing 2022-09-16 17:09:49 +01:00
f432e9b29e Properly process Redis buffered messages and ingest into Kafka 2022-09-14 18:32:32 +01:00
c5f01c3084 Ingest into Kafka and queue messages better 2022-09-13 22:17:46 +01:00
47c5f89914 Implement Apache Druid/Kafka and Metabase 2022-09-13 22:17:32 +01:00
68fd5fa230 Switch to latest image for dev docker-compose 2022-09-13 09:20:43 +01:00
fd90c233c2 Begin implementing Apache Druid 2022-09-08 07:20:30 +01:00
0eb4a04b89 Use stable after all 2022-09-08 07:20:30 +01:00
e196172e04 Switch production image back to dev 2022-09-08 07:20:30 +01:00
41a8cea873 Lower memory requirements to prevent crashes 2022-09-08 07:20:30 +01:00
9cf4e945d1 Set dev image back to the default 2022-09-12 08:43:18 +01:00
04b5dec843 Treat text fields as string and try beta Kibana image 2022-09-12 08:27:13 +01:00
40492b1595 Add Mysql port to ports instead of expose 2022-09-10 13:20:06 +01:00
90fed411e6 Expose the Mysql port 2022-09-10 13:16:19 +01:00
0dde7d6f30 Use dev image of manticore 2022-09-10 12:03:45 +01:00
fed3cdbf5a Remove indexer block to attempt to prevent Manticore DB crash 2022-09-08 07:20:30 +01:00
c2bdb3fd15 Reformat 2022-09-07 07:20:30 +01:00
5d042c1259 Raise open files limit for Redis 2022-09-07 07:20:30 +01:00
92475ee9a9 Add 4chan update message type to main types 2022-09-07 07:20:30 +01:00
5c3b338017 Implement threshold writing to Redis and manticore ingesting from Redis 2022-09-07 07:20:30 +01:00
54ea5fa8e9 Add config directories to gitignore 2022-09-08 09:45:18 +01:00
e79de2b377 Add aioredis 2022-09-08 09:44:27 +01:00
79b1bee9e4 Implement ingesting to Redis from Threshold 2022-09-07 07:20:30 +01:00
e3b2e1f36d Config relative to Git dir 2022-09-05 07:20:30 +01:00
eb71dd76f8 Store persistent database elsewhere 2022-09-05 07:20:30 +01:00
cd5eb61455 Improve DB performance with caching 2022-09-05 07:20:30 +01:00
a198f2a487 Reformat legacy project 2022-09-05 07:20:30 +01:00
19ee7071f5 Merge branch 'threshold' 2022-09-06 12:50:25 +01:00
122fdca5db Moved files to subdirectory 2022-09-06 12:50:09 +01:00
7bb2264d91 Increase thread delay time 2022-09-05 07:20:30 +01:00
1858e06c4b Alter schemas and 4chan performance settings 2022-09-05 07:20:30 +01:00
ddcfa614ad Remove some debugging code 2022-09-05 07:20:30 +01:00
a1b193c6da Change Python to 3.10 2022-09-05 07:20:30 +01:00
7606d77c2b Update production env file path 2022-09-05 07:20:30 +01:00
aaf2d58d86 Remove development dotenv loading 2022-09-05 07:20:30 +01:00
d7e49777ef Add debug statement 2022-09-05 07:20:30 +01:00
d1c6bd1fb5 Reformat and set the net and channel for 4chan 2022-09-05 07:20:30 +01:00
b8d2ecc009 Make crawler more efficient and implement configurable parameters 2022-09-05 07:20:30 +01:00
f8fc5e1a1b Split thread list into chunks to save memory 2022-09-05 07:20:30 +01:00
6e00f70184 Reformat code 2022-09-04 21:40:04 +01:00
0f717b987d Reinstate Redis cache 2022-09-04 21:38:53 +01:00
60c43b4eb5 Run processing in thread 2022-09-04 21:29:00 +01:00
db23b31f30 Implement aiohttp 2022-09-04 19:44:25 +01:00
f7860bf08b Begin implementing aiohttp 2022-09-04 13:47:32 +01:00
734a2b7879 Implement running Discord and 4chan gathering simultaneously 2022-09-02 22:30:45 +01:00
2731713ede Fix error when no email can be found 2022-08-27 11:19:28 +01:00
2a2f24f570 Fix getting first relay when they are not sequential 2022-08-26 22:17:12 +01:00
c7941bfcda Log authentication messages 2022-08-16 23:01:42 +01:00
49b0b9db46 Implement deduplicating channels 2022-08-16 22:01:35 +01:00
07f1fff125 Switch to siphash 2022-08-18 07:20:30 +01:00
8816024d90 Re-add fake messages 2022-08-15 19:49:21 +01:00
b61316d805 Detect queries if nick and channel are the same 2022-08-15 19:24:42 +01:00
a65098c222 Add sinst fetch and fix message send logic 2022-08-15 19:15:12 +01:00
ed3c8497bc Switch debugging statements to trace in ChanKeep 2022-08-15 19:15:00 +01:00
0b69893e17 Fix query handling and don't send a fake message 2022-08-15 17:59:31 +01:00
e4c1d80250 Only run pingmsg after negative has completed 2022-08-18 07:20:30 +01:00
415a0b1135 Fix debug statements and amend function names 2022-08-18 07:20:30 +01:00
d026881086 Properly format string 2022-08-18 07:20:30 +01:00
ce32ab4722 Improve regPing debugging 2022-08-18 07:20:30 +01:00
2942929478 Improve regPing negative handling logic 2022-08-18 07:20:30 +01:00
53ee69540f Fix double messages and regPing logic 2022-08-18 07:20:30 +01:00
b25cb1699f Set the channel limit on connected relays, not active 2022-08-18 07:20:30 +01:00
7efde28d99 Look before you leap to confirming registrations 2022-08-18 07:20:30 +01:00
659162ebc6 Fix IRC config mutation 2022-08-18 07:20:30 +01:00
d0ea3bb221 Change authentication endpoint 2022-08-18 07:20:30 +01:00
e64aaf99d8 Reorder API endpoints to prevent clashing 2022-08-18 07:20:30 +01:00
f4225b622f Add more debugging information 2022-08-15 00:39:22 +01:00
6f44921647 Figure out the channel parsing logic 2022-08-15 00:36:36 +01:00
731c6a2fd1 Pass a list instead of listinfo 2022-08-15 00:29:08 +01:00
ffed420c11 Fix variable placement 2022-08-15 00:27:16 +01:00
66e046e15f Fix list parsing 2022-08-15 00:26:11 +01:00
8f44f34d0e Fix debugging code in keepChannels 2022-08-15 00:08:11 +01:00
1b68568fb7 Add debugging code in keepChannels 2022-08-15 00:07:29 +01:00
bdb2949d17 Subtract one from length of list for indices 2022-08-15 00:04:49 +01:00
560af8aeb0 Lower max_chans to length of LIST if it's shorter 2022-08-15 00:03:12 +01:00
153d3dd847 Reset negative pass status when requesting recheck 2022-08-14 23:58:35 +01:00
6cdadd23a0 Implement initial WHO loop delay 2022-08-14 20:58:41 +01:00
4fa5c25e94 Fix getting all unregistered relays 2022-08-14 20:58:30 +01:00
1b39b46121 Blacklist channels we are kicked from 2022-08-14 20:44:04 +01:00
c55a4058b1 Use JSON for sending messages 2022-08-14 16:45:40 +01:00
b62200d410 Implement API call to register 2022-08-14 16:26:09 +01:00
e30250603b Convert num to number in registration confirmation 2022-08-14 16:09:32 +01:00
02739abaf4 Allow current nick substitution in IRC commands 2022-08-14 15:53:18 +01:00
281eb75b26 Fix variable shadowing 2022-08-14 15:43:48 +01:00
559e1f4afd Print identification message 2022-08-14 13:51:13 +01:00
060ee4f0d5 Implement manual authentication mode 2022-08-14 13:13:05 +01:00
f7d390da32 Implement API for authentication management actions 2022-08-14 12:43:33 +01:00
0b20a05b19 More debugging for reg tests and getstr command 2022-08-14 11:41:29 +01:00
39059084ef Add allRelaysActive output to network info 2022-08-14 10:58:28 +01:00
feecf48b9b Add debug statements and only check if network is connected when parting channels 2022-08-14 09:25:54 +01:00
9b14979f29 Use JSON for joining channels and don't shadow auth variable when getting network info 2022-08-14 09:25:01 +01:00
a204be25c5 Make channel deletion endpoint accept JSON 2022-08-14 00:01:14 +01:00
a42c6be1b7 LBYL 2022-08-13 23:38:13 +01:00
a82355b660 Add more information to relay API return 2022-08-13 23:36:39 +01:00
2a3c9f80a3 Add even more debugging 2022-08-13 23:18:56 +01:00
3ca5a3452c Extra debugging for getting active relays 2022-08-13 23:17:26 +01:00
5f33ba7f1d Fix typo in module name 2022-08-13 23:14:51 +01:00
d9d3faf860 Extra debugging for get_first_relay 2022-08-13 23:14:17 +01:00
4c91b6ad2c Filter queries more carefully 2022-08-13 22:46:10 +01:00
abeba6bc06 Update CHANLIMIT on all instances when set via API 2022-08-13 22:36:52 +01:00
406b3d77f4 Add helper to get all active relays 2022-08-13 22:36:18 +01:00
047e9148aa Implement API endpoint to enable authentication 2022-08-13 22:25:29 +01:00
5db659b9af Filter AUTH channel (OFTC fix) 2022-08-13 22:15:50 +01:00
fced2b7d75 Use ChanKeep system for joining channels with joinSingle 2022-08-13 21:54:14 +01:00
16133fb7b4 Fully make use of ECA for multiple channels 2022-08-13 21:40:53 +01:00
5c95f35c61 Return chanlimit for each relay 2022-08-13 21:22:43 +01:00
ad7a5cfe49 Check token before attempting to confirm 2022-08-13 20:55:36 +01:00
92df4fb9a3 Implement API endpoint for provisioning relays 2022-08-13 20:51:31 +01:00
28c1a33615 Implement configurable chanlimit and add more fields about LIST output to Redis 2022-08-13 20:37:21 +01:00
9470f0d0d9 Implement updating registration via API 2022-08-13 20:36:51 +01:00
496a3d0374 Implement ChanKeep without requiring persistent chanlimits on all networks 2022-08-13 19:20:29 +01:00
75965497be Add some debug statements and statistics for chanlimits 2022-08-13 18:40:13 +01:00
1e9dd1b223 Print message if relay is unauthenticated/disconnected 2022-08-13 14:06:34 +01:00
df6b9e34a3 Return relay numbers with channel list 2022-08-13 13:47:42 +01:00
facf58ec2c Add connected status to IRC info return and check when getting active relays 2022-08-13 13:40:33 +01:00
21ed66bc00 Reformat code 2022-08-13 13:32:22 +01:00
5c63fb5048 Implement getting LIST information from API 2022-08-13 13:27:20 +01:00
c3fd8a97f7 Provision relay on creation 2022-08-13 00:18:06 +01:00
acc363d207 Add docstrings to chankeep 2022-08-12 23:53:02 +01:00
49214644ff Implement migrating networks 2022-08-12 23:32:00 +01:00
20f59362ff Subtract allocated channel slots from total 2022-08-12 22:31:12 +01:00
065fe94cbd Improve channel allocation and write basic tests for it 2022-08-12 22:27:49 +01:00
6306231098 Make channel join notification a TRACE 2022-08-12 20:19:39 +01:00
5c2ef740e6 Fix email command 2022-08-12 20:19:33 +01:00
7e51178a10 Add endpoint to get the bot's nickname 2022-08-09 07:20:30 +01:00
a2b6ebd912 Properly implement querying with API 2022-08-09 07:20:30 +01:00
ec943203d0 Get our hostname from WHO when we create fake events 2022-08-09 07:20:30 +01:00
8dc176aa54 Fire a fake event when we send a message 2022-08-09 07:20:30 +01:00
8ba4831d9c Implement best effort allocation 2022-08-11 21:44:19 +01:00
4c040bbf78 Simplify variable names and reformat 2022-08-11 20:51:41 +01:00
5a4ae2153e Use ceil instead of round for relay number rounding 2022-08-11 20:46:44 +01:00
8c3a75b3c8 Expand ECA secondary allocation algorithm 2022-08-11 20:43:34 +01:00
dc13515aa8 Adding more debug statements in ECA system 2022-08-11 20:36:24 +01:00
d38f7ba1ba Print information about received LIST 2022-08-11 20:32:49 +01:00
7c9903bca2 Return correct data type for provisioning relays 2022-08-11 20:29:01 +01:00
22e853a3f7 Simplify is_first_relay 2022-08-11 20:26:19 +01:00
b5326e92a1 Add even more debugging 2022-08-11 20:21:39 +01:00
604bee1b78 Add more LIST handling debugging 2022-08-11 20:18:49 +01:00
87ee96dd26 Don't add 1 to current relays when iterating 2022-08-11 20:13:30 +01:00
cc0e3b872b Add extra debug call for allRelaysActive 2022-08-11 20:12:38 +01:00
16d268ca90 Reformat helpers 2022-08-11 20:09:14 +01:00
6193502f2e Enable debug mode with env vars 2022-08-11 20:09:01 +01:00
b16289cded Update IRC template 2022-08-11 19:49:58 +01:00
502b45cda5 Allow gaps in relay numbering 2022-08-11 19:22:09 +01:00
4c8b584ef4 Implement deleting networks 2022-08-02 09:01:34 +01:00
b42c82eac2 More error handling when joining channels with ChanKeep 2022-08-02 09:01:24 +01:00
4c9ac3ec42 Implement adding networks 2022-08-01 23:02:20 +01:00
db4b6cc6f9 Implement requesting channel list for network 2022-08-01 21:38:46 +01:00
dae62ea544 Remove debugging code 2022-08-01 21:31:48 +01:00
e8870e95e7 Implement automatic provisioning 2022-08-01 19:34:35 +01:00
0dedb545f0 Implement updating aliases 2022-08-01 19:05:12 +01:00
6909fb68f7 Implement API endpoint to add next relay 2022-07-29 22:39:08 +01:00
54b5561a75 Implement deleting relays and fix adding 2022-07-29 22:11:43 +01:00
d51e87b09f Reformat code 2022-07-29 17:28:19 +01:00
6359918639 Fix joining channels with inactive relays 2022-07-29 17:28:09 +01:00
ba1f8407d1 Implement creating relays via the API 2022-07-29 17:27:40 +01:00
78f3f4520d Fix Redis config path 2022-07-29 22:22:22 +01:00
deb89e9202 Use proper port for SSL listener 2022-07-29 22:22:22 +01:00
f88551f926 Use Git dir to make redis config absolute path 2022-07-29 09:06:13 +01:00
dc6dcd79db Use paths relative to root in production compose 2022-07-29 09:04:18 +01:00
1d8bb73645 Switch paths 2022-07-29 09:00:08 +01:00
9de0b0919d Use relative paths 2022-07-29 08:59:02 +01:00
bf79c013d5 Fix redis.conf location in prod compose 2022-07-29 08:48:30 +01:00
e1fc59f636 Don't pass template directory 2022-07-29 08:35:56 +01:00
e662d36542 Fix path issue 2022-07-29 08:32:39 +01:00
cd38aab318 Pass through configuration directories to compose 2022-07-29 08:31:01 +01:00
6e99605701 Fix environment variable path on production compose 2022-07-29 08:11:37 +01:00
248273648d Properly configure production compose file 2022-07-29 08:02:10 +01:00
479e5072d2 Create separate production configuration 2022-07-29 08:01:48 +01:00
a9f499ec67 Remove print statements 2022-07-28 21:30:23 +01:00
ef61145671 Add trailing slash to example directory 2022-07-28 21:29:08 +01:00
3818308b75 Add Portainer Git directory to env file 2022-07-28 21:27:26 +01:00
2f74d79bc4 Seamlessly handle nonexistent configurations 2022-07-28 21:11:01 +01:00
3d67578179 Add stack.env file 2022-07-28 19:57:26 +01:00
9715b28f47 Move env file to example 2022-07-28 19:50:48 +01:00
a258ec8ad1 Properly pass environment variables to the process 2022-07-28 19:50:07 +01:00
f66f998f54 Make some addresses and hosts configurable with environment variables 2022-07-28 19:38:37 +01:00
e3700e309d Lower compose version 2022-07-28 19:25:15 +01:00
4e195b2954 Add docker definitions 2022-07-28 19:21:08 +01:00
8409a39e57 Implement relay, channel and alias management 2022-07-27 22:03:42 +01:00
b30a3a535d Implement editing networks via the API 2022-07-27 08:59:17 +01:00
b9c1470410 Implement network and channels view 2022-07-26 22:16:35 +01:00
5aebf63c2e Implement API endpoint for network listing 2022-07-25 18:05:53 +01:00
b149886128 Don't send to Logstash if it's disabled 2022-07-21 13:40:40 +01:00
4b33559e65 Implement getting number of channels and users 2022-07-21 13:40:18 +01:00
f589c7fc16 Implement more API functions 2022-07-21 13:40:17 +01:00
47a3f84c1c Update config 2022-07-21 13:40:15 +01:00
f942e94ee5 Implement API 2022-07-21 13:40:13 +01:00
f0acbdbfa3 Begin work on API endpoint 2022-07-21 13:40:11 +01:00
e5a14b2c91 Reformat again 2022-07-21 13:40:09 +01:00
a5fd7d60fd Remove some legacy code 2022-07-21 13:40:07 +01:00
f4c5323de1 Reformat project 2022-07-21 13:40:05 +01:00
6c7d0d5c45 Reformat and fix circular import 2022-07-21 13:40:03 +01:00
3229d9b806 Revert "Reformat project"
This reverts commit 64e3e1160aa76d191740342ab3edc68807f890fb.
2022-07-21 13:40:01 +01:00
760e43b59a Reformat project 2022-07-21 13:39:59 +01:00
9d4d31fdc2 Don't attempt secondary registration if it is disabled 2022-07-21 13:39:57 +01:00
e4a6e0d3c2 Don't attempt to register if it is disabled 2022-07-21 13:39:56 +01:00
7ffdc63eeb Rename time to ts 2022-07-21 13:39:54 +01:00
757b22c4a1 Extra error handling around emails 2022-07-21 13:39:52 +01:00
1532cf482c Make Redis DBs configurable 2022-07-21 13:39:50 +01:00
4b2a1f2735 Add Redis DB numbers to configuration 2022-07-21 13:39:48 +01:00
5c7d71dc99 Fix provisioning with emails 2022-07-21 13:39:46 +01:00
745c7caa12 Fix some issues with the default config 2022-07-21 13:39:44 +01:00
e5685286ae Improve email command 2022-07-21 13:39:43 +01:00
ff1ee63900 Reformat code with pre-commit 2022-07-21 13:39:41 +01:00
Mark Veidemanis 0777a55264 Start implementing email command 2021-08-25 07:47:54 +00:00
Mark Veidemanis 152bc08970 Add Logstash file 2021-08-24 20:08:18 +00:00
Mark Veidemanis edc5f85ba6 Implement modifying emails for aliases 2021-06-06 10:31:13 +00:00
Mark Veidemanis c389094365 Finish Logstash implementation 2021-06-06 10:16:04 +00:00
Mark Veidemanis 5d63d7a1e9 Update requirements without versions 2021-06-06 10:13:43 +00:00
0959d978b3 Merge branch 'master' into datarestructure 2020-11-02 20:18:36 +00:00
9c95fa8eaf Implement relay-independent join 2020-11-02 20:14:02 +00:00
14daa9dfef Don't discard server messages 2020-11-02 20:13:36 +00:00
45fa21fea3 Use substitutions in registration tests 2020-11-01 22:19:03 +00:00
0473c57291 Additional error handling for command parsing 2020-11-01 22:18:48 +00:00
735fee9286 Fix bug with reg command 2020-11-01 20:43:51 +00:00
d405a4cd10 Add example file for blacklist 2020-11-01 19:55:32 +00:00
399075afd1 Implement channel blacklisting 2020-11-01 19:54:24 +00:00
a0bea0b18a Fix bug with using muser attribute when absent 2020-11-01 19:03:56 +00:00
5d09e1ade7 Fix syntax error in reg command 2020-11-01 18:50:17 +00:00
19e04dbf36 Implement setting modes in ZNC 2020-11-01 03:39:32 +00:00
abdfc48b95 Prepare command loader for reloading commands 2020-11-01 03:38:47 +00:00
f7e1f2d221 Implement registration at net-level 2020-11-01 03:37:29 +00:00
a78e05c0c3 Clarify message output on confirm command 2020-11-01 03:36:23 +00:00
e22349802b Log error when ZNC says a channel can't be joined 2020-10-31 23:58:51 +00:00
b652b11335 Fix registration cancellation bug in regproc 2020-10-31 23:58:03 +00:00
49fd03304d Fix various bugs and off by one with provisioning 2020-10-31 23:55:11 +00:00
b0eaa7fd47 Move WHO and NAMES logging to trace 2020-10-31 16:52:00 +00:00
9e17223258 Don't deduplicate global messages (NICK/QUIT) 2020-10-31 16:51:24 +00:00
d60d89dbf6 Improve authentication detection
Add a negative check in the event we are authenticated and registered,
but not confirmed, as this fools other checks.
2020-10-31 16:49:37 +00:00
eaeb4b72c2 Use zero-padded numbers to maximise usable ports 2020-10-31 00:13:59 +00:00
388cd1e4b9 Error checking in testing for registration message 2020-10-31 00:13:09 +00:00
b986d6ac45 Deauth bot when disconnected and lowercase user 2020-10-31 00:12:06 +00:00
c06e922749 Clarify error message to be more helpful 2020-10-31 00:11:28 +00:00
8deac2ab17 Implement another level of logging for tracing 2020-10-31 00:10:33 +00:00
4d25505625 Note that arguments to list are optional 2020-10-31 00:06:35 +00:00
69fbe180f1 Implement authentication checking on connection 2020-10-28 22:50:12 +00:00
812db95995 Add checks in dedup for time-less messages 2020-10-28 22:46:22 +00:00
b16b5d690b Fix decoding issue with some Redis keys 2020-10-28 22:30:49 +00:00
6acb106761 Provision users with lowercase names 2020-10-28 22:30:04 +00:00
7d9a45ee91 Add the time field to some notifications 2020-10-28 22:26:41 +00:00
913009ab71 Fix circular import in ChanKeep/provisioning modules 2020-10-28 18:38:27 +00:00
82c5c2d163 Start implementing prefixes 2020-07-09 19:43:47 +01:00
3acf182171 Fixes to auth detection and message parsing
* don't check authentication if the network doesn't need to
  register
* don't pass through muser for ZNC type messages
* avoid duplicate message for queries containing highlights
* make a copy of the cast for metadata analysis to avoid poisoning it
* set up callback for when the instance is authenticated, so we can
  request a LIST immediately if so desired
* separate out seeding functions to populate CHANLIMIT to ease future
  work involving other options, such as PREFIX
2020-06-07 17:26:53 +01:00
2a9869d0f9 Remove condition-based monitoring system 2020-06-07 15:31:43 +01:00
1640955e5c Fix various bugs in the event system
Squash many bugs in the event notification system and simplify the
code.
2020-06-02 21:34:15 +01:00
290e0b5f87 Fix syntax error in redis query 2020-05-31 21:54:43 +01:00
097f100ec5 Implement authentication detection
* pending command to see which instances have never authenticated
* authcheck command to see which instances are not currently
  authenticated
2020-05-31 21:52:56 +01:00
586a337ea4 Add help for pending command 2020-05-31 16:40:51 +01:00
5ee53ace4c Add additional error handling in user queries 2020-05-31 13:44:34 +01:00
81b0450904 Function to select and merge IRC network defs 2020-05-31 13:23:09 +01:00
5c6b626396 Check registration status before joining channels
Do not join channels if any relay for a network is unregistered.
2020-05-31 13:09:58 +01:00
4f9ca6088b Allow sending LIST to all networks at once 2020-05-31 13:08:00 +01:00
efb9666b6a Add confirm command
Confirm command to check which relays need manual
confirmation.
2020-05-31 12:32:12 +01:00
aec683ccce Remove leftover irc.json file 2020-05-30 21:42:26 +01:00
a3cdb35e05 Implement registration and confirmation of nicks 2020-05-30 21:40:10 +01:00
d99c3c394f Restructure provisioning into fewer functions 2020-05-30 21:37:22 +01:00
1ac1061348 Add irc.json to gitignore 2020-05-30 21:35:50 +01:00
690bf93676 Fix variable scope in LIST error handling 2020-04-21 23:32:17 +01:00
f4e5d248d5 Separate provisioning into user and auth info 2019-12-28 17:51:03 +00:00
97a25334aa Add IRC network definitions 2019-12-28 17:50:38 +00:00
06903d872e Add more comments and remove obsolete code 2019-12-07 16:35:29 +00:00
e3e522ad1e Add requirements 2019-11-17 19:09:17 +00:00
46 changed files with 1200 additions and 2391 deletions

.gitignore (vendored, 1 changed line)

@@ -159,3 +159,4 @@ docker/data
 *.pem
 legacy/conf/live/
 legacy/conf/cert/
+stack.env


@@ -1,15 +1,21 @@
 repos:
 - repo: https://github.com/psf/black
-  rev: 22.6.0
+  rev: 23.1.0
   hooks:
   - id: black
+    exclude: ^core/migrations
 - repo: https://github.com/PyCQA/isort
-  rev: 5.10.1
+  rev: 5.11.5
   hooks:
   - id: isort
     args: ["--profile", "black"]
 - repo: https://github.com/PyCQA/flake8
-  rev: 4.0.1
+  rev: 6.0.0
   hooks:
   - id: flake8
     args: [--max-line-length=88]
+    exclude: ^core/migrations
+- repo: https://github.com/sirwart/ripsecrets.git
+  rev: v0.1.5
+  hooks:
+  - id: ripsecrets


@@ -1,19 +1,19 @@
 # syntax=docker/dockerfile:1
-FROM python:3
-RUN useradd -d /code pathogen
+FROM python:3.10
+RUN useradd -d /code xf
 RUN mkdir /code
-RUN chown pathogen:pathogen /code
+RUN chown xf:xf /code
 RUN mkdir /venv
-RUN chown pathogen:pathogen /venv
-USER pathogen
+RUN chown xf:xf /venv
+USER xf
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 WORKDIR /code
 COPY requirements.txt /code/
-COPY discord-patched.tgz /code/
+COPY docker/discord-patched.tgz /code/
 RUN python -m venv /venv
 RUN . /venv/bin/activate && pip install -r requirements.txt

Makefile (new file, 20 lines)

@@ -0,0 +1,20 @@
run:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env up -d

build:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env build

stop:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env down

log:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env logs -f --names

run-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env up -d

stop-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env down

log-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env logs -f

db.py (253 changed lines)

@@ -1,23 +1,52 @@
-import random
+import asyncio
+from math import ceil
+from os import getenv
+from time import sleep
+
+import aiomysql
 import aioredis
+import manticoresearch
+import msgpack
 import orjson
+from manticoresearch.rest import ApiException
-# Kafka
-from aiokafka import AIOKafkaProducer
+from numpy import array_split
 from redis import StrictRedis

 import util
+from schemas import mc_s

-# KAFKA_TOPIC = "msg"
+mysql_pool = None
+
+configuration = manticoresearch.Configuration(host="http://127.0.0.1:9308")
+api_client = manticoresearch.ApiClient(configuration)
+api_instance = manticoresearch.IndexApi(api_client)

 log = util.get_logger("db")

 # Redis (legacy)
-r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
+# r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
+r = StrictRedis(
+    host="127.0.0.1",  # Replace with your Redis server's IP address
+    port=1289,  # Replace with your Redis server's port
+    db=0,  # Database number
+)

 # AIORedis
-ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
+# ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
+ar = aioredis.from_url("redis://127.0.0.1:1289", db=0)
+
+# /var/run/neptune-redis.sock
+# db = 10
+pr = aioredis.from_url("unix://var/run/neptune-redis.sock", db=10)
+# fr = aioredis.from_url("unix://var/run/fisk-redis.sock", db=10)
+fr = aioredis.from_url("unix://var/run/redis.sock", db=10)
+# pr = aioredis.from_url("redis://redis_neptune:6379", db=10, password=getenv("REDIS_PASSWORD"))
+
+KEYNAME = "queue"
+MESSAGE_KEY = "messages"
+OHLC_MESSAGE_KEY = "ohlc"

 TYPES_MAIN = [
     "msg",
@@ -32,89 +61,174 @@ TYPES_MAIN = [
     "topic",
     "update",
 ]
-MAIN_SRC_MAP = {
-    "dis": "main",
-    "irc": "restricted",
-    "4ch": "main",
-}
 TYPES_META = ["who"]
 TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
-KEYNAME = "queue"
-
-
-async def store_kafka_batch(data):
-    # log.debug(f"Storing Kafka batch of {len(data)} messages")
-    producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
-    await producer.start()
-    topicmap = {}
-    for msg in data:
-        if msg["type"] in TYPES_MAIN:
-            # index = "main"
-            index = MAIN_SRC_MAP[msg["src"]]
-            # schema = mc_s.schema_main
-        elif msg["type"] in TYPES_META:
-            index = "meta"
-            # schema = mc_s.schema_meta
-        elif msg["type"] in TYPES_INT:
-            index = "internal"
-            # schema = mc_s.schema_int
-        KAFKA_TOPIC = index
-        # if key in schema:
-        #     if isinstance(value, int):
-        #         if schema[key].startswith("string") or schema[key].startswith(
-        #             "text"
-        #         ):
-        #             msg[key] = str(value)
-        body = orjson.dumps(msg)
-        if "ts" not in msg:
-            raise Exception("No TS in msg")
-        if KAFKA_TOPIC not in topicmap:
-            topicmap[KAFKA_TOPIC] = [body]
-        else:
-            topicmap[KAFKA_TOPIC].append(body)
-    for topic, messages in topicmap.items():
-        batch = producer.create_batch()
-        for body in messages:
-            metadata = batch.append(key=None, value=body, timestamp=msg["ts"])
-            if metadata is None:
-                partitions = await producer.partitions_for(topic)
-                partition = random.choice(tuple(partitions))
-                await producer.send_batch(batch, topic, partition=partition)
-                # log.debug(
-                #     (
-                #         f"{batch.record_count()} messages sent to topic "
-                #         f"{topic} partition {partition}"
-                #     )
-                # )
-                batch = producer.create_batch()
-                continue
-        partitions = await producer.partitions_for(topic)
-        partition = random.choice(tuple(partitions))
-        await producer.send_batch(batch, topic, partition=partition)
-        # log.debug(
-        #     (
-        #         f"{batch.record_count()} messages sent to topic "
-        #         f"{topic} partition {partition}"
-        #     )
-        # )
-    log.debug(
-        "Kafka batches sent: "
-        + ", ".join([topic + ": " + str(len(topicmap[topic])) for topic in topicmap])
-    )
-    await producer.stop()
+async def init_mysql_pool():
+    """
+    Initialize the MySQL connection pool.
+    """
+    global mysql_pool
+    mysql_pool = await aiomysql.create_pool(
+        host="127.0.0.1", port=9306, db="Manticore", minsize=1, maxsize=10
+    )
+
+
+async def rts_store_message(index, data):
+    """
+    Store a RTS message into MySQL using an existing connection pool.
+    Prioritizes instant PubSub delivery, with minimal data storage overhead.
+    :param index: str
+    :param data: dict
+    """
+    # Publish to Redis PubSub
+    packed_index = msgpack.packb({"index": index, "data": data}, use_bin_type=True)
+    try:
+        await fr.publish(OHLC_MESSAGE_KEY, packed_index)
+    except aioredis.exceptions.ConnectionError as e:
+        raise e
+    await asyncio.sleep(0.1)
+
+    # Insert data into MySQL
+    try:
+        async with mysql_pool.acquire() as conn:
+            async with conn.cursor() as cur:
+                # Insert data into the table
+                query = f"""
+                    INSERT INTO {index} (s, o, c, h, l, v, a, i, t, t2, ts)
+                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                """
+                # Bind the values directly
+                await cur.execute(
+                    query,
+                    (
+                        data["s"],  # symbol
+                        data["o"],  # open
+                        data["c"],  # close
+                        data["h"],  # high
+                        data["l"],  # low
+                        data["v"],  # volume_base
+                        data["a"],  # volume_quote
+                        data["i"],  # interval
+                        data["t"],  # start_time
+                        data["t2"],  # end_time
+                        data["ts"],  # event_time
+                    ),
+                )
+                await conn.commit()
+        log.debug(f"Stored data for {data['s']} in MySQL.")
+    except aiomysql.Error as e:
+        log.error(f"MySQL error: {e}")
+
+
+async def store_batch(data):
+    """
+    Store a message into Manticore
+    :param data: list
+    """
+    if not data:
+        return
+    # 10000: maximum inserts we can submit to
+    # Manticore as of Sept 2022
+    split_posts = array_split(data, ceil(len(data) / 10000))
+    for messages in split_posts:
+        total = []
+        indexmap = {}
+        for msg in messages:
+            if msg["type"] in TYPES_MAIN:
+                index = "main"
+                schema = mc_s.schema_main
+            elif msg["type"] in TYPES_META:
+                index = "meta"
+                schema = mc_s.schema_meta
+            elif msg["type"] in TYPES_INT:
+                index = "internal"
+                schema = mc_s.schema_int
+            # normalise fields
+            for key, value in list(msg.items()):
+                if value is None:
+                    del msg[key]
+                if key in schema:
+                    if isinstance(value, int):
+                        if schema[key].startswith("string") or schema[key].startswith(
+                            "text"
+                        ):
+                            msg[key] = str(value)
+            body = {"insert": {"index": index, "doc": msg}}
+            total.append(body)
+            if "ts" not in msg:
+                raise Exception("No TS in msg")
+            if index not in indexmap:
+                indexmap[index] = [msg]
+            else:
+                indexmap[index].append(msg)
+        # END MSG IN MESSAGES
+
+        # Pack the indexmap with msgpack and publish it to Neptune
+        packed_index = msgpack.packb(indexmap, use_bin_type=True)
+        completed_publish = False
+        for i in range(10):
+            if completed_publish:
+                break
+            try:
+                await pr.publish(MESSAGE_KEY, packed_index)
+                completed_publish = True
+            except aioredis.exceptions.ConnectionError as e:
+                raise e
+            await asyncio.sleep(0.1)
+        if not completed_publish:
+            log.error("Failed to publish to Neptune")
+
+        body_post = ""
+        for item in total:
+            # print("ITEM", item)
+            body_post += orjson.dumps(item).decode("utf-8")
+            body_post += "\n"
+
+        # print("BODY POST INDEX", index, body_post)
+        try:
+            # Bulk index operations
+            api_response = api_instance.bulk(body_post)  # , async_req=True
+        except ApiException as e:
+            log.error("Exception when calling IndexApi->bulk: %s\n" % e)
+            log.error("body_post attempted to send", body_post)
+        log.info(f"Completed ingest to MC of length {len(total)}")
+    # END MESSAGES IN SPLIT
+
+
+def update_schema():
+    pass
+
+
+def create_index(api_client):
+    util_instance = manticoresearch.UtilsApi(api_client)
+    schemas = {
+        "main": mc_s.schema_main,
+        "rule_storage": mc_s.schema_rule_storage,
+        "meta": mc_s.schema_meta,
+        "internal": mc_s.schema_int,
+    }
+    for name, schema in schemas.items():
+        schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])
+        create_query = (
+            f"create table if not exists {name}({schema_types}) engine='columnar'"
+        )
+        print("Schema types", create_query)
+        util_instance.sql(create_query)


 async def queue_message(msg):
     """
     Queue a message on the Redis buffer.
     """
+    # TODO: msgpack
     message = orjson.dumps(msg)
-    await ar.sadd(KEYNAME, message)
+    await ar.lpush(KEYNAME, message)


 async def queue_message_bulk(data):
@@ -122,5 +236,6 @@ async def queue_message_bulk(data):
     Queue multiple messages on the Redis buffer.
     """
     for msg in data:
+        # TODO: msgpack
         message = orjson.dumps(msg)
-        await ar.sadd(KEYNAME, message)
+        await ar.lpush(KEYNAME, message)
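
For orientation: in the new db.py, queue_message() and queue_message_bulk() LPUSH orjson-encoded events onto the Redis list named by KEYNAME, and a separate ingest task later drains that list and hands decoded batches to store_batch(), which publishes them to Neptune over PubSub and bulk-inserts them into Manticore. A minimal consumer sketch under those assumptions (the chunk size, delay and rpop-based draining are illustrative only, not the repository's actual ingest module):

import asyncio

import orjson

import db  # the new db.py shown in the diff above

CHUNK_SIZE = 1000  # illustrative; the real value comes from MONOLITH_INGEST_CHUNK_SIZE
ITER_DELAY = 0.5   # illustrative; the real value comes from MONOLITH_INGEST_ITER_DELAY


async def ingest_loop():
    while True:
        # Drain up to CHUNK_SIZE JSON-encoded messages from the Redis list
        # that queue_message()/queue_message_bulk() push onto.
        raw = [await db.ar.rpop(db.KEYNAME) for _ in range(CHUNK_SIZE)]
        batch = [orjson.loads(item) for item in raw if item is not None]
        if batch:
            # Publish to Neptune and bulk-insert into Manticore.
            await db.store_batch(batch)
        await asyncio.sleep(ITER_DELAY)


if __name__ == "__main__":
    asyncio.run(ingest_loop())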

db_old_ref.py (new file, 174 lines)

@@ -0,0 +1,174 @@
import asyncio
from os import getenv

import aioredis
import msgpack
import orjson
import redis

# Elasticsearch
from elasticsearch import AsyncElasticsearch

import util

trues = ("true", "1", "t", True)

# INDEX = "msg"

log = util.get_logger("db")

# Redis (legacy)
# r = redis.from_url("redis://ssdb:1289", db=0)

# AIORedis
ar = aioredis.from_url("redis://ssdb:1289", db=0)

# Neptune redis for PubSub
pr = aioredis.from_url("redis://redis_neptune:6379", db=10)

TYPES_MAIN = [
    "msg",
    "notice",
    "action",
    "part",
    "join",
    "kick",
    "quit",
    "nick",
    "mode",
    "topic",
    "update",
]
MAIN_SRC_MAP = {
    "dis": "main",
    "irc": "restricted",
    "4ch": "main",
}
TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYNAME = "queue"
MESSAGE_KEY = "messages"

ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false") in trues

client = None

# These are sometimes numeric, sometimes strings.
# If they are seen to be numeric first, ES will erroneously
# index them as "long" and then subsequently fail to index messages
# with strings in the field.
keyword_fields = ["nick_id", "user_id", "net_id"]

mapping_int = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
        }
    }
}
mapping = dict(mapping_int)
for field in keyword_fields:
    mapping["mappings"]["properties"][field] = {"type": "text"}
del mapping_int["mappings"]["properties"]["file_tim"]


async def initialise_elasticsearch():
    """
    Initialise the Elasticsearch client.
    """
    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
    client = AsyncElasticsearch(ELASTICSEARCH_HOST, http_auth=auth, verify_certs=False)
    for index in ("main", "meta", "restricted", "internal"):
        if index == "internal":
            map_dict = mapping_int
        else:
            map_dict = mapping
        if await client.indices.exists(index=index):
            # update index with mapping
            await client.indices.put_mapping(
                index=index, properties=map_dict["mappings"]["properties"]
            )
        else:
            await client.indices.create(index=index, mappings=map_dict["mappings"])
    return client


async def store_batch(data):
    global client
    if not client:
        client = await initialise_elasticsearch()

    indexmap = {}
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            # index = "main"
            index = MAIN_SRC_MAP[msg["src"]]
            # schema = mc_s.schema_main
        elif msg["type"] in TYPES_META:
            index = "meta"
            # schema = mc_s.schema_meta
        elif msg["type"] in TYPES_INT:
            index = "internal"
            # schema = mc_s.schema_int
        INDEX = index

        # if key in schema:
        #     if isinstance(value, int):
        #         if schema[key].startswith("string") or schema[key].startswith(
        #             "text"
        #         ):
        #             msg[key] = str(value)
        # body = orjson.dumps(msg)
        if "ts" not in msg:
            raise Exception("No TS in msg")
        if INDEX not in indexmap:
            indexmap[INDEX] = [msg]
        else:
            indexmap[INDEX].append(msg)

    # Pack the indexmap with msgpack and publish it to Neptune
    packed_index = msgpack.packb(indexmap, use_bin_type=True)
    completed_publish = False
    for i in range(10):
        if completed_publish:
            break
        try:
            await pr.publish(MESSAGE_KEY, packed_index)
            completed_publish = True
        except aioredis.exceptions.ConnectionError:
            await asyncio.sleep(0.1)
    if not completed_publish:
        log.error("Failed to publish to Neptune")

    for index, index_messages in indexmap.items():
        for message in index_messages:
            result = await client.index(index=index, body=message)
            if not result["result"] == "created":
                log.error(f"Indexing failed: {result}")
    log.debug(f"Indexed {len(data)} messages in ES")


async def queue_message(msg):
    """
    Queue a message on the Redis buffer.
    """
    # TODO: msgpack
    message = orjson.dumps(msg)
    await ar.lpush(KEYNAME, message)


async def queue_message_bulk(data):
    """
    Queue multiple messages on the Redis buffer.
    """
    for msg in data:
        # TODO: msgpack
        message = orjson.dumps(msg)
        await ar.lpush(KEYNAME, message)
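
The keyword_fields comment in db_old_ref.py above encodes a real Elasticsearch dynamic-mapping pitfall; a small illustration of the failure it guards against (the index name and values here are hypothetical):

from elasticsearch import AsyncElasticsearch


async def demo(client: AsyncElasticsearch):
    # Without an explicit mapping, the first document seen decides the type:
    # a numeric nick_id would be dynamically mapped as "long".
    await client.index(index="main", body={"nick_id": 12345, "ts": 1600000000})
    # A later document with a non-numeric nick_id then fails to index
    # (mapper_parsing_exception), which is why initialise_elasticsearch()
    # forces these fields to "text" before any data arrives.
    await client.index(index="main", body={"nick_id": "abc123", "ts": 1600000001})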

docker-compose.prod.yml (new file, 206 lines)

@@ -0,0 +1,206 @@
version: "2.2"
services:
rts:
image: xf/monolith:latest
container_name: rts_monolith
command: sh -c '. /venv/bin/activate && exec python rts.py'
build: .
volumes:
- ${PORTAINER_GIT_DIR}:/code
- type: bind
source: /code/run
target: /var/run
environment:
PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
MODULES_ENABLED: "${MODULES_ENABLED}"
MONOLITH_RTS_MEXC_API_ACCESS_KEY: "${MONOLITH_RTS_MEXC_API_ACCESS_KEY}"
MONOLITH_RTS_MEXC_API_SECRET_KEY: "${MONOLITH_RTS_MEXC_API_SECRET_KEY}"
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
app:
image: xf/monolith:latest
container_name: monolith
#command: sh -c '. /venv/bin/activate && exec python -m cProfile -o /tmp/profile.out monolith.py'
build: .
volumes:
- ${PORTAINER_GIT_DIR}:/code
- type: bind
source: /code/run
target: /var/run
environment:
PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
MODULES_ENABLED: "${MODULES_ENABLED}"
DISCORD_TOKEN: "${DISCORD_TOKEN}"
THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
#THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
# How many messages to ingest at once from Redis
MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
# Time to wait between polling Redis again
MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
# Number of 4chan threads to request at once
MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
# Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
# Time to wait after finishing a crawl before starting again
MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
# Semaphore value
MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
# Threads to use for data processing
# Leave uncommented to use all available threads
MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
# Enable performance metrics after message processing
MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
MONOLITH_PROCESS_TARGET_CPU_USAGE: "${MONOLITH_PROCESS_TARGET_CPU_USAGE}"
MONOLITH_CH4_TARGET_CPU_USAGE: "${MONOLITH_CH4_TARGET_CPU_USAGE}"
MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
REDIS_PASSWORD: "${REDIS_PASSWORD}"
MONOLITH_INGEST_INCREASE_BELOW: "${MONOLITH_INGEST_INCREASE_BELOW}"
MONOLITH_INGEST_INCREASE_BY: "${MONOLITH_INGEST_INCREASE_BY}"
MONOLITH_INGEST_DECREASE_ABOVE: "${MONOLITH_INGEST_DECREASE_ABOVE}"
MONOLITH_INGEST_DECREASE_BY: "${MONOLITH_INGEST_DECREASE_BY}"
MONOLITH_INGEST_MAX: "${MONOLITH_INGEST_MAX}"
MONOLITH_INGEST_MIN: "${MONOLITH_INGEST_MIN}"
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
threshold:
image: xf/threshold:latest
container_name: threshold
build: legacy/docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
volumes_from:
- tmp
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
environment:
PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
MODULES_ENABLED: "${MODULES_ENABLED}"
DISCORD_TOKEN: "${DISCORD_TOKEN}"
THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
#THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
# How many messages to ingest at once from Redis
MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
# Time to wait between polling Redis again
MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
# Number of 4chan threads to request at once
MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
# Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
# Time to wait after finishing a crawl before starting again
MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
# Semaphore value
MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
# Threads to use for data processing
# Leave uncommented to use all available threads
MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
# Enable performance metrics after message processing
MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
REDIS_PASSWORD: "${REDIS_PASSWORD}"
# for development
extra_hosts:
- "host.docker.internal:host-gateway"
network_mode: host
ssdb:
image: tsl0922/ssdb
container_name: ssdb_monolith
ports:
- "1289:1289"
environment:
- SSDB_PORT=1289
volumes:
- monolith_ssdb_data:/var/lib/ssdb
# networks:
# - default
# - db
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
redis:
image: redis
container_name: redis_monolith
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- monolith_redis_data:/data
- type: bind
source: /code/run
target: /var/run
# volumes_from:
# - tmp
healthcheck:
test: "redis-cli ping"
interval: 2s
timeout: 2s
retries: 15
# networks:
# - default
# - xf
# - db
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
# networks:
# default:
# driver: bridge
# xf:
# external: true
# db:
# external: true
volumes:
monolith_redis_data:
monolith_ssdb_data:
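
The MONOLITH_INGEST_INCREASE_BELOW / INCREASE_BY / DECREASE_ABOVE / DECREASE_BY / MAX / MIN variables above, together with the "Add psutil to requirements" and "Begin implementation of dynamic throttling framework" commits in the list, point at CPU-driven chunk throttling. A sketch of how those variables might drive it, with hypothetical defaults (an assumption for illustration, not the repository's actual throttling code):

from os import getenv

import psutil  # added to requirements by commit dc533f266f

INCREASE_BELOW = float(getenv("MONOLITH_INGEST_INCREASE_BELOW", "50"))  # CPU %, default is hypothetical
INCREASE_BY = int(getenv("MONOLITH_INGEST_INCREASE_BY", "100"))
DECREASE_ABOVE = float(getenv("MONOLITH_INGEST_DECREASE_ABOVE", "80"))  # CPU %, default is hypothetical
DECREASE_BY = int(getenv("MONOLITH_INGEST_DECREASE_BY", "100"))
CHUNK_MAX = int(getenv("MONOLITH_INGEST_MAX", "5000"))
CHUNK_MIN = int(getenv("MONOLITH_INGEST_MIN", "100"))


def adjust_chunk_size(current: int) -> int:
    """Grow the ingest chunk while CPU is idle, shrink it when CPU is saturated."""
    cpu = psutil.cpu_percent(interval=None)  # non-blocking snapshot since last call
    if cpu < INCREASE_BELOW:
        current += INCREASE_BY
    elif cpu > DECREASE_ABOVE:
        current -= DECREASE_BY
    return max(CHUNK_MIN, min(CHUNK_MAX, current))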


@@ -1,351 +0,0 @@
version: "2.2"
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services:
app:
image: pathogen/monolith:latest
container_name: monolith
build: ./docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
env_file:
- .env
volumes_from:
- tmp
depends_on:
broker:
condition: service_started
kafka:
condition: service_healthy
tmp:
condition: service_started
redis:
condition: service_healthy
# - db
threshold:
image: pathogen/threshold:latest
container_name: threshold
build: ./legacy/docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
env_file:
- .env
# for development
extra_hosts:
- "host.docker.internal:host-gateway"
volumes_from:
- tmp
depends_on:
tmp:
condition: service_started
redis:
condition: service_healthy
# db:
#image: pathogen/manticore:kibana
# image: manticoresearch/manticore:latest
#build:
# context: ./docker/manticore
# args:
# DEV: 1
# restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# - CONFIG_FILE=/config.yaml
# volumes:
# - ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml
# depends_on:
# - broker
# metabase:
# container_name: metabase
# image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- redis:/data
db:
env_file: docker/.env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: docker/.env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports:
- 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: docker/.env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- metadata_data:/var/lib/postgresql/data
environment:
POSTGRES_PASSWORD: FoolishPassword
POSTGRES_USER: druid
POSTGRES_DB: druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- druid_shared:/opt/shared
- coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
command:
- coordinator
env_file:
- environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- druid_shared:/opt/shared
- historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- druid_shared:/opt/shared
- middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
# image: manticoresearch/manticore:dev
# #build:
# # context: ./docker/manticore
# # args:
# # DEV: 1
# restart: always
# ports:
# - 9308
# - 9312
# - 9306
# ulimits:
# nproc: 65535
# nofile:
# soft: 65535
# hard: 65535
# memlock:
# soft: -1
# hard: -1
# environment:
# - MCL=1
# volumes:
# - ./docker/data:/var/lib/manticore
# - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- redis_data:/data
volumes_from:
- tmp
healthcheck:
test: "redis-cli -s /var/run/redis/redis.sock ping"
interval: 2s
timeout: 2s
retries: 15
networks:
default:
external:
name: pathogen
volumes:
superset_home:
external: false
db_home:
external: false
redis_superset:
external: false
redis_data: {}
metadata_data: {}
middle_var: {}
historical_var: {}
broker_var: {}
coordinator_var: {}
router_var: {}
druid_shared: {}


@@ -1,46 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
COMPOSE_PROJECT_NAME=superset
# database configurations (do not modify)
DATABASE_DB=superset
DATABASE_HOST=db
DATABASE_PASSWORD=superset
DATABASE_USER=superset
# database engine specific environment variables
# change the below if you prefer another database engine
DATABASE_PORT=5432
DATABASE_DIALECT=postgresql
POSTGRES_DB=superset
POSTGRES_USER=superset
POSTGRES_PASSWORD=superset
#MYSQL_DATABASE=superset
#MYSQL_USER=superset
#MYSQL_PASSWORD=superset
#MYSQL_RANDOM_ROOT_PASSWORD=yes
# Add the mapped in /app/pythonpath_docker which allows devs to override stuff
PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev
REDIS_HOST=redis
REDIS_PORT=6379
FLASK_ENV=production
SUPERSET_ENV=production
SUPERSET_LOAD_EXAMPLES=yes
CYPRESS_CONFIG=false
SUPERSET_PORT=8088


@@ -1,348 +0,0 @@
version: "2.2"
# volumes:
# metadata_data: {}
# middle_var: {}
# historical_var: {}
# broker_var: {}
# coordinator_var: {}
# router_var: {}
# druid_shared: {}
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services:
app:
image: pathogen/monolith:latest
container_name: monolith
build: ./docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
env_file:
- ../stack.env
volumes_from:
- tmp
depends_on:
broker:
condition: service_started
kafka:
condition: service_healthy
tmp:
condition: service_started
redis:
condition: service_healthy
# - db
threshold:
image: pathogen/threshold:latest
container_name: threshold
build: ./legacy/docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
env_file:
- ../stack.env
volumes_from:
- tmp
depends_on:
tmp:
condition: service_started
redis:
condition: service_healthy
# db:
#image: pathogen/manticore:kibana
# image: manticoresearch/manticore:latest
#build:
# context: ./docker/manticore
# args:
# DEV: 1
# restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# depends_on:
# - broker
# metabase:
# container_name: metabase
# image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- redis:/data
db:
env_file: .env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: .env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports:
- 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: .env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- /block/store/metadata_data:/var/lib/postgresql/data
environment:
- POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
command:
- coordinator
env_file:
- environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- /block/store/broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- /block/store/router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
# image: manticoresearch/manticore:dev
# #build:
# # context: ./docker/manticore
# # args:
# # DEV: 1
# restart: always
# ports:
# - 9308
# - 9312
# - 9306
# ulimits:
# nproc: 65535
# nofile:
# soft: 65535
# hard: 65535
# memlock:
# soft: -1
# hard: -1
# environment:
# - MCL=1
# volumes:
# - ./docker/data:/var/lib/manticore
# - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- redis_data:/data
volumes_from:
- tmp
healthcheck:
test: "redis-cli -s /var/run/redis/redis.sock ping"
interval: 2s
timeout: 2s
retries: 15
networks:
default:
external:
name: pathogen
volumes:
redis_data: {}
superset_home:
external: false
db_home:
external: false
redis:
external: false


@@ -1,87 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=2
druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>


@@ -1,265 +0,0 @@
#!/bin/sh
ip=`hostname -i|rev|cut -d\ -f 1|rev`
cat << EOF
searchd {
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
# access_plain_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
# access_blob_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
# access_doclists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
# access_hitlists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
# agent_connect_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
# agent_query_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
# agent_retry_count = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
# agent_retry_delay = 500
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
# attr_flush_period = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
# binlog_flush = 2
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
# binlog_max_log_size = 268435456
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
# binlog_path =
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
# client_timeout = 300
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
# collation_libc_locale = C
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
# collation_server = libc_ci
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
data_dir = /var/lib/manticore
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
# docstore_cache_size = 16m
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
# expansion_limit = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
# grouping_in_utc = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
# ha_period_karma = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
# ha_ping_interval = 1000
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
# hostname_lookup =
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
# jobs_queue_size =
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
# listen_backlog = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
# listen_env = this directive allows to append listeners from environment variables
listen = 9306:mysql41
listen = /var/run/mysqld/mysqld.sock:mysql41
listen = $ip:9312
listen = 9308:http
listen = $ip:9315-9325:replication
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
# listen_tfo = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#log
log = /var/log/manticore/searchd.log
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
# max_batch_queries = 32
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
# threads =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
# max_filters = 256
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
# max_filter_values = 4096
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
# max_open_files = max
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
max_packet_size = 128M
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
# mysql_version_string =
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
# net_workers = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
# net_wait_tm = -1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
# net_throttle_accept = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
# net_throttle_action = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
# node_address =
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
# ondisk_attrs_default = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
# persistent_connections_limit =
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
pid_file = /var/run/manticore/searchd.pid
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
# preopen_indexes = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
qcache_max_bytes = 128Mb
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
qcache_thresh_msec = 150
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
qcache_ttl_sec = 120
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
query_log_format = sphinxql
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
# query_log_min_msec = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
# query_log = /var/log/manticore/query.log
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
# query_log_mode = 600
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
# max_connections =
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
# network_timeout = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
# read_buffer = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
# read_buffer_docs = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
# read_buffer_hits = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
# read_unhinted 32K
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
# rt_flush_period =
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
# rt_merge_iops = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
# rt_merge_maxiosize = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
# seamless_rotate = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
# server_id =
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
# shutdown_timeout = 3
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
# shutdown_token =
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
# snippets_file_prefix =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
# sphinxql_state =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
# sphinxql_timeout = 900
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
# ssl_ca =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
# ssl_cert =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
# ssl_key =
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
# subtree_docs_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
# subtree_hits_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
# thread_stack =
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
# unlink_old = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
# watchdog = 1
}
common {
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
# lemmatizer_base = /usr/local/share
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
# progressive_merge =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
# json_autoconv_keynames =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
# json_autoconv_numbers = 0
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
# on_json_attr_error = ignore_attr
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
# plugin_dir =
}
# indexer {
# lemmatizer_cache = 1024M
# max_iops = 0
# max_iosize = 0
# mem_limit = 1024M
# }
EOF


@@ -1,12 +0,0 @@
_HiStOrY_V2_
SELECT * FROM films WHERE MATCH('"shark monkey boy robot"/2') AND release_year IN(2006,2007) AND rental_rate BETWEEN 2.0 and 3.0;
SELECT title, HIGHLIGHT({},'description') FROM films WHERE MATCH('"shark monkey boy robot"/2');
SELECT * FROM films WHERE MATCH('" shark monkey boy robot "/2');
SELECT * FROM films WHERE MATCH('Emotional drama') FACET release_year FACET category_id;
SELECT * FROM films WHERE MATCH('Emotional drama') GROUP BY release_year;
SELECT * FROM films WHERE MATCH('Emotional drama -dog -shark');
SELECT * FROM films WHERE MATCH('Emotional drama');
SELECT * FROM films;
DESCRIBE films;
SHOW TABLES;
SOURCE /sandbox.sql


@@ -1,76 +0,0 @@
FROM ubuntu:focal
ARG DEV
ARG DAEMON_URL
ARG MCL_URL
RUN groupadd -r manticore && useradd -r -g manticore manticore
ENV GOSU_VERSION 1.11
ENV MCL_URL=${MCL_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_focal/dists/focal/main/binary-amd64/manticore-columnar-lib_1.15.4-220522-2fef34e_amd64.deb"}
ENV DAEMON_URL=${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_focal/dists/manticore_5.0.2-220530-348514c86_amd64.tgz"}
ENV BETA_URL=${BETA_URL:-"https://repo.manticoresearch.com/repository/kibana_beta/ubuntu/focal.zip"}
RUN set -x \
&& apt-get update && apt-get -y install --no-install-recommends ca-certificates binutils wget gnupg dirmngr unzip && rm -rf /var/lib/apt/lists/* \
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
&& { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
&& rm -rf "$GNUPGHOME" /usr/local/bin/gosu.asc \
&& chmod +x /usr/local/bin/gosu \
&& gosu nobody true && \
if [ "${DEV}" = "1" ]; then \
echo "DEV IS ONE" && \
exit && \
wget https://repo.manticoresearch.com/manticore-dev-repo.noarch.deb \
&& dpkg -i manticore-dev-repo.noarch.deb \
&& apt-key adv --fetch-keys 'https://repo.manticoresearch.com/GPG-KEY-manticore' && apt-get -y update && apt-get -y install manticore \
&& apt-get update \
&& echo $(apt-get -y download --print-uris manticore-columnar-lib | cut -d" " -f1 | cut -d "'" -f 2) > /mcl.url ;\
elif [ "${DEV}" = "2" ]; then \
echo "DEV IS TWO" && \
wget $BETA_URL && unzip focal.zip && rm focal.zip && \
dpkg -i build/* && echo $MCL_URL > /mcl.url; rm build/* ;\
else \
echo "DEV NOT EITHER" && \
exit && \
wget $DAEMON_URL && ARCHIVE_NAME=$(ls | grep '.tgz' | head -n1 ) && tar -xf $ARCHIVE_NAME && rm $ARCHIVE_NAME && \
dpkg -i manticore* && echo $MCL_URL > /mcl.url && rm *.deb ; \
fi \
&& mkdir -p /var/run/manticore && mkdir -p /var/lib/manticore/replication \
&& apt-get update && apt-get -y install libexpat1 libodbc1 libpq5 openssl libcrypto++6 libmysqlclient21 mysql-client \
&& apt-get -y purge --auto-remove \
&& rm -rf /var/lib/apt/lists/* \
&& rm -f /usr/bin/mariabackup /usr/bin/mysqldump /usr/bin/mysqlslap /usr/bin/mysqladmin /usr/bin/mysqlimport \
/usr/bin/mysqlshow /usr/bin/mbstream /usr/bin/mysql_waitpid /usr/bin/innotop /usr/bin/mysqlaccess /usr/bin/mytop \
/usr/bin/mysqlreport /usr/bin/mysqldumpslow /usr/bin/mysql_find_rows /usr/bin/mysql_fix_extensions \
/usr/bin/mysql_embedded /usr/bin/mysqlcheck \
&& rm -f /usr/bin/spelldump /usr/bin/wordbreaker \
&& mkdir -p /var/run/mysqld/ && chown manticore:manticore /var/run/mysqld/ \
&& echo "\n[mysql]\nsilent\nwait\ntable\n" >> /etc/mysql/my.cnf && \
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/en.pak.tgz && \
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/de.pak.tgz && \
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/ru.pak.tgz && \
tar -xf /tmp/en.pak.tgz -C /usr/share/manticore/ && \
tar -xf /tmp/de.pak.tgz -C /usr/share/manticore/ && \
tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/
COPY manticore.conf /etc/manticoresearch/
COPY sandbox.sql /sandbox.sql
COPY .mysql_history /root/.mysql_history
COPY docker-entrypoint.sh /usr/local/bin/
RUN ln -s usr/local/bin/docker-entrypoint.sh /entrypoint.sh
WORKDIR /var/lib/manticore
ENTRYPOINT ["docker-entrypoint.sh"]
EXPOSE 9306
EXPOSE 9308
EXPOSE 9312
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
CMD ["sh", "-c", "(echo 'START WAIT' && sleep 5 && echo 'END WAIT' && mysql -P9306 -h0 -e 'set global log_management = 0; set global log_management = 1;') & searchd --nodetach"]


@@ -1,278 +0,0 @@
# Manticore Search Docker image
This is the git repo of official [Docker image](https://hub.docker.com/r/manticoresearch/manticore/) for [Manticore Search](https://github.com/manticoresoftware/manticoresearch).
Manticore Search is an easy-to-use, fast, open source database for search. It helps thousands of companies, from small to large (such as Craigslist), search and filter petabytes of text data on a single node or hundreds of nodes, do streaming full-text filtering, and add auto-complete, spell correction, more-like-this, faceting and other search-related features to their websites and applications.
The default configuration includes a sample Real-Time index and listens on the default ports:
* `9306` for connections from a MySQL client
* `9308` for connections via HTTP
* `9312` for connections via a binary protocol (e.g. in case you run a cluster)
The image comes with libraries for easy indexing of data from MySQL, PostgreSQL, XML and CSV files.
# How to run Manticore Search Docker image
## Quick usage
The simplest way to start Manticore in a container and log in to it via the mysql client is:
```bash
docker run --name manticore --rm -d manticoresearch/manticore && sleep 3 && docker exec -it manticore mysql && docker stop manticore
```
When you exit the mysql client, the container is stopped and removed, so **use this only for testing / sandboxing purposes**. See below for how to use it in production.
The image comes with a sample index which can be loaded like this:
```mysql
mysql> source /sandbox.sql
```
The mysql client also has several sample queries in its history that you can run on the above index; just use the Up/Down keys in the client to see and run them.
## Production use
### Ports and mounting points
For data persistence `/var/lib/manticore/` should be mounted to local storage or other desired storage engine.
```bash
docker run --name manticore -v $(pwd)/data:/var/lib/manticore -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
```
The configuration file inside the instance is located at `/etc/manticoresearch/manticore.conf`. For custom settings, mount your own configuration file to this path.
The ports are 9306/9308/9312 for SQL/HTTP/binary; expose them depending on how you are going to use Manticore. For example:
```bash
docker run --name manticore -v $(pwd)/manticore.conf:/etc/manticoresearch/manticore.conf -v $(pwd)/data:/var/lib/manticore/ -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
```
Make sure to remove `127.0.0.1:` if you want the ports to be available for external hosts.
### Manticore Columnar Library
The Docker image doesn't include the [Manticore Columnar Library](https://github.com/manticoresoftware/columnar), which is required if you need:
* columnar storage
* secondary indexes
but you can easily enable it at runtime by setting the environment variable `MCL=1`, i.e. `docker run -e MCL=1 ... manticoresearch/manticore`. The container will then download and install the library and put it in the data dir (which is normally mapped as a volume in production). The next time you run the container the library will already be there, so it won't be downloaded again unless you change the Manticore Search version.
### Docker-compose
In many cases you might want to use Manticore together with other images specified in a docker-compose YAML file. Here is the minimal recommended specification for Manticore Search in docker-compose.yml:
```yaml
version: '2.2'
services:
manticore:
container_name: manticore
image: manticoresearch/manticore
restart: always
ports:
- 127.0.0.1:9306:9306
- 127.0.0.1:9308:9308
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
environment:
- MCL=1
volumes:
- ./data:/var/lib/manticore
# - ./manticore.conf:/etc/manticoresearch/manticore.conf # uncomment if you use a custom config
```
Besides using the exposed ports 9306 and 9308 you can log into the instance by running `docker-compose exec manticore mysql`.
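If you prefer connecting from the host, any MySQL-protocol client pointed at the mapped SQL port should work; a minimal sketch, assuming the 9306 mapping from the compose example above:
```bash
# Connect to Manticore's SQL interface from the host (9306 is mapped to 127.0.0.1 in the compose example above)
mysql -h 127.0.0.1 -P 9306
```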
### HTTP protocol
Manticore is accessible via HTTP on ports 9308 and 9312. You can map either of them locally and connect with curl:
```bash
docker run --name manticore -p 9308:9308 -d manticoresearch/manticore
```
Create a table:
```bash
curl -X POST 'http://127.0.0.1:9308/sql' -d 'mode=raw&query=CREATE TABLE testrt ( title text, content text, gid integer)'
```
Insert a document:
```bash
curl -X POST 'http://127.0.0.1:9308/json/insert' -d'{"index":"testrt","id":1,"doc":{"title":"Hello","content":"world","gid":1}}'
```
Perform a simple search:
```bash
curl -X POST 'http://127.0.0.1:9308/json/search' -d '{"index":"testrt","query":{"match":{"*":"hello world"}}}'
```
### Logging
By default, Manticore logs to `/dev/stdout`, so you can watch the log on the host with:
```bash
docker logs manticore
```
If you want to get a log of your queries the same way, pass the environment variable `QUERY_LOG_TO_STDOUT=true`.
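For example, a minimal sketch that enables query logging and then follows the output (the container name and port mapping are illustrative):
```bash
# Run Manticore with query logging sent to stdout, then follow the combined daemon and query log
docker run --name manticore -e QUERY_LOG_TO_STDOUT=true -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
docker logs -f manticore
```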
### Multi-node cluster with replication
Here is a simple `docker-compose.yml` for defining a two node cluster:
```yaml
version: '2.2'
services:
manticore-1:
image: manticoresearch/manticore
restart: always
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
environment:
- MCL=1
networks:
- manticore
manticore-2:
image: manticoresearch/manticore
restart: always
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
environment:
- MCL=1
networks:
- manticore
networks:
manticore:
driver: bridge
```
* Start it: `docker-compose up`
* Create a cluster with a table:
```mysql
$ docker-compose exec manticore-1 mysql
mysql> CREATE TABLE testrt ( title text, content text, gid integer);
mysql> CREATE CLUSTER posts;
Query OK, 0 rows affected (0.24 sec)
mysql> ALTER CLUSTER posts ADD testrt;
Query OK, 0 rows affected (0.07 sec)
MySQL [(none)]> exit
Bye
```
* Join the cluster on the 2nd instance and insert something into the table:
```mysql
$ docker-compose exec manticore-2 mysql
mysql> JOIN CLUSTER posts AT 'manticore-1:9312';
mysql> INSERT INTO posts:testrt(title,content,gid) VALUES('hello','world',1);
Query OK, 1 row affected (0.00 sec)
MySQL [(none)]> exit
Bye
```
* If you now go back to the first instance you'll see the new record:
```mysql
$ docker-compose exec manticore-1 mysql
MySQL [(none)]> select * from testrt;
+---------------------+------+-------+---------+
| id | gid | title | content |
+---------------------+------+-------+---------+
| 3891565839006040065 | 1 | hello | world |
+---------------------+------+-------+---------+
1 row in set (0.00 sec)
MySQL [(none)]> exit
Bye
```
## Memory locking and limits
It's recommended to override Docker's default ulimits for the Manticore instance:
```bash
--ulimit nofile=65536:65536
```
For the best performance, Manticore tables' components can be locked into memory. When Manticore is run under Docker, the instance requires additional privileges to allow memory locking. The following options must be added when running the instance:
```bash
--cap-add=IPC_LOCK --ulimit memlock=-1:-1
```
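Putting both recommendations together, a minimal sketch of a production-style run (the data path and port mappings are illustrative):
```bash
# Raise the open-file limit, allow memory locking, and persist data under ./data
docker run --name manticore \
  --ulimit nofile=65536:65536 \
  --ulimit memlock=-1:-1 --cap-add=IPC_LOCK \
  -v $(pwd)/data:/var/lib/manticore \
  -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 \
  -d manticoresearch/manticore
```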
## Configuring Manticore Search with Docker
If you want to run Manticore with a custom config containing index definitions, you will need to mount the configuration into the instance:
```bash
docker run --name manticore -v $(pwd)/manticore.conf:/etc/manticoresearch/manticore.conf -v $(pwd)/data/:/var/lib/manticore -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
```
Note that Manticore Search inside the container runs as user `manticore`. Operations on tables (like creating or rotating plain indexes) should also be performed as `manticore`; otherwise the files will be created as `root` and the search daemon won't have the rights to open them. For example, here is how you can rotate all plain indexes:
```bash
docker exec -it manticore gosu manticore indexer --all --rotate
```
### Environment variables
You can also set individual `searchd` and `common` configuration settings using Docker environment variables.
The settings must be prefixed with their section name; for example, to change the value of the setting `mysql_version_string` in the `searchd` section, the variable must be named `searchd_mysql_version_string`:
```bash
docker run --name manticore -p 127.0.0.1:9306:9306 -e searchd_mysql_version_string='5.5.0' -d manticoresearch/manticore
```
In the case of the `listen` directive, you can use the Docker variable `searchd_listen` to pass new listening interfaces in addition to the default ones. Multiple interfaces can be declared, separated by a pipe character ("|").
To listen only on the instance's network address, `$ip` (retrieved internally from `hostname -i`) can be used as an address alias.
For example, `-e searchd_listen='9316:http|9307:mysql|$ip:5443:mysql_vip'` will add an additional SQL interface on port 9307, a SQL VIP on port 5443 bound only to the instance IP, and HTTP on port 9316, besides the defaults on 9306 and 9308.
```bash
$ docker run --rm -p 1188:9307 -e searchd_mysql_version_string='5.5.0' -e searchd_listen='9316:http|9307:mysql|$ip:5443:mysql_vip' manticore
[Mon Aug 17 07:31:58.719 2020] [1] using config file '/etc/manticoresearch/manticore.conf' (9130 chars)...
listening on all interfaces for http, port=9316
listening on all interfaces for mysql, port=9307
listening on 172.17.0.17:5443 for VIP mysql
listening on all interfaces for mysql, port=9306
listening on UNIX socket /var/run/mysqld/mysqld.sock
listening on 172.17.0.17:9312 for sphinx
listening on all interfaces for http, port=9308
prereading 0 indexes
prereaded 0 indexes in 0.000 sec
accepting connections
```
# Issues
For reporting issues, please use the [issue tracker](https://github.com/manticoresoftware/docker/issues).


@@ -1,118 +0,0 @@
#!/bin/bash
set -eo pipefail
echo "RUNNING ENTRYPOINT"
# check to see if this file is being run or sourced from another script
_is_sourced() {
# https://unix.stackexchange.com/a/215279
[ "${#FUNCNAME[@]}" -ge 2 ] &&
[ "${FUNCNAME[0]}" = '_is_sourced' ] &&
[ "${FUNCNAME[1]}" = 'source' ]
}
_searchd_want_help() {
local arg
for arg; do
case "$arg" in
-'?' | --help | -h | -v)
return 0
;;
esac
done
return 1
}
docker_setup_env() {
if [ -n "$QUERY_LOG_TO_STDOUT" ]; then
export searchd_query_log=/var/log/manticore/query.log
[ ! -f /var/log/manticore/query.log ] && ln -sf /dev/stdout /var/log/manticore/query.log
fi
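# When MCL=1, link the Manticore Columnar Library from /var/lib/manticore/.mcl/ and, on first run or after a version mismatch, download the matching build listed in /mcl.url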
if [[ "${MCL}" == "1" ]]; then
LIB_MANTICORE_COLUMNAR="/var/lib/manticore/.mcl/lib_manticore_columnar.so"
LIB_MANTICORE_SECONDARY="/var/lib/manticore/.mcl/lib_manticore_secondary.so"
[ -L /usr/share/manticore/modules/lib_manticore_columnar.so ] || ln -s $LIB_MANTICORE_COLUMNAR /usr/share/manticore/modules/lib_manticore_columnar.so
[ -L /usr/share/manticore/modules/lib_manticore_secondary.so ] || ln -s $LIB_MANTICORE_SECONDARY /usr/share/manticore/modules/lib_manticore_secondary.so
searchd -v|grep -i error|egrep "trying to load" \
&& rm $LIB_MANTICORE_COLUMNAR $LIB_MANTICORE_SECONDARY \
&& echo "WARNING: wrong MCL version was removed, installing the correct one"
if [[ ! -f "$LIB_MANTICORE_COLUMNAR" || ! -f "$LIB_MANTICORE_SECONDARY" ]]; then
if ! mkdir -p /var/lib/manticore/.mcl/ ; then
echo "ERROR: Manticore Columnar Library is inaccessible: couldn't create /var/lib/manticore/.mcl/."
exit
fi
MCL_URL=$(cat /mcl.url)
wget -P /tmp $MCL_URL
LAST_PATH=$(pwd)
cd /tmp
PACKAGE_NAME=$(ls | grep manticore-columnar | head -n 1)
ar -x $PACKAGE_NAME
tar -xf data.tar.gz
find . -name '*.so' -exec cp {} /var/lib/manticore/.mcl/ \;
cd $LAST_PATH
fi
fi
}
_main() {
# first arg is `h` or some `--option`
if [ "${1#-}" != "$1" ]; then
set -- searchd "$@"
fi
# Amended from searchd to sh since we're using sh to wait until searchd starts, then set the Kibana-specific options
if [ "$1" = 'sh' ] && ! _searchd_want_help "@"; then
docker_setup_env "$@"
# allow the container to be started with `--user`
if [ "$(id -u)" = '0' ]; then
find /var/lib/manticore /var/log/manticore /var/run/manticore /etc/manticoresearch \! -user manticore -exec chown manticore '{}' +
exec gosu manticore "$0" "$@"
fi
fi
_replace_conf_from_env
exec "$@"
}
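# Build a sed script from all searchd_*/common_* environment variables and apply it to /etc/manticoresearch/manticore.conf, so each variable overrides (or un-comments) the matching directive; a searchd_listen value may carry several listeners separated by "|"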
_replace_conf_from_env() {
sed_query=""
while IFS='=' read -r oldname value; do
if [[ $oldname == 'searchd_'* || $oldname == 'common_'* ]]; then
value=$(echo ${!oldname} | sed 's/\//\\\//g')
oldname=$(echo $oldname | sed "s/searchd_//g;s/common_//g;")
newname=$oldname
if [[ $newname == 'listen' ]]; then
oldname="listen_env"
IFS='|' read -ra ADDR <<<"$value"
count=0
for i in "${ADDR[@]}"; do
if [[ $count == 0 ]]; then
value=$i
else
value="$value\n listen = $i"
fi
count=$((count + 1))
done
fi
if [[ -z $sed_query ]]; then
sed_query="s/(#\s)*?$oldname\s?=\s?.*?$/$newname = $value/g"
else
sed_query="$sed_query;s/(#\s)*?$oldname\s?=\s?.*?$/$newname = $value/g"
fi
fi
done < <(env)
if [[ ! -z $sed_query ]]; then
sed -i -E "$sed_query" /etc/manticoresearch/manticore.conf
fi
}
# If we are sourced from elsewhere, don't perform any further actions
if ! _is_sourced; then
_main "$@"
fi


@@ -1,259 +0,0 @@
#!/bin/sh
ip=`hostname -i|rev|cut -d\ -f 1|rev`
cat << EOF
searchd {
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
# access_plain_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
# access_blob_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
# access_doclists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
# access_hitlists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
# agent_connect_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
# agent_query_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
# agent_retry_count = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
# agent_retry_delay = 500
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
# attr_flush_period = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
# binlog_flush = 2
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
# binlog_max_log_size = 268435456
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
# binlog_path =
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
# client_timeout = 300
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
# collation_libc_locale = C
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
# collation_server = libc_ci
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
data_dir = /var/lib/manticore
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
# docstore_cache_size = 16m
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
# expansion_limit = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
# grouping_in_utc = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
# ha_period_karma = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
# ha_ping_interval = 1000
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
# hostname_lookup =
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
# jobs_queue_size =
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
# listen_backlog = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
# listen_env = this directive allows to append listeners from environment variables
listen = 9306:mysql41
listen = /var/run/mysqld/mysqld.sock:mysql41
listen = $ip:9312
listen = 9308:http
listen = $ip:9315-9325:replication
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
# listen_tfo = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#log
log = /var/log/manticore/searchd.log
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
# max_batch_queries = 32
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
# threads =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
# max_filters = 256
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
# max_filter_values = 4096
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
# max_open_files =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
max_packet_size = 128M
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
# mysql_version_string =
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
# net_workers = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
# net_wait_tm = -1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
# net_throttle_accept = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
# net_throttle_action = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
# node_address =
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
# ondisk_attrs_default = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
# persistent_connections_limit =
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
pid_file = /var/run/manticore/searchd.pid
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
# preopen_indexes = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
# qcache_max_bytes = 16Mb
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
# qcache_thresh_msec = 3000
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
# qcache_ttl_sec = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
query_log_format = sphinxql
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
query_log_min_msec = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
query_log = /var/log/manticore/query.log
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
# query_log_mode = 600
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
# max_connections =
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
# network_timeout = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
# read_buffer = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
# read_buffer_docs = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
# read_buffer_hits = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
# read_unhinted 32K
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
# rt_flush_period =
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
# rt_merge_iops = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
# rt_merge_maxiosize = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
# seamless_rotate = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
# server_id =
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
# shutdown_timeout = 3
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
# shutdown_token =
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
# snippets_file_prefix =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
# sphinxql_state =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
# sphinxql_timeout = 900
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
# ssl_ca =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
# ssl_cert =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
# ssl_key =
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
# subtree_docs_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
# subtree_hits_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
# thread_stack =
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
# unlink_old = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
# watchdog = 1
}
common {
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
# lemmatizer_base = /usr/local/share
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
# progressive_merge =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
# json_autoconv_keynames =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
# json_autoconv_numbers = 0
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
# on_json_attr_error = ignore_attr
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
# plugin_dir =
}
EOF

File diff suppressed because one or more lines are too long


@@ -1,2 +1,5 @@
-unixsocket /var/run/redis/redis.sock
+unixsocket /var/run/monolith-redis.sock
unixsocketperm 777
+port 0
+# port 6379
+# requirepass changeme


@@ -1,23 +0,0 @@
wheel
beautifulsoup4
redis
siphashc
aiohttp[speedups]
python-dotenv
#manticoresearch
numpy
aioredis[hiredis]
aiokafka
vaderSentiment
polyglot
pyicu
pycld2
morfessor
six
nltk
#spacy
gensim
python-Levenshtein
orjson
uvloop
numba


@@ -1,50 +0,0 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -eo pipefail
REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt"
# If this is a Cypress run, overwrite the password for admin and export env variables
if [ "$CYPRESS_CONFIG" == "true" ]; then
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
export SUPERSET_TESTENV=true
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
fi
#
# Make sure we have dev requirements installed
#
if [ -f "${REQUIREMENTS_LOCAL}" ]; then
echo "Installing local overrides at ${REQUIREMENTS_LOCAL}"
pip install -r "${REQUIREMENTS_LOCAL}"
else
echo "Skipping local overrides"
fi
if [[ "${1}" == "worker" ]]; then
echo "Starting Celery worker..."
celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO
elif [[ "${1}" == "beat" ]]; then
echo "Starting Celery beat..."
celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule
elif [[ "${1}" == "app" ]]; then
echo "Starting web app..."
flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0
elif [[ "${1}" == "app-gunicorn" ]]; then
echo "Starting web app..."
/usr/bin/run-server.sh
fi


@@ -1,78 +0,0 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
#
# Always install local overrides first
#
/app/docker/docker-bootstrap.sh
STEP_CNT=4
echo_step() {
cat <<EOF
######################################################################
Init Step ${1}/${STEP_CNT} [${2}] -- ${3}
######################################################################
EOF
}
ADMIN_PASSWORD="admin"
# If this is a Cypress run, overwrite the password for admin and export env variables
if [ "$CYPRESS_CONFIG" == "true" ]; then
ADMIN_PASSWORD="general"
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
export SUPERSET_TESTENV=true
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
fi
# Initialize the database
echo_step "1" "Starting" "Applying DB migrations"
superset db upgrade
echo_step "1" "Complete" "Applying DB migrations"
# Create an admin user
echo_step "2" "Starting" "Setting up admin user ( admin / $ADMIN_PASSWORD )"
superset fab create-admin \
--username admin \
--firstname Superset \
--lastname Admin \
--email admin@superset.com \
--password $ADMIN_PASSWORD
echo_step "2" "Complete" "Setting up admin user"
# Create default roles and permissions
echo_step "3" "Starting" "Setting up roles and perms"
superset init
echo_step "3" "Complete" "Setting up roles and perms"
if [ "$SUPERSET_LOAD_EXAMPLES" = "yes" ]; then
# Load some data to play with
echo_step "4" "Starting" "Loading examples"
# If this is a Cypress run (which consumes superset_test_config), load the data required for tests
if [ "$CYPRESS_CONFIG" == "true" ]; then
superset load_test_users
superset load_examples --load-test-data
else
superset load_examples
fi
echo_step "4" "Complete" "Loading examples"
fi


@@ -1 +0,0 @@
pydruid


@@ -1,6 +1,6 @@
-PORTAINER_GIT_DIR=.
+PORTAINER_GIT_DIR=..
MODULES_ENABLED="dis"
-DISCORD_TOKEN="xx"
+DISCORD_TOKEN=
THRESHOLD_LISTENER_HOST=0.0.0.0
THRESHOLD_LISTENER_PORT=13867
THRESHOLD_LISTENER_SSL=1
@@ -13,16 +13,16 @@ THRESHOLD_RELAY_SSL=1
THRESHOLD_API_ENABLED=1
THRESHOLD_API_HOST=0.0.0.0
THRESHOLD_API_PORT=13869
-PORTAINER_GIT_DIR=.
-THRESHOLD_CONFIG_DIR=./legacy/conf/live/
+THRESHOLD_CONFIG_DIR=../legacy/conf/live/
-THRESHOLD_CERT_DIR=./legacy/conf/cert/
+#THRESHOLD_TEMPLATE_DIR=../legacy/conf/templates/
+THRESHOLD_CERT_DIR=../legacy/conf/cert/
# How many messages to ingest at once from Redis
-MONOLITH_INGEST_CHUNK_SIZE=900
+MONOLITH_INGEST_CHUNK_SIZE=70000
# Time to wait between polling Redis again
-MONOLITH_INGEST_ITER_DELAY=0.5
+MONOLITH_INGEST_ITER_DELAY=2
# Number of 4chan threads to request at once
MONOLITH_CH4_THREADS_CONCURRENT=1000
@@ -31,11 +31,20 @@ MONOLITH_CH4_THREADS_CONCURRENT=1000
MONOLITH_CH4_THREADS_DELAY=0.1
# Time to wait after finishing a crawl before starting again
-MONOLITH_CH4_CRAWL_DELAY=30
+MONOLITH_CH4_CRAWL_DELAY=60
# Semaphore value
MONOLITH_CH4_THREADS_SEMAPHORE=1000
# Threads to use for data processing
# Leave uncommented to use all available threads
-# MONOLITH_PROCESS_THREADS=4
+MONOLITH_PROCESS_THREADS=7
+# Enable performance metrics after message processing
+MONOLITH_PROCESS_PERFSTATS=0
+# Elasticsearch
+ELASTICSEARCH_USERNAME=elastic
+ELASTICSEARCH_PASSWORD=
+ELASTICSEARCH_HOST=https://es01:9200
+ELASTICSEARCH_TLS=1


@@ -1,87 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=2
druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>

View File

@@ -1,17 +0,0 @@
repos:
  - repo: https://github.com/psf/black
    rev: 22.6.0
    hooks:
      - id: black
        args:
          - --line-length=120
  - repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
      - id: isort
        args: ["--profile", "black"]
  - repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        args: [--max-line-length=120]

View File

@@ -17,7 +17,7 @@
}, },
"Key": "key.pem", "Key": "key.pem",
"Certificate": "cert.pem", "Certificate": "cert.pem",
"RedisSocket": "/var/run/redis/redis.sock", "RedisSocket": "/var/run/socks/redis.sock",
"RedisDBEphemeral": 1, "RedisDBEphemeral": 1,
"RedisDBPersistent": 0, "RedisDBPersistent": 0,
"UsePassword": false, "UsePassword": false,

View File

@@ -1,41 +0,0 @@
version: "2"
services:
  app:
    image: pathogen/threshold:latest
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - .env
    # for development
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes_from:
      - tmp
  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis
  redis:
    image: redis
    command: redis-server /etc/redis.conf
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
    volumes_from:
      - tmp
networks:
  default:
    external:
      name: pathogen

View File

@@ -1,38 +0,0 @@
version: "2"
services:
  app:
    image: pathogen/threshold:latest
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - ../stack.env
    volumes_from:
      - tmp
  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis
  redis:
    image: redis
    command: redis-server /etc/redis.conf
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
    volumes_from:
      - tmp
networks:
  default:
    external:
      name: pathogen

View File

@@ -4,6 +4,7 @@ from os import urandom
from os.path import exists from os.path import exists
from string import digits from string import digits
import redis
from redis import StrictRedis from redis import StrictRedis
# List of errors ZNC can give us # List of errors ZNC can give us
@@ -121,7 +122,7 @@ def initConf():
def initMain(): def initMain():
global r, g global r, g, x
initConf() initConf()
r = StrictRedis( r = StrictRedis(
unix_socket_path=config["RedisSocket"], db=config["RedisDBEphemeral"] # noqa unix_socket_path=config["RedisSocket"], db=config["RedisDBEphemeral"] # noqa
@@ -129,3 +130,5 @@ def initMain():
g = StrictRedis( g = StrictRedis(
unix_socket_path=config["RedisSocket"], db=config["RedisDBPersistent"] unix_socket_path=config["RedisSocket"], db=config["RedisDBPersistent"]
) # noqa ) # noqa
# SSDB for communication with Monolith
x = redis.from_url("redis://ssdb:1289", db=0)

View File

@@ -67,7 +67,7 @@ def parsemeta(numName, c):
def queue_message(c): def queue_message(c):
message = json.dumps(c) message = json.dumps(c)
main.g.sadd("queue", message) main.x.lpush("queue", message)
def event( def event(
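
With this change Threshold hands messages to Monolith through a Redis-protocol list on SSDB (LPUSH) rather than a Redis set. A minimal sketch of that producer/consumer handoff, using a local Redis URL as a stand-in for the real socket and SSDB endpoints:

import json

import redis

# Stand-in connection; the real code reaches SSDB via redis.from_url("redis://ssdb:1289").
x = redis.Redis(host="localhost", port=6379, db=0)

# Producer side, as in queue_message above: push onto the left of the list.
x.lpush("queue", json.dumps({"src": "irc", "msg": "hello"}))

# Consumer side (Monolith): pop from the right so messages come out oldest-first.
raw = x.rpop("queue")
if raw:
    message = json.loads(raw)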

View File

@@ -1,9 +0,0 @@
wheel
pre-commit
twisted
pyOpenSSL
redis
pyYaML
service_identity
siphashc
Klein

View File

@@ -98,7 +98,6 @@ class IRCRelayFactory(ReconnectingClientFactory):
self.relayCommands, self.user, self.stage2 = relayCommands, user, stage2 self.relayCommands, self.user, self.stage2 = relayCommands, user, stage2
def buildProtocol(self, addr): def buildProtocol(self, addr):
entry = IRCRelay(self.num, self.relayCommands, self.user, self.stage2) entry = IRCRelay(self.num, self.relayCommands, self.user, self.stage2)
self.client = entry self.client = entry

View File

@@ -1,8 +1,10 @@
import asyncio import asyncio
from os import getenv from os import getenv
from time import sleep
import uvloop import uvloop
import db
import util import util
from sources.ch4 import Chan4 from sources.ch4 import Chan4
from sources.dis import DiscordClient from sources.dis import DiscordClient
@@ -21,14 +23,28 @@ if not token:
async def main(loop): async def main(loop):
if "ingest" in modules_enabled:
ingest = Ingest()
loop.create_task(ingest.run())
if "dis" in modules_enabled:
client = DiscordClient() client = DiscordClient()
loop.create_task(client.start(token)) loop.create_task(client.start(token))
if "ch4" in modules_enabled:
chan = Chan4() chan = Chan4()
loop.create_task(chan.run()) loop.create_task(chan.run())
ingest = Ingest()
loop.create_task(ingest.run()) created = False
while not created:
try:
db.create_index(db.api_client)
created = True
except Exception as e:
print(f"Error creating index: {e}")
sleep(1) # Block the thread, just wait for the DB
db.update_schema()
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
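
The startup sequence above now gates each module behind modules_enabled and blocks until the index can be created before the event loop starts. The same wait-for-dependency pattern in isolation; unlike the loop above it gives up after a fixed number of attempts, and the setup callable is a placeholder for db.create_index:

from time import sleep


def wait_for(setup, retry_delay=1.0, attempts=30):
    # Call setup() until it stops raising, blocking between attempts.
    for _ in range(attempts):
        try:
            setup()
            return True
        except Exception as e:  # broad catch mirrors the loop above
            print(f"Error creating index: {e}")
            sleep(retry_delay)
    return False


# Usage sketch: wait_for(lambda: db.create_index(db.api_client))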

0
oom Normal file
View File

0
perf/__init__.py Normal file
View File

134
perf/throttle.py Normal file
View File

@@ -0,0 +1,134 @@
import asyncio
import time
import psutil
import util
class DynamicThrottle(object):
def __init__(self, **kwargs):
self.target_cpu_usage = kwargs.get("target_cpu_usage", 50)
self.sleep_interval = 0.0
self.sleep_increment = kwargs.get("sleep_increment", 0.01)
self.sleep_decrement = kwargs.get("sleep_decrement", 0.01)
self.sleep_max = kwargs.get("sleep_max", 0.1)
self.sleep_min = kwargs.get("sleep_min", 0.01)
self.psutil_interval = kwargs.get("psutil_interval", 0.1)
self.log = kwargs.get("log", util.get_logger(self.__class__.__name__))
self.consecutive_increments = 0
self.consecutive_decrements = 0
self.consecutive_divisor = kwargs.get("consecutive_divisor", 1)
self.last_was_increment = kwargs.get("start_increment", True)
if kwargs.get("use_async"):
self.wait = self.dynamic_throttle_async
else:
self.wait = self.dynamic_throttle
async def dynamic_throttle_async(self):
"""
Dynamically sleeps before a request if CPU usage is above our target.
"""
current_cpu_usage = psutil.cpu_percent(interval=self.psutil_interval)
if current_cpu_usage > self.target_cpu_usage:
if self.last_was_increment:
self.consecutive_increments += 1
# self.log.debug(f"High CPU consecutive increments: {self.consecutive_increments}")
else:
self.consecutive_increments = 0 # ?
self.consecutive_decrements = 0 # ?
# self.log.debug(f"High CPU alert reset.")
self.sleep_interval += self.sleep_increment * (
max(1, self.consecutive_increments) / self.consecutive_divisor
)
self.last_was_increment = True
if self.sleep_interval > self.sleep_max:
self.sleep_interval = self.sleep_max
# self.log.debug(f"High CPU, but not increasing above {self.sleep_max:.3f}s")
# self.log.debug(
# f"High CPU: {current_cpu_usage}% > {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
elif current_cpu_usage < self.target_cpu_usage:
if not self.last_was_increment:
self.consecutive_decrements += 1
# self.log.debug(f"Low CPU consecutive decrements: {self.consecutive_decrements}")
else:
self.consecutive_decrements = 0 # ?
self.consecutive_increments = 0 # ?
# self.log.debug(f"Low CPU alert reset.")
self.sleep_interval -= self.sleep_decrement * (
max(1, self.consecutive_decrements) / self.consecutive_divisor
)
self.last_was_increment = False
if self.sleep_interval < self.sleep_min:
self.sleep_interval = self.sleep_min
# self.log.debug(f"Low CPU, but not decreasing below {self.sleep_min:.3f}s")
# self.log.debug(
# f"Low CPU: {current_cpu_usage}% < {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
if self.sleep_interval > 0:
await asyncio.sleep(self.sleep_interval)
return self.sleep_interval
return 0.0
def dynamic_throttle(self):
"""
Dynamically sleeps before a request if CPU usage is above our target.
"""
current_cpu_usage = psutil.cpu_percent(interval=self.psutil_interval)
if current_cpu_usage > self.target_cpu_usage:
if self.last_was_increment:
self.consecutive_increments += 1
# self.log.debug(f"High CPU consecutive increments: {self.consecutive_increments}")
else:
self.consecutive_increments = 0 # ?
self.consecutive_decrements = 0 # ?
# self.log.debug(f"High CPU alert reset.")
self.sleep_interval += self.sleep_increment * (
max(1, self.consecutive_increments) / self.consecutive_divisor
)
self.last_was_increment = True
if self.sleep_interval > self.sleep_max:
self.sleep_interval = self.sleep_max
# self.log.debug(f"High CPU, but not increasing above {self.sleep_max:.3f}s")
# self.log.debug(
# f"High CPU: {current_cpu_usage}% > {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
elif current_cpu_usage < self.target_cpu_usage:
if not self.last_was_increment:
self.consecutive_decrements += 1
# self.log.debug(f"Low CPU consecutive decrements: {self.consecutive_decrements}")
else:
self.consecutive_decrements = 0 # ?
self.consecutive_increments = 0 # ?
# self.log.debug(f"Low CPU alert reset.")
self.sleep_interval -= self.sleep_decrement * (
max(1, self.consecutive_decrements) / self.consecutive_divisor
)
self.last_was_increment = False
if self.sleep_interval < self.sleep_min:
self.sleep_interval = self.sleep_min
# self.log.debug(f"Low CPU, but not decreasing below {self.sleep_min:.3f}s")
# self.log.debug(
# f"Low CPU: {current_cpu_usage}% < {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
if self.sleep_interval > 0:
time.sleep(self.sleep_interval)
return self.sleep_interval
return 0.0
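
A minimal usage sketch for the throttle above, assuming it is importable as perf.throttle.DynamicThrottle within this repository; the crawl loop and fetch_one coroutine are hypothetical, not part of the module:

import asyncio

from perf.throttle import DynamicThrottle


async def fetch_one(url):
    await asyncio.sleep(0)  # stand-in for a real HTTP request


async def crawl(urls):
    throttle = DynamicThrottle(
        target_cpu_usage=50,  # start backing off once CPU exceeds 50%
        sleep_max=0.1,
        use_async=True,       # selects the asyncio-based wait()
    )
    for url in urls:
        await throttle.wait()  # sleeps only while CPU is above target
        await fetch_one(url)


# asyncio.run(crawl(["https://a.4cdn.org/g/threads.json"]))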

1
processing/ohlc.py Normal file
View File

@@ -0,0 +1 @@
# Resample 1Min into 5Min, 15Min, 30Min, 1H, 4H, 1D, 1W, 1M, 1Y

View File

@@ -14,7 +14,7 @@ from concurrent.futures import ProcessPoolExecutor
# For timestamp processing # For timestamp processing
from datetime import datetime from datetime import datetime
from math import ceil from os import getenv
import orjson import orjson
import regex import regex
@@ -34,7 +34,6 @@ from gensim.parsing.preprocessing import ( # stem_text,
strip_short, strip_short,
strip_tags, strip_tags,
) )
from numpy import array_split
from polyglot.detect.base import logger as polyglot_logger from polyglot.detect.base import logger as polyglot_logger
# For NLP # For NLP
@@ -48,9 +47,21 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import db import db
import util import util
# For throttling
from perf.throttle import DynamicThrottle
# 4chan schema # 4chan schema
from schemas.ch4_s import ATTRMAP from schemas.ch4_s import ATTRMAP
trues = ("true", "1", "t", True)
KEYNAME = "queue"
MONOLITH_PROCESS_PERFSTATS = (
getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
)
TARGET_CPU_USAGE = float(os.getenv("MONOLITH_PROCESS_TARGET_CPU_USAGE", 50.0))
CUSTOM_FILTERS = [ CUSTOM_FILTERS = [
lambda x: x.lower(), lambda x: x.lower(),
strip_tags, # strip_tags, #
@@ -81,6 +92,19 @@ CPU_THREADS = int(os.getenv("MONOLITH_PROCESS_THREADS", os.cpu_count()))
p = ProcessPoolExecutor(CPU_THREADS) p = ProcessPoolExecutor(CPU_THREADS)
throttle = DynamicThrottle(
target_cpu_usage=TARGET_CPU_USAGE,
sleep_increment=0.02,
sleep_decrement=0.01,
sleep_max=0.5,
sleep_min=0,
psutil_interval=0.1,
consecutive_divisor=2,
log=log,
start_increment=True,
use_async=False,
)
def get_hash_key(): def get_hash_key():
hash_key = db.r.get("hashing_key") hash_key = db.r.get("hashing_key")
@@ -99,38 +123,44 @@ hash_key = get_hash_key()
@asyncio.coroutine @asyncio.coroutine
async def spawn_processing_threads(data): async def spawn_processing_threads(chunk, length):
len_data = len(data) log.debug(f"Spawning processing threads for chunk {chunk} of length {length}")
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
tasks = [] tasks = []
if len(data) < CPU_THREADS * 100: if length < CPU_THREADS * 100:
split_data = [data] cores = 1
chunk_size = length
else: else:
msg_per_core = int(len(data) / CPU_THREADS) cores = CPU_THREADS
split_data = array_split(data, ceil(len(data) / msg_per_core)) chunk_size = int(length / cores)
for index, split in enumerate(split_data):
log.debug(f"Delegating processing of {len(split)} messages to thread {index}") for index in range(cores):
task = loop.run_in_executor(p, process_data, split) log.debug(
f"[{chunk}/{index}] Delegating {chunk_size} messages to thread {index}"
)
task = loop.run_in_executor(p, process_data, chunk, index, chunk_size)
tasks.append(task) tasks.append(task)
results = [await task for task in tasks] results = [await task for task in tasks]
log.debug(
(
f"Results from processing of {len_data} messages in "
f"{len(split_data)} threads: {len(results)}"
)
)
# Join the results back from the split list # Join the results back from the split list
flat_list = [item for sublist in results for item in sublist] flat_list = [item for sublist in results for item in sublist]
await db.store_kafka_batch(flat_list) total_messages = len(flat_list)
log.info(
(
f"[{chunk}/{index}] Results from processing of {length} messages in "
f"{cores} threads: {len(flat_list)}"
)
)
await db.store_batch(flat_list)
return total_messages
# log.debug(f"Finished processing {len_data} messages") # log.debug(f"Finished processing {len_data} messages")
def process_data(data): def process_data(chunk, index, chunk_size):
log.debug(f"[{chunk}/{index}] Processing {chunk_size} messages")
to_store = [] to_store = []
sentiment_time = 0.0 sentiment_time = 0.0
@@ -139,15 +169,38 @@ def process_data(data):
date_time = 0.0 date_time = 0.0
nlp_time = 0.0 nlp_time = 0.0
normalise_time = 0.0 normalise_time = 0.0
hash_time = 0.0
normal2_time = 0.0 normal2_time = 0.0
soup_time = 0.0 soup_time = 0.0
sleep_time = 0.0
total_time = 0.0 total_time = 0.0
# Initialise sentiment analyser # Initialise sentiment analyser
analyzer = SentimentIntensityAnalyzer() analyzer = SentimentIntensityAnalyzer()
for msg in data:
for msg_index in range(chunk_size):
# Print percentage of msg_index relative to chunk_size
if msg_index % 10 == 0:
percentage_done = (msg_index / chunk_size) * 100
log.debug(
f"[{chunk}/{index}] {percentage_done:.2f}% done ({msg_index}/{chunk_size})"
)
msg = db.r.rpop(KEYNAME)
if not msg:
return to_store  # queue drained early; hand back what was already processed
msg = orjson.loads(msg)
if msg["src"] == "4ch":
board = msg["net"]
thread = msg["channel"]
redis_key = (
f"cache.{board}.{thread}.{msg['no']}.{msg['resto']}.{msg['now']}"
)
key_content = db.r.get(redis_key)
if key_content is not None:
continue
db.r.set(redis_key, "1")
total_start = time.process_time() total_start = time.process_time()
# normalise fields # normalise fields
start = time.process_time() start = time.process_time()
@@ -173,24 +226,6 @@ def process_data(data):
board = msg["net"] board = msg["net"]
thread = msg["channel"] thread = msg["channel"]
# Calculate hash for post
start = time.process_time()
post_normalised = orjson.dumps(msg, option=orjson.OPT_SORT_KEYS)
hash = siphash(hash_key, post_normalised)
hash = str(hash)
redis_key = f"cache.{board}.{thread}.{msg['no']}"
key_content = db.r.get(redis_key)
if key_content:
key_content = key_content.decode("ascii")
if key_content == hash:
# This deletes the message since the append at the end won't be hit
continue
else:
msg["type"] = "update"
db.r.set(redis_key, hash)
time_took = (time.process_time() - start) * 1000
hash_time += time_took
start = time.process_time() start = time.process_time()
for key2, value in list(msg.items()): for key2, value in list(msg.items()):
if key2 in ATTRMAP: if key2 in ATTRMAP:
@@ -208,9 +243,10 @@ def process_data(data):
old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M:%S") old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M:%S")
else: else:
old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M") old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M")
# new_ts = old_ts.isoformat() # iso_ts = old_ts.isoformat()
new_ts = int(old_ts.timestamp()) new_ts = int(old_ts.timestamp())
msg["ts"] = new_ts msg["ts"] = new_ts
# msg["iso"] = iso_ts
else: else:
raise Exception("No TS in msg") raise Exception("No TS in msg")
time_took = (time.process_time() - start) * 1000 time_took = (time.process_time() - start) * 1000
@@ -236,7 +272,7 @@ def process_data(data):
msg["lang_code"] = lang_code msg["lang_code"] = lang_code
msg["lang_name"] = lang_name msg["lang_name"] = lang_name
except cld2_error as e: except cld2_error as e:
log.error(f"Error detecting language: {e}") log.error(f"[{chunk}/{index}] Error detecting language: {e}")
# So below block doesn't fail # So below block doesn't fail
lang_code = None lang_code = None
time_took = (time.process_time() - start) * 1000 time_took = (time.process_time() - start) * 1000
@@ -255,7 +291,7 @@ def process_data(data):
# Tokens # Tokens
start = time.process_time() start = time.process_time()
tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS) tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
msg["tokens"] = tokens msg["tokens"] = str(tokens)
# n = nlp(msg["msg"]) # n = nlp(msg["msg"])
# for tag in TAGS: # for tag in TAGS:
# tag_name = tag.lower() # tag_name = tag.lower()
@@ -267,17 +303,25 @@ def process_data(data):
# Add the mutated message to the return buffer # Add the mutated message to the return buffer
to_store.append(msg) to_store.append(msg)
total_time += (time.process_time() - total_start) * 1000 total_time += (time.process_time() - total_start) * 1000
log.debug("=====================================")
log.debug(f"Sentiment: {sentiment_time}") # Dynamic throttling to reduce CPU usage
log.debug(f"Regex: {regex_time}") if msg_index % 5 == 0:
log.debug(f"Polyglot: {polyglot_time}") sleep_time += throttle.wait()
log.debug(f"Date: {date_time}")
log.debug(f"NLP: {nlp_time}") if MONOLITH_PROCESS_PERFSTATS:
log.debug(f"Normalise: {normalise_time}") log.info("=====================================")
log.debug(f"Hash: {hash_time}") log.info(f"Chunk: {chunk}")
log.debug(f"Normal2: {normal2_time}") log.info(f"Index: {index}")
log.debug(f"Soup: {soup_time}") log.info(f"Sentiment: {sentiment_time}")
log.debug(f"Total: {total_time}") log.info(f"Regex: {regex_time}")
log.debug("=====================================") log.info(f"Polyglot: {polyglot_time}")
log.info(f"Date: {date_time}")
log.info(f"NLP: {nlp_time}")
log.info(f"Normalise: {normalise_time}")
log.info(f"Normal2: {normal2_time}")
log.info(f"Soup: {soup_time}")
log.info(f"Total: {total_time}")
log.info(f"Throttling: {sleep_time}")
log.info("=====================================")
return to_store return to_store
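
The reworked spawn_processing_threads above sizes work per core instead of pre-splitting a list: small chunks stay on one thread, larger ones are divided evenly and each worker pops its own share straight from the Redis list. A standalone sketch of just that sizing rule (cores, chunk_size and the 100-messages-per-thread heuristic follow the diff; the function itself is illustrative):

import os

CPU_THREADS = int(os.getenv("MONOLITH_PROCESS_THREADS", os.cpu_count()))


def plan_chunk(length, threads=CPU_THREADS):
    # Return (cores, per_core_chunk_size) for a queue chunk of `length` messages.
    if length < threads * 100:
        return 1, length
    return threads, length // threads


# Example: a 9000-message chunk on an 8-thread box -> 8 workers of 1125 messages each.
print(plan_chunk(9000, threads=8))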

View File

@@ -1,14 +1,13 @@
wheel wheel
pre-commit
beautifulsoup4 beautifulsoup4
redis redis
siphashc siphashc
aiohttp[speedups] aiohttp[speedups]
python-dotenv python-dotenv
#manticoresearch manticoresearch
numpy numpy
aioredis[hiredis] aioredis[hiredis]
aiokafka #aiokafka
vaderSentiment vaderSentiment
polyglot polyglot
pyicu pyicu
@@ -21,4 +20,10 @@ gensim
python-Levenshtein python-Levenshtein
orjson orjson
uvloop uvloop
numba elasticsearch[async]
msgpack
# flpc
psutil
pymexc
websockets
aiomysql

186
rts.py Normal file
View File

@@ -0,0 +1,186 @@
import asyncio
import logging
from os import getenv
import orjson
import websockets
import db
# Logger setup
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("RTS")
# Environment variables
MONOLITH_RTS_MEXC_API_ACCESS_KEY = getenv("MONOLITH_RTS_MEXC_API_ACCESS_KEY", None)
MONOLITH_RTS_MEXC_API_SECRET_KEY = getenv("MONOLITH_RTS_MEXC_API_SECRET_KEY", None)
# WebSocket endpoint
MEXC_WS_URL = "wss://wbs.mexc.com/ws"
{
"d": {
"e": "spot@public.kline.v3.api",
"k": {
"t": 1737901140, # TS
"o": "684.4", # Open
"c": "684.5", # Close
"h": "684.5", # High
"l": "684.4", # Low
"v": "0.173", # Volume of the base
"a": "118.41", # Volume of the quote (Quantity)
"T": 1737901200, # ?
"i": "Min1", # ?
},
},
"c": "spot@public.kline.v3.api@BNBUSDT@Min1", # Channel
"t": 1737901159239,
"s": "BNBUSDT", # Symbol
}
# Scan DB for last endtime (T)
# Request Kline data from last endtime (T) to now
# Check Server Time
# Response
# {
# "serverTime" : 1645539742000
# }
# GET /api/v3/time
# Weight(IP): 1
# Parameter:
# NONE
# Kline/Candlestick Data
# Response
# [
# [
# 1640804880000,
# "47482.36",
# "47482.36",
# "47416.57",
# "47436.1",
# "3.550717",
# 1640804940000,
# "168387.3"
# ]
# ]
# GET /api/v3/klines
# Weight(IP): 1
# Kline/candlestick bars for a symbol. Klines are uniquely identified by their open time.
# Parameters:
# Name Type Mandatory Description
# symbol string YES
# interval ENUM YES ENUM: Kline Interval
# startTime long NO
# endTime long NO
# limit integer NO Default 500; max 1000.
# Scrub function:
# For each record, ensure there are no time gaps
# When the 1m window goes over, the next t is always the last T.
# Check for gaps, and request all klines between those gaps to ensure a full DB, even with restarts.
# Idle jitter function - compare our time with server time.
# Compare ts with our time and print jitter. Add jitter warning to log and OHLC.
# High jitter may prevent us from getting the correct data for trading.
async def mex_handle(data):
message = orjson.loads(data)
# print(orjson.dumps(message, option=orjson.OPT_INDENT_2).decode("utf-8"))
if "code" in message:
if message["code"] == 0:
log.info("Control message received")
return
symbol = message["s"]
open = message["d"]["k"]["o"]
close = message["d"]["k"]["c"]
high = message["d"]["k"]["h"]
low = message["d"]["k"]["l"]
volume_base = message["d"]["k"]["v"] # ERROR IN API DOCS
volume_quote = message["d"]["k"]["a"] # > a bigDecimal volume
interval = message["d"]["k"]["i"]
start_time = message["d"]["k"]["t"] # > t long stratTime
end_time = message["d"]["k"]["T"] # > T long endTime
event_time = message["t"] # t long eventTime
index = f"mex_ohlc_{symbol.lower()}"
reformatted = {
"s": symbol,
"o": float(open),
"c": float(close),
"h": float(high),
"l": float(low),
"v": float(volume_base),
"a": float(volume_quote),
"i": interval,
"t": int(start_time),
"t2": int(end_time),
"ts": int(event_time),
}
await db.rts_store_message(index, reformatted)
print(index)
print(orjson.dumps(reformatted, option=orjson.OPT_INDENT_2).decode("utf-8"))
print()
# Kline WebSocket handler
async def mex_main():
await db.init_mysql_pool()
async with websockets.connect(MEXC_WS_URL) as websocket:
log.info("WebSocket connected")
# Define symbols and intervals
symbols = ["BTCUSDT"] # Add more symbols as needed
interval = "Min1" # Kline interval
# Prepare subscription requests for Kline streams
# Request: spot@public.kline.v3.api@<symbol>@<interval>
subscriptions = [
f"spot@public.kline.v3.api@{symbol}@{interval}" for symbol in symbols
]
# Send subscription requests
subscribe_request = {
"method": "SUBSCRIPTION",
"params": subscriptions,
# "id": 1,
}
await websocket.send(orjson.dumps(subscribe_request).decode("utf-8"))
log.info(f"Subscribed to: {subscriptions}")
# Listen for messages
while True:
try:
message = await websocket.recv()
await mex_handle(message)
except websockets.exceptions.ConnectionClosed as e:
log.error(f"WebSocket connection closed: {e}")
break
# Entry point
if __name__ == "__main__":
try:
asyncio.run(mex_main())
except KeyboardInterrupt:
log.info("RTS process terminated.")

View File

@@ -129,8 +129,19 @@ schema_main = {
"version_sentiment": "int", "version_sentiment": "int",
# 1, 2 # 1, 2
"version_tokens": "int", "version_tokens": "int",
# en, ru
"lang_code": "string indexed attribute",
"lang_name": "text",
"match_ts": "timestamp",
"batch_id": "bigint",
"rule_id": "bigint",
"index": "string indexed attribute",
"meta": "text",
# "iso": "string indexed attribute",
} }
schema_rule_storage = schema_main
schema_meta = { schema_meta = {
"id": "bigint", "id": "bigint",
# 393598265, #main, Rust Programmer's Club # 393598265, #main, Rust Programmer's Club
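
The extended main schema above adds language, rule-matching and provenance fields alongside the existing version counters. An illustrative document shape that would satisfy it; every value here is invented for the example:

example_doc = {
    "msg": "example message",
    "ts": 1669151555,
    "lang_code": "en",
    "lang_name": "English",
    "match_ts": 1669151560,
    "batch_id": 12,
    "rule_id": 3,
    "index": "main",
    "meta": "{}",
    "version_sentiment": 1,
    "version_tokens": 2,
}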

View File

@@ -10,6 +10,7 @@ from numpy import array_split
import db import db
import util import util
from perf.throttle import DynamicThrottle
# CONFIGURATION # # CONFIGURATION #
@@ -25,6 +26,12 @@ CRAWL_DELAY = int(getenv("MONOLITH_CH4_CRAWL_DELAY", 5))
# Semaphore value ? # Semaphore value ?
THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", 1000)) THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", 1000))
# Target CPU usage percentage
TARGET_CPU_USAGE = float(getenv("MONOLITH_CH4_TARGET_CPU_USAGE", 50.0))
# Boards to crawl
BOARDS = getenv("MONOLITH_CH4_BOARDS", "").split(",")
# CONFIGURATION END # # CONFIGURATION END #
@@ -37,6 +44,19 @@ class Chan4(object):
name = self.__class__.__name__ name = self.__class__.__name__
self.log = util.get_logger(name) self.log = util.get_logger(name)
self.throttle = DynamicThrottle(
target_cpu_usage=TARGET_CPU_USAGE,
sleep_increment=0.01,
sleep_decrement=0.01,
sleep_max=0.1,
sleep_min=0,
psutil_interval=0.1,
log=self.log,
start_increment=False,
use_async=True,
)
self.wait = self.throttle.wait
self.api_endpoint = "https://a.4cdn.org" self.api_endpoint = "https://a.4cdn.org"
# self.boards = ["out", "g", "a", "3", "pol"] # # self.boards = ["out", "g", "a", "3", "pol"] #
self.boards = [] self.boards = []
@@ -53,12 +73,14 @@ class Chan4(object):
self.log.debug(f"Created new hash key: {self.hash_key}") self.log.debug(f"Created new hash key: {self.hash_key}")
db.r.set("hashing_key", self.hash_key) db.r.set("hashing_key", self.hash_key)
else: else:
self.hash_key = self.hash_key.decode("ascii") self.hash_key = self.hash_key.decode("ascii")
self.log.debug(f"Decoded hash key: {self.hash_key}") self.log.debug(f"Decoded hash key: {self.hash_key}")
async def run(self): async def run(self):
if "ALL" in BOARDS:
await self.get_board_list() await self.get_board_list()
else:
self.boards = BOARDS
while True: while True:
await self.get_thread_lists(self.boards) await self.get_thread_lists(self.boards)
await asyncio.sleep(CRAWL_DELAY) await asyncio.sleep(CRAWL_DELAY)
@@ -71,29 +93,37 @@ class Chan4(object):
for board in response["boards"]: for board in response["boards"]:
self.boards.append(board["board"]) self.boards.append(board["board"])
self.log.debug(f"Got boards: {self.boards}") self.log.debug(f"Got boards: {self.boards}")
# await self.dynamic_throttle()
# TODO
async def get_thread_lists(self, boards): async def get_thread_lists(self, boards):
# self.log.debug(f"Getting thread list for {boards}") # self.log.debug(f"Getting thread list for {boards}")
board_urls = {board: f"{board}/catalog.json" for board in boards} board_urls = {board: f"{board}/threads.json" for board in boards}
responses = await self.api_call(board_urls) responses = await self.api_call(board_urls)
to_get = [] to_get = []
flat_map = [board for board, thread in responses] flat_map = [board for board, thread in responses]
self.log.debug(f"Got thread list for {flat_map}: {len(responses)}") self.log.debug(f"Got thread list for {len(responses)} boards: {flat_map}")
for mapped, response in responses: for board, response in responses:
if not response: if not response:
continue continue
for page in response: for page in response:
for threads in page["threads"]: for threads in page["threads"]:
no = threads["no"] no = threads["no"]
to_get.append((mapped, no)) to_get.append((board, no))
# await self.dynamic_throttle()
# TODO
if not to_get: if not to_get:
return return
self.log.debug(f"Got {len(to_get)} threads to fetch")
split_threads = array_split(to_get, ceil(len(to_get) / THREADS_CONCURRENT)) split_threads = array_split(to_get, ceil(len(to_get) / THREADS_CONCURRENT))
for threads in split_threads: self.log.debug(f"Split threads into {len(split_threads)} series")
await self.get_threads_content(threads) for index, thr in enumerate(split_threads):
self.log.debug(f"Series {index} - getting {len(thr)} threads")
await self.get_threads_content(thr)
# await self.dynamic_throttle()
# TODO
await asyncio.sleep(THREADS_DELAY) await asyncio.sleep(THREADS_DELAY)
# await self.get_threads_content(to_get)
def take_items(self, dict_list, n): def take_items(self, dict_list, n):
i = 0 i = 0
@@ -123,6 +153,8 @@ class Chan4(object):
continue continue
board, thread = mapped board, thread = mapped
all_posts[mapped] = response["posts"] all_posts[mapped] = response["posts"]
# await self.dynamic_throttle()
# TODO
if not all_posts: if not all_posts:
return return
@@ -132,14 +164,16 @@ class Chan4(object):
to_store = [] to_store = []
for key, post_list in posts.items(): for key, post_list in posts.items():
board, thread = key board, thread = key
for index, post in enumerate(post_list): for post in post_list:
posts[key][index]["type"] = "msg" post["type"] = "msg"
posts[key][index]["src"] = "4ch" post["src"] = "4ch"
posts[key][index]["net"] = board post["net"] = board
posts[key][index]["channel"] = thread post["channel"] = thread
to_store.append(posts[key][index]) to_store.append(post)
# await self.dynamic_throttle()
# TODO
if to_store: if to_store:
await db.queue_message_bulk(to_store) await db.queue_message_bulk(to_store)
@@ -154,6 +188,7 @@ class Chan4(object):
async def bound_fetch(self, sem, url, session, mapped): async def bound_fetch(self, sem, url, session, mapped):
# Getter function with semaphore. # Getter function with semaphore.
async with sem: async with sem:
await self.wait()
try: try:
return await self.fetch(url, session, mapped) return await self.fetch(url, session, mapped)
except: # noqa except: # noqa
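
The crawler above now reads its board list from a comma-separated environment variable and only falls back to boards.json when it contains ALL. A small sketch of that selection logic in isolation; the explicit fallback for an unset or empty variable is an assumption, the diff itself defaults to an empty string:

from os import getenv

raw = getenv("MONOLITH_CH4_BOARDS", "ALL")
# Assumption: treat unset/empty as "crawl everything".
boards = [b.strip() for b in raw.split(",") if b.strip()] or ["ALL"]

if "ALL" in boards:
    print("crawling every board returned by the boards.json endpoint")
else:
    print(f"crawling only: {boards}")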

View File

@@ -1,8 +1,6 @@
import asyncio import asyncio
from os import getenv from os import getenv
import orjson
import db import db
import util import util
from processing import process from processing import process
@@ -13,13 +11,22 @@ KEYNAME = "queue"
CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900")) CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))
ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5")) ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5"))
INGEST_INCREASE_BELOW = int(getenv("MONOLITH_INGEST_INCREASE_BELOW", "2500"))
INGEST_DECREASE_ABOVE = int(getenv("MONOLITH_INGEST_DECREASE_ABOVE", "10000"))
INGEST_INCREASE_BY = int(getenv("MONOLITH_INGEST_INCREASE_BY", "100"))
INGEST_DECREASE_BY = int(getenv("MONOLITH_INGEST_DECREASE_BY", "100"))
log = util.get_logger("ingest") log = util.get_logger("ingest")
INGEST_MAX = int(getenv("MONOLITH_INGEST_MAX", "1000000"))
INGEST_MIN = int(getenv("MONOLITH_INGEST_MIN", "100"))
class Ingest(object): class Ingest(object):
def __init__(self): def __init__(self):
name = self.__class__.__name__ name = self.__class__.__name__
self.log = util.get_logger(name) self.log = util.get_logger(name)
self.current_chunk = 0
self.log.info( self.log.info(
( (
"Starting ingest handler for chunk size of " "Starting ingest handler for chunk size of "
@@ -30,17 +37,45 @@ class Ingest(object):
async def run(self): async def run(self):
while True: while True:
await self.get_chunk() await self.get_chunk()
self.log.debug(f"Ingest chunk {self.current_chunk} complete")
self.current_chunk += 1
await asyncio.sleep(ITER_DELAY) await asyncio.sleep(ITER_DELAY)
async def get_chunk(self): async def get_chunk(self):
items = [] global CHUNK_SIZE
# for source in SOURCES: length = await db.ar.llen(KEYNAME)
# key = f"{KEYPREFIX}{source}" if length > CHUNK_SIZE:
chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE) length = CHUNK_SIZE
if not chunk: if not length:
return return
for item in chunk: ingested = await process.spawn_processing_threads(self.current_chunk, length)
item = orjson.loads(item)
items.append(item) if ingested < INGEST_INCREASE_BELOW:
if items: if CHUNK_SIZE + INGEST_INCREASE_BY < INGEST_MAX:
await process.spawn_processing_threads(items) self.log.debug(
(
f"Increasing chunk size to "
f"{CHUNK_SIZE + INGEST_INCREASE_BY} "
f"due to low ingestion ({ingested})"
)
)
CHUNK_SIZE += INGEST_INCREASE_BY
else:
log.debug(
f"Chunk size ({CHUNK_SIZE}) at maximum, not increasing above: {INGEST_MAX}"
)
elif ingested > INGEST_DECREASE_ABOVE:
if CHUNK_SIZE - INGEST_DECREASE_BY > INGEST_MIN:
self.log.debug(
(
f"Decreasing chunk size to "
f"{CHUNK_SIZE - INGEST_DECREASE_BY}"
f"due to high ingestion ({ingested})"
)
)
CHUNK_SIZE -= INGEST_DECREASE_BY
else:
log.debug(
f"Chunk size ({CHUNK_SIZE}) at minimum, not decreasing below: {INGEST_MIN}"
)
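
The adaptive sizing above nudges CHUNK_SIZE up when a pass ingests fewer messages than INGEST_INCREASE_BELOW and down when it exceeds INGEST_DECREASE_ABOVE, clamped between INGEST_MIN and INGEST_MAX. The same rule as a small pure function with the defaults from this diff; illustrative only, the real code mutates the module-level global:

def next_chunk_size(
    current,
    ingested,
    increase_below=2500,
    decrease_above=10000,
    step=100,
    minimum=100,
    maximum=1000000,
):
    # Return the chunk size to use on the next ingest pass.
    if ingested < increase_below and current + step < maximum:
        return current + step  # queue is quiet, read bigger chunks
    if ingested > decrease_above and current - step > minimum:
        return current - step  # falling behind, read smaller chunks
    return current


# Example: a quiet pass grows the chunk, a heavy pass shrinks it.
assert next_chunk_size(900, ingested=400) == 1000
assert next_chunk_size(1000, ingested=20000) == 900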

View File

@@ -43,7 +43,6 @@ class ColoredFormatter(logging.Formatter):
def get_logger(name): def get_logger(name):
# Define the logging format # Define the logging format
FORMAT = "%(asctime)s %(levelname)18s $BOLD%(name)13s$RESET - %(message)s" FORMAT = "%(asctime)s %(levelname)18s $BOLD%(name)13s$RESET - %(message)s"
COLOR_FORMAT = formatter_message(FORMAT, True) COLOR_FORMAT = formatter_message(FORMAT, True)