Make the monolith crawler/ingest tuning values configurable through
environment variables.

  * docker-compose.yml: publish Metabase on host port 3096 (was 3001).
  * env.example: new sample environment file with THRESHOLD_* and MONOLITH_*
    settings. It sets MONOLITH_CH4_CRAWL_DELAY=30, whereas the fallback in
    sources/ch4.py is 5.
  * sources/ch4.py: read THREADS_CONCURRENT, THREADS_DELAY, CRAWL_DELAY and
    THREADS_SEMAPHORE from MONOLITH_CH4_* variables; the fallbacks equal the
    previously hard-coded values.
  * sources/ingest.py: read CHUNK_SIZE and ITER_DELAY from MONOLITH_INGEST_*
    variables. The CHUNK_SIZE fallback is 900; the previously hard-coded
    value was 9000.

NOTE(review): context-line indentation and blank context lines were
reconstructed from a whitespace-collapsed copy of this patch -- run
`git apply --check` before applying. The post-image blob ids on the `index`
lines were not recomputed and may not match the resulting files.

diff --git a/docker-compose.yml b/docker-compose.yml
index 27364a3..3aea394 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -79,7 +79,7 @@ services:
     container_name: metabase
     image: metabase/metabase:latest
     ports:
-      - 3001:3000
+      - 3096:3000
     depends_on:
       - broker
 
diff --git a/env.example b/env.example
new file mode 100644
index 0000000..4db5053
--- /dev/null
+++ b/env.example
@@ -0,0 +1,36 @@
+PORTAINER_GIT_DIR=.
+MODULES_ENABLED="dis"
+DISCORD_TOKEN="xx"
+THRESHOLD_LISTENER_HOST=0.0.0.0
+THRESHOLD_LISTENER_PORT=13867
+THRESHOLD_LISTENER_SSL=1
+
+THRESHOLD_RELAY_ENABLED=0
+THRESHOLD_RELAY_HOST=0.0.0.0
+THRESHOLD_RELAY_PORT=13868
+THRESHOLD_RELAY_SSL=1
+
+THRESHOLD_API_ENABLED=1
+THRESHOLD_API_HOST=0.0.0.0
+THRESHOLD_API_PORT=13869
+
+THRESHOLD_CONFIG_DIR=./legacy/conf/live/
+THRESHOLD_CERT_DIR=./legacy/conf/cert/
+
+# How many messages to ingest at once from Redis
+MONOLITH_INGEST_CHUNK_SIZE=900
+
+# Time to wait between polling Redis again
+MONOLITH_INGEST_ITER_DELAY=0.5
+
+# Number of 4chan threads to request at once
+MONOLITH_CH4_THREADS_CONCURRENT=1000
+
+# Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
+MONOLITH_CH4_THREADS_DELAY=0.1
+
+# Time to wait after finishing a crawl before starting again
+MONOLITH_CH4_CRAWL_DELAY=30
+
+# Semaphore value
+MONOLITH_CH4_THREADS_SEMAPHORE=1000
diff --git a/sources/ch4.py b/sources/ch4.py
index 4ece35f..5c3d6be 100644
--- a/sources/ch4.py
+++ b/sources/ch4.py
@@ -10,19 +10,21 @@ from numpy import array_split
 import db
 import util
 
+from os import getenv
+
 # CONFIGURATION #
 
 # Number of 4chan threads to request at once
-THREADS_CONCURRENT = 1000
+THREADS_CONCURRENT = int(getenv("MONOLITH_CH4_THREADS_CONCURRENT", "1000"))
 
 # Seconds to wait between every THREADS_CONCURRENT requests
-THREADS_DELAY = 0.1
+THREADS_DELAY = float(getenv("MONOLITH_CH4_THREADS_DELAY", "0.1"))
 
 # Seconds to wait between crawls
-CRAWL_DELAY = 5
+CRAWL_DELAY = int(getenv("MONOLITH_CH4_CRAWL_DELAY", "5"))
 
 # Semaphore value ?
-THREADS_SEMAPHORE = 1000
+THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", "1000"))
 
 # CONFIGURATION END #
 
diff --git a/sources/ingest.py b/sources/ingest.py
index 017b8db..6b9f0a1 100644
--- a/sources/ingest.py
+++ b/sources/ingest.py
@@ -5,13 +5,14 @@ import orjson
 import db
 import util
 from processing import process
+from os import getenv
 
 SOURCES = ["4ch", "irc", "dis"]
 KEYPREFIX = "queue."
 
 # Chunk size per source (divide by len(SOURCES) for total)
-CHUNK_SIZE = 9000
-ITER_DELAY = 0.5
+CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))
+ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5"))
 
 log = util.get_logger("ingest")
 