Make performance settings configurable

This commit is contained in:
2022-09-20 18:13:46 +01:00
parent 24929a5fbb
commit 2c5133a546
4 changed files with 47 additions and 7 deletions

View File

@@ -10,19 +10,21 @@ from numpy import array_split
import db
import util
from os import getenv
# CONFIGURATION #
# Each setting is read from the named environment variable when it is set;
# otherwise the string default shown here is parsed instead.

# Number of 4chan threads to request at once
THREADS_CONCURRENT = int(getenv("MONOLITH_CH4_THREADS_CONCURRENT", "1000"))

# Seconds to wait between every THREADS_CONCURRENT requests
THREADS_DELAY = float(getenv("MONOLITH_CH4_THREADS_DELAY", "0.1"))

# Seconds to wait between crawls (parsed with int(), so whole seconds only)
CRAWL_DELAY = int(getenv("MONOLITH_CH4_CRAWL_DELAY", "5"))

# Semaphore value
# NOTE(review): presumably the cap on simultaneously in-flight requests;
# confirm at the point where the semaphore is created.
THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", "1000"))
# CONFIGURATION END #

View File

@@ -5,13 +5,14 @@ import orjson
import db
import util
from processing import process
from os import getenv
SOURCES = ["4ch", "irc", "dis"]
KEYPREFIX = "queue."

# Chunk size per source, overridable via MONOLITH_INGEST_CHUNK_SIZE.
# NOTE(review): the earlier comment said "divide by len(SOURCES) for total";
# a per-source size would presumably be multiplied by len(SOURCES) to get the
# total, so confirm against the code that consumes CHUNK_SIZE.
# NOTE(review): the default of 900 is lower than the 9000 that used to be
# hard-coded here; confirm the reduction is intended.
CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))

# Overridable via MONOLITH_INGEST_ITER_DELAY.
# NOTE(review): presumably the pause, in seconds, between ingest iterations;
# confirm at the point of use.
ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5"))
# Module-level logger, obtained from util.get_logger with the name "ingest"
log = util.get_logger("ingest")