Use a single Redis key for the queue so the chunk size used for thread allocation is more precise

This commit is contained in:
2022-09-30 07:22:22 +01:00
parent 5992498493
commit 63081f68b7
7 changed files with 25 additions and 26 deletions

View File

@@ -7,12 +7,13 @@ import db
import util
from processing import process
SOURCES = ["4ch", "irc", "dis"]
# SOURCES = ["4ch", "irc", "dis"]
# DEBUG CODE REMOVE ME
# SOURCES.remove("4ch")
# SOURCES.remove("dis")
# DEBUG CODE REMOVE ME
KEYPREFIX = "queue."
# KEYPREFIX = "queue."
KEYNAME = "queue"
# Chunk size per source (divide by len(SOURCES) for total)
CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))
@@ -39,13 +40,13 @@ class Ingest(object):
async def get_chunk(self):
items = []
for source in SOURCES:
key = f"{KEYPREFIX}{source}"
chunk = await db.ar.spop(key, CHUNK_SIZE)
if not chunk:
continue
for item in chunk:
item = orjson.loads(item)
items.append(item)
# for source in SOURCES:
# key = f"{KEYPREFIX}{source}"
chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE)
if not chunk:
return
for item in chunk:
item = orjson.loads(item)
items.append(item)
if items:
await process.spawn_processing_threads(items)