Use only one Redis key for the queue to make chunk size more precise for thread allocation

2022-09-30 07:22:22 +01:00
parent a5d29606e9
commit 02ff44a6f5
7 changed files with 25 additions and 26 deletions
--- a/sources/ingest.py
+++ b/sources/ingest.py
@@ -7,12 +7,13 @@ import db
 import util
 from processing import process

-SOURCES = ["4ch", "irc", "dis"]
+# SOURCES = ["4ch", "irc", "dis"]
 # DEBUG CODE REMOVE ME
 # SOURCES.remove("4ch")
 # SOURCES.remove("dis")
 # DEBUG CODE REMOVE ME
-KEYPREFIX = "queue."
+# KEYPREFIX = "queue."
+KEYNAME = "queue"

 # Chunk size: maximum number of items popped from the single queue key per get_chunk() call
 CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))
@@ -39,13 +40,13 @@ class Ingest(object):

    async def get_chunk(self):
        items = []
-        for source in SOURCES:
-            key = f"{KEYPREFIX}{source}"
-            chunk = await db.ar.spop(key, CHUNK_SIZE)
-            if not chunk:
-                continue
-            for item in chunk:
-                item = orjson.loads(item)
-                items.append(item)
+        # for source in SOURCES:
+        # key = f"{KEYPREFIX}{source}"
+        chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE)
+        if not chunk:
+            return
+        for item in chunk:
+            item = orjson.loads(item)
+            items.append(item)
        if items:
            await process.spawn_processing_threads(items)