# monolith/sources/ingest.py

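"""Pull serialised items off the per-source queues and batch them into the
processing pipeline."""
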
import asyncio
import orjson
import db
import util
from processing import process

SOURCES = ["4ch", "irc", "dis"]
KEYPREFIX = "queue."
# Chunk size per source (multiply by len(SOURCES) for the per-iteration total)
CHUNK_SIZE = 9000
ITER_DELAY = 0.5
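# At most CHUNK_SIZE * len(SOURCES) = 27000 items are pulled per pass, so
# ITER_DELAY bounds how often an empty queue is polled.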
log = util.get_logger("ingest")  # module-level logger; Ingest builds its own per-instance logger


class Ingest:
    def __init__(self):
        name = self.__class__.__name__
        self.log = util.get_logger(name)

    async def run(self):
        # Drain the queues forever, sleeping between passes so an empty
        # queue does not become a busy loop.
        while True:
            await self.get_chunk()
            await asyncio.sleep(ITER_DELAY)

    async def get_chunk(self):
        # Pop up to CHUNK_SIZE entries from each source's queue key
        # (SPOP-style, so ordering is not guaranteed), deserialise them,
        # and hand the combined batch to the processing layer.
        items = []
        for source in SOURCES:
            key = f"{KEYPREFIX}{source}"
            chunk = await db.ar.spop(key, CHUNK_SIZE)
            if not chunk:
                continue
            for item in chunk:
                items.append(orjson.loads(item))
        if items:
            await process.spawn_processing_threads(items)
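

# A minimal entrypoint sketch, not part of the original module: it assumes
# Ingest.run() is the only long-running task and that db.ar needs no extra
# setup before the loop starts.
if __name__ == "__main__":
    asyncio.run(Ingest().run())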