You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

46 lines
1.1 KiB
Python

import asyncio
import ujson
import db
import util
from processing import process
# Source identifiers; get_chunk polls one queue per entry on every pass.
SOURCES = ["irc", "dis", "4ch"]
# Prefix joined with a source identifier to build its queue key, e.g. "queue.irc".
KEYPREFIX = "queue."
# Count passed to db.ar.spop: the most entries requested from one queue per pass.
CHUNK_SIZE = 1000
# Seconds that Ingest.run sleeps (asyncio.sleep) between polling passes.
ITER_DELAY = 0.5
class Ingest(object):
    """Poll the per-source queues and hand decoded items to processing.

    Each pass pops entries from every ``queue.<source>`` key, JSON-decodes
    them, runs the combined batch through
    ``process.spawn_processing_threads`` and then passes it to
    ``db.store_kafka_batch``.
    """

    def __init__(self):
        """Create the worker's logger, named after the concrete class."""
        name = self.__class__.__name__
        self.log = util.get_logger(name)

    async def run(self):
        """Poll forever, sleeping ``ITER_DELAY`` seconds between passes."""
        while True:
            await self.get_chunk()
            await asyncio.sleep(ITER_DELAY)

    async def get_chunk(self):
        """Pop one chunk from every source queue and process the combined batch.

        Up to ``CHUNK_SIZE`` entries are requested from each source's key via
        ``db.ar.spop``. Entries that are not valid JSON are logged and
        skipped, so a single bad payload no longer aborts the pass. When
        nothing was decoded the method returns without calling the
        processing or storage steps.
        """
        items = []
        for source in SOURCES:
            key = f"{KEYPREFIX}{source}"
            # NOTE(review): spop presumably removes the returned entries from
            # the queue, so anything popped here is not re-queued if a later
            # step fails -- confirm against the db.ar client.
            chunk = await db.ar.spop(key, CHUNK_SIZE)
            if not chunk:
                continue
            items.extend(self._decode_chunk(key, chunk))

        if not items:
            return

        print("PROCESSING", len(items))
        await process.spawn_processing_threads(items)
        print("DONE WITH PROCESSING", len(items))
        await db.store_kafka_batch(items)

    def _decode_chunk(self, key, chunk):
        """Return the JSON-decoded entries of *chunk*, skipping malformed ones.

        *key* is only used to identify the originating queue in the log
        message emitted for each entry that fails to decode.
        """
        decoded = []
        for raw in chunk:
            try:
                decoded.append(ujson.loads(raw))
            except ValueError as exc:
                # ujson reports invalid JSON as ValueError (JSONDecodeError is
                # a subclass); drop just this entry and keep the rest.
                self.log.warning("Skipping malformed item from %s: %s", key, exc)
        return decoded