Ingest into Kafka and queue messages better

This commit is contained in:
2022-09-13 22:17:46 +01:00
parent 47c5f89914
commit c5f01c3084
7 changed files with 366 additions and 157 deletions

View File

@@ -5,12 +5,17 @@ import ujson
import db
import util
SOURCES = ["irc"]
from processing import process
SOURCES = ["irc", "dis", "4ch"]
KEYPREFIX = "queue."
CHUNK_SIZE = 1000
ITER_DELAY = 0.5
class Ingest(object):
def __init__(self):
name = self.__class__.__name__
@@ -18,19 +23,23 @@ class Ingest(object):
async def run(self):
while True:
await self.process_chunk()
await self.get_chunk()
await asyncio.sleep(ITER_DELAY)
async def process_chunk(self):
async def get_chunk(self):
items = []
for source in SOURCES:
key = f"{KEYPREFIX}{source}"
chunk = await db.ar.spop(key, CHUNK_SIZE)
if not chunk:
continue
self.log.info(f"Got chunk: {chunk}")
#self.log.info(f"Got chunk: {chunk}")
for item in chunk:
item = ujson.loads(item)
self.log.info(f"Got item: {item}")
#self.log.info(f"Got item: {item}")
items.append(item)
db.store_message_bulk(items)
if items:
print("PROCESSING", len(items))
await process.spawn_processing_threads(items)
print("DONE WITH PROCESSING", len(items))
await db.store_kafka_batch(items)