Ingest into Kafka and queue messages better
This commit is contained in:
@@ -5,12 +5,17 @@ import ujson
|
||||
import db
|
||||
import util
|
||||
|
||||
SOURCES = ["irc"]
|
||||
from processing import process
|
||||
|
||||
SOURCES = ["irc", "dis", "4ch"]
|
||||
KEYPREFIX = "queue."
|
||||
CHUNK_SIZE = 1000
|
||||
ITER_DELAY = 0.5
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class Ingest(object):
|
||||
def __init__(self):
|
||||
name = self.__class__.__name__
|
||||
@@ -18,19 +23,23 @@ class Ingest(object):
|
||||
|
||||
async def run(self):
|
||||
while True:
|
||||
await self.process_chunk()
|
||||
await self.get_chunk()
|
||||
await asyncio.sleep(ITER_DELAY)
|
||||
|
||||
async def process_chunk(self):
|
||||
async def get_chunk(self):
|
||||
items = []
|
||||
for source in SOURCES:
|
||||
key = f"{KEYPREFIX}{source}"
|
||||
chunk = await db.ar.spop(key, CHUNK_SIZE)
|
||||
if not chunk:
|
||||
continue
|
||||
self.log.info(f"Got chunk: {chunk}")
|
||||
#self.log.info(f"Got chunk: {chunk}")
|
||||
for item in chunk:
|
||||
item = ujson.loads(item)
|
||||
self.log.info(f"Got item: {item}")
|
||||
#self.log.info(f"Got item: {item}")
|
||||
items.append(item)
|
||||
db.store_message_bulk(items)
|
||||
if items:
|
||||
print("PROCESSING", len(items))
|
||||
await process.spawn_processing_threads(items)
|
||||
print("DONE WITH PROCESSING", len(items))
|
||||
await db.store_kafka_batch(items)
|
||||
|
||||
Reference in New Issue
Block a user