From 1aeadaf3b70a333f9876d3622d4e597fb274c77b Mon Sep 17 00:00:00 2001
From: Mark Veidemanis
Date: Thu, 23 Jan 2025 11:29:07 +0000
Subject: [PATCH] Stringify tokens and return message number from processing

---
 processing/process.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/processing/process.py b/processing/process.py
index 846f882..5443da8 100644
--- a/processing/process.py
+++ b/processing/process.py
@@ -129,6 +129,7 @@ async def spawn_processing_threads(chunk, length):
 
     # Join the results back from the split list
     flat_list = [item for sublist in results for item in sublist]
+    total_messages = len(flat_list)
     log.debug(
         (
             f"[{chunk}/{index}] Results from processing of {length} messages in "
@@ -136,6 +137,7 @@ async def spawn_processing_threads(chunk, length):
         )
     )
     await db.store_batch(flat_list)
+    return total_messages
 
     # log.debug(f"Finished processing {len_data} messages")
 
@@ -274,7 +276,7 @@ def process_data(chunk, index, chunk_size):
         # Tokens
         start = time.process_time()
         tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
-        msg["tokens"] = tokens
+        msg["tokens"] = str(tokens)
         # n = nlp(msg["msg"])
         # for tag in TAGS:
         #     tag_name = tag.lower()
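
Not part of the patch itself: a minimal sketch of how a caller might consume the message count that spawn_processing_threads now returns. The ingest() driver, its batches argument, and the logger wiring are hypothetical illustrations, not code from this repository.

    # Illustrative sketch only, not repository code. Assumes the patched
    # spawn_processing_threads(chunk, length) coroutine is importable from
    # processing.process and now returns the number of messages it stored.
    import logging

    from processing.process import spawn_processing_threads

    log = logging.getLogger(__name__)

    async def ingest(batches):
        # batches: hypothetical iterable of (chunk, length) pairs prepared upstream.
        total = 0
        for chunk, length in batches:
            # Each call stores its flattened results and reports how many it handled.
            total += await spawn_processing_threads(chunk, length)
        log.debug("Stored %d messages in total", total)
        return total

    # Usage from a synchronous entry point, e.g.:
    #     asyncio.run(ingest(prepared_batches))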