Stringify tokens and return message count from processing

2025-01-23 11:29:07 +00:00
parent ba8c33d8fc
commit 1aeadaf3b7


@@ -129,6 +129,7 @@ async def spawn_processing_threads(chunk, length):
     # Join the results back from the split list
     flat_list = [item for sublist in results for item in sublist]
+    total_messages = len(flat_list)
     log.debug(
         (
             f"[{chunk}/{index}] Results from processing of {length} messages in "
@@ -136,6 +137,7 @@ async def spawn_processing_threads(chunk, length):
         )
     )
     await db.store_batch(flat_list)
+    return total_messages
     # log.debug(f"Finished processing {len_data} messages")
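
With return total_messages in place, callers of spawn_processing_threads get back the number of messages handed to db.store_batch. Below is a minimal sketch of how a caller could use that value; the module name "processing", the ingest helper and the chunks iterable are hypothetical, not part of the diff.

import asyncio
import logging

# Hypothetical import path; the diff does not show the module name.
from processing import spawn_processing_threads

log = logging.getLogger(__name__)

async def ingest(chunks):
    # Sum the per-chunk counts now returned by spawn_processing_threads,
    # i.e. len(flat_list) of the results handed to db.store_batch().
    stored = 0
    for chunk in chunks:
        stored += await spawn_processing_threads(chunk, len(chunk))
    log.info("Stored %d messages in total", stored)
    return stored

# Usage sketch: asyncio.run(ingest(chunks)) with chunks as an iterable of message lists.
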
@@ -274,7 +276,7 @@ def process_data(chunk, index, chunk_size):
         # Tokens
         start = time.process_time()
         tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
-        msg["tokens"] = tokens
+        msg["tokens"] = str(tokens)
         # n = nlp(msg["msg"])
         # for tag in TAGS:
         # tag_name = tag.lower()
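
The second change stores the string representation of the token list rather than the list itself. preprocess_string here looks like gensim's gensim.parsing.preprocessing.preprocess_string, which returns a list of token strings; the sketch below shows what str() does to that value. The filter chain is illustrative only, not the project's real CUSTOM_FILTERS.

from gensim.parsing.preprocessing import (
    preprocess_string,
    remove_stopwords,
    strip_punctuation,
    strip_tags,
)

# Illustrative stand-in for the project's CUSTOM_FILTERS, which is defined elsewhere.
CUSTOM_FILTERS = [lambda s: s.lower(), strip_tags, strip_punctuation, remove_stopwords]

tokens = preprocess_string("Hello, World! This is a test message.", CUSTOM_FILTERS)
print(tokens)       # a Python list, e.g. ['hello', 'world', 'test', 'message']
print(str(tokens))  # its repr as one plain string: "['hello', 'world', 'test', 'message']"
# msg["tokens"] now holds the second form before the batch is stored.

Storing the repr keeps the field a plain string, at the cost of needing ast.literal_eval (or similar parsing) if the list ever has to be recovered later.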