Stringify tokens and return message number from processing

This commit is contained in:
Mark Veidemanis 2025-01-23 11:29:07 +00:00
parent ba8c33d8fc
commit 1aeadaf3b7
Signed by: m
GPG Key ID: 5ACFCEED46C0904F

View File

@@ -129,6 +129,7 @@ async def spawn_processing_threads(chunk, length):
# Join the results back from the split list
flat_list = [item for sublist in results for item in sublist]
total_messages = len(flat_list)
log.debug(
    (
        f"[{chunk}/{index}] Results from processing of {length} messages in "
@@ -136,6 +137,7 @@ async def spawn_processing_threads(chunk, length):
    )
)
await db.store_batch(flat_list)
return total_messages
# log.debug(f"Finished processing {len_data} messages")
@@ -274,7 +276,7 @@ def process_data(chunk, index, chunk_size):
# Tokens
start = time.process_time()
tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
msg["tokens"] = str(tokens)
# n = nlp(msg["msg"])
# for tag in TAGS:
#     tag_name = tag.lower()