Stringify tokens and return message number from processing
This commit is contained in:
parent
ba8c33d8fc
commit
1aeadaf3b7
@@ -129,6 +129,7 @@ async def spawn_processing_threads(chunk, length):
|
|||||||
|
|
||||||
# Join the results back from the split list
|
# Join the results back from the split list
|
||||||
flat_list = [item for sublist in results for item in sublist]
|
flat_list = [item for sublist in results for item in sublist]
|
||||||
|
total_messages = len(flat_list)
|
||||||
log.debug(
|
log.debug(
|
||||||
(
|
(
|
||||||
f"[{chunk}/{index}] Results from processing of {length} messages in "
|
f"[{chunk}/{index}] Results from processing of {length} messages in "
|
||||||
@@ -136,6 +137,7 @@ async def spawn_processing_threads(chunk, length):
|
|||||||
)
|
)
|
||||||
)
|
)
|
||||||
await db.store_batch(flat_list)
|
await db.store_batch(flat_list)
|
||||||
|
return total_messages
|
||||||
|
|
||||||
# log.debug(f"Finished processing {len_data} messages")
|
# log.debug(f"Finished processing {len_data} messages")
|
||||||
|
|
||||||
@@ -274,7 +276,7 @@ def process_data(chunk, index, chunk_size):
|
|||||||
# Tokens
|
# Tokens
|
||||||
start = time.process_time()
|
start = time.process_time()
|
||||||
tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
|
tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
|
||||||
msg["tokens"] = tokens
|
msg["tokens"] = str(tokens)
|
||||||
# n = nlp(msg["msg"])
|
# n = nlp(msg["msg"])
|
||||||
# for tag in TAGS:
|
# for tag in TAGS:
|
||||||
# tag_name = tag.lower()
|
# tag_name = tag.lower()
|
||||||
|
Loading…
Reference in New Issue
Block a user