Make crawler more efficient and implement configurable parameters

This commit is contained in:
2022-09-05 07:20:30 +01:00
parent f8fc5e1a1b
commit b8d2ecc009
3 changed files with 71 additions and 21 deletions

2
db.py
View File

@@ -55,6 +55,8 @@ def store_message_bulk(data):
print("BULK", len(data))
if not data:
return
# 10000: maximum number of inserts we can submit to
# Manticore in a single batch (as of Sept 2022)
split_posts = array_split(data, ceil(len(data) / 10000))
for messages in split_posts:
print("PROCESSING SPLIT OF", len(messages), "MESSAGES")