From 1858e06c4b69b0dae500cf33e163dfd2ebcc21ca Mon Sep 17 00:00:00 2001 From: Mark Veidemanis Date: Mon, 5 Sep 2022 07:20:30 +0100 Subject: [PATCH] Alter schemas and 4chan performance settings --- schemas/mc_s.py | 8 ++++---- sources/ch4.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/schemas/mc_s.py b/schemas/mc_s.py index b1e469a..36401ef 100644 --- a/schemas/mc_s.py +++ b/schemas/mc_s.py @@ -58,8 +58,8 @@ schema = { "filename": "text", # Confederate "flag_name": "string indexed attribute", - "guild": "text", # LEGACY -> channel - "guild_id": "string indexed attribute", # LEGACY -> channel_id + #"guild": "text", # LEGACY -> channel + #"guild_id": "string indexed attribute", # LEGACY -> channel_id # 36180 "guild_member_count": "int", # ? -> channel_member_count # 9f7b2e6a0e9b @@ -112,7 +112,7 @@ schema = { "tag": "string indexed attribute", # 100 "tail_size": "int", - "time": "timestamp", # LEGACY -> ts + #"time": "timestamp", # LEGACY -> ts "tokens": "text", # ??? # 2022-09-02T16:10:36 "ts": "timestamp", @@ -124,7 +124,7 @@ schema = { "unix_time": "string indexed attribute", # Anonymous "user": "text", - "user_id": "string indexed attribute", # LEGACY -> nick_id + #"user_id": "string indexed attribute", # LEGACY -> nick_id # 1, 2 "version_sentiment": "int", # 1, 2 diff --git a/sources/ch4.py b/sources/ch4.py index dc05a6f..3cb68d5 100644 --- a/sources/ch4.py +++ b/sources/ch4.py @@ -19,7 +19,7 @@ from schemas.ch4_s import ATTRMAP # CONFIGURATION # # Number of 4chan threads to request at once -THREADS_CONCURRENT = 1000 +THREADS_CONCURRENT = 100 # Seconds to wait between every THREADS_CONCURRENT requests THREADS_DELAY = 0.1 @@ -31,7 +31,7 @@ CRAWL_DELAY = 5 THREADS_SEMAPHORE = 100 # Maximum number of CPU threads to use for post processing -CPU_THREADS = 2 +CPU_THREADS = 1 # CONFIGURATION END #