Implement indexing into Apache Druid #1
|
@ -58,8 +58,8 @@ schema = {
|
|||
"filename": "text",
|
||||
# Confederate
|
||||
"flag_name": "string indexed attribute",
|
||||
"guild": "text", # LEGACY -> channel
|
||||
"guild_id": "string indexed attribute", # LEGACY -> channel_id
|
||||
#"guild": "text", # LEGACY -> channel
|
||||
#"guild_id": "string indexed attribute", # LEGACY -> channel_id
|
||||
# 36180
|
||||
"guild_member_count": "int", # ? -> channel_member_count
|
||||
# 9f7b2e6a0e9b
|
||||
|
@ -112,7 +112,7 @@ schema = {
|
|||
"tag": "string indexed attribute",
|
||||
# 100
|
||||
"tail_size": "int",
|
||||
"time": "timestamp", # LEGACY -> ts
|
||||
#"time": "timestamp", # LEGACY -> ts
|
||||
"tokens": "text", # ???
|
||||
# 2022-09-02T16:10:36
|
||||
"ts": "timestamp",
|
||||
|
@ -124,7 +124,7 @@ schema = {
|
|||
"unix_time": "string indexed attribute",
|
||||
# Anonymous
|
||||
"user": "text",
|
||||
"user_id": "string indexed attribute", # LEGACY -> nick_id
|
||||
#"user_id": "string indexed attribute", # LEGACY -> nick_id
|
||||
# 1, 2
|
||||
"version_sentiment": "int",
|
||||
# 1, 2
|
||||
|
|
|
@ -19,7 +19,7 @@ from schemas.ch4_s import ATTRMAP
|
|||
# CONFIGURATION #
|
||||
|
||||
# Number of 4chan threads to request at once
|
||||
THREADS_CONCURRENT = 1000
|
||||
THREADS_CONCURRENT = 100
|
||||
|
||||
# Seconds to wait between every THREADS_CONCURRENT requests
|
||||
THREADS_DELAY = 0.1
|
||||
|
@ -31,7 +31,7 @@ CRAWL_DELAY = 5
|
|||
THREADS_SEMAPHORE = 100
|
||||
|
||||
# Maximum number of CPU threads to use for post processing
|
||||
CPU_THREADS = 2
|
||||
CPU_THREADS = 1
|
||||
|
||||
# CONFIGURATION END #
|
||||
|
||||
|
|
Loading…
Reference in New Issue