Implement indexing into Apache Druid #1
|
@ -58,8 +58,8 @@ schema = {
|
||||||
"filename": "text",
|
"filename": "text",
|
||||||
# Confederate
|
# Confederate
|
||||||
"flag_name": "string indexed attribute",
|
"flag_name": "string indexed attribute",
|
||||||
"guild": "text", # LEGACY -> channel
|
#"guild": "text", # LEGACY -> channel
|
||||||
"guild_id": "string indexed attribute", # LEGACY -> channel_id
|
#"guild_id": "string indexed attribute", # LEGACY -> channel_id
|
||||||
# 36180
|
# 36180
|
||||||
"guild_member_count": "int", # ? -> channel_member_count
|
"guild_member_count": "int", # ? -> channel_member_count
|
||||||
# 9f7b2e6a0e9b
|
# 9f7b2e6a0e9b
|
||||||
|
@ -112,7 +112,7 @@ schema = {
|
||||||
"tag": "string indexed attribute",
|
"tag": "string indexed attribute",
|
||||||
# 100
|
# 100
|
||||||
"tail_size": "int",
|
"tail_size": "int",
|
||||||
"time": "timestamp", # LEGACY -> ts
|
#"time": "timestamp", # LEGACY -> ts
|
||||||
"tokens": "text", # ???
|
"tokens": "text", # ???
|
||||||
# 2022-09-02T16:10:36
|
# 2022-09-02T16:10:36
|
||||||
"ts": "timestamp",
|
"ts": "timestamp",
|
||||||
|
@ -124,7 +124,7 @@ schema = {
|
||||||
"unix_time": "string indexed attribute",
|
"unix_time": "string indexed attribute",
|
||||||
# Anonymous
|
# Anonymous
|
||||||
"user": "text",
|
"user": "text",
|
||||||
"user_id": "string indexed attribute", # LEGACY -> nick_id
|
#"user_id": "string indexed attribute", # LEGACY -> nick_id
|
||||||
# 1, 2
|
# 1, 2
|
||||||
"version_sentiment": "int",
|
"version_sentiment": "int",
|
||||||
# 1, 2
|
# 1, 2
|
||||||
|
|
|
@ -19,7 +19,7 @@ from schemas.ch4_s import ATTRMAP
|
||||||
# CONFIGURATION #
|
# CONFIGURATION #
|
||||||
|
|
||||||
# Number of 4chan threads to request at once
|
# Number of 4chan threads to request at once
|
||||||
THREADS_CONCURRENT = 1000
|
THREADS_CONCURRENT = 100
|
||||||
|
|
||||||
# Seconds to wait between every THREADS_CONCURRENT requests
|
# Seconds to wait between every THREADS_CONCURRENT requests
|
||||||
THREADS_DELAY = 0.1
|
THREADS_DELAY = 0.1
|
||||||
|
@ -31,7 +31,7 @@ CRAWL_DELAY = 5
|
||||||
THREADS_SEMAPHORE = 100
|
THREADS_SEMAPHORE = 100
|
||||||
|
|
||||||
# Maximum number of CPU threads to use for post processing
|
# Maximum number of CPU threads to use for post processing
|
||||||
CPU_THREADS = 2
|
CPU_THREADS = 1
|
||||||
|
|
||||||
# CONFIGURATION END #
|
# CONFIGURATION END #
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue