Implement indexing into Apache Druid #1

Closed
m wants to merge 263 commits from druid into master
4 changed files with 272 additions and 4 deletions
Showing only changes of commit e092327932 - Show all commits

View File

@ -32,6 +32,7 @@ services:
- MCL=1 - MCL=1
volumes: volumes:
- ./docker/data:/var/lib/manticore - ./docker/data:/var/lib/manticore
- ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp: tmp:

View File

@ -31,6 +31,8 @@ services:
- MCL=1 - MCL=1
volumes: volumes:
- ./docker/data:/var/lib/manticore - ./docker/data:/var/lib/manticore
- ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp: tmp:
image: busybox image: busybox

265
docker/manticore.conf Normal file
View File

@ -0,0 +1,265 @@
#!/bin/sh
ip=`hostname -i|rev|cut -d\ -f 1|rev`
cat << EOF
searchd {
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
# access_plain_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
# access_blob_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
# access_doclists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
# access_hitlists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
# agent_connect_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
# agent_query_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
# agent_retry_count = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
# agent_retry_delay = 500
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
# attr_flush_period = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
# binlog_flush = 2
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
# binlog_max_log_size = 268435456
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
# binlog_path =
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
# client_timeout = 300
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
# collation_libc_locale = C
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
# collation_server = libc_ci
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
data_dir = /var/lib/manticore
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
# docstore_cache_size = 16m
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
# expansion_limit = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
# grouping_in_utc = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
# ha_period_karma = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
# ha_ping_interval = 1000
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
# hostname_lookup =
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
# jobs_queue_size =
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
# listen_backlog = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
# listen_env = this directive allows to append listeners from environment variables
listen = 9306:mysql41
listen = /var/run/mysqld/mysqld.sock:mysql41
listen = $ip:9312
listen = 9308:http
listen = $ip:9315-9325:replication
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
# listen_tfo = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#log
log = /var/log/manticore/searchd.log
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
# max_batch_queries = 32
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
# threads =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
# max_filters = 256
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
# max_filter_values = 4096
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
max_open_files = max
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
max_packet_size = 128M
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
# mysql_version_string =
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
# net_workers = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
# net_wait_tm = -1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
# net_throttle_accept = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
# net_throttle_action = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
# node_address =
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
# ondisk_attrs_default = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
# persistent_connections_limit =
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
pid_file = /var/run/manticore/searchd.pid
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
# preopen_indexes = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
qcache_max_bytes = 2048Mb
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
qcache_thresh_msec = 100
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
qcache_ttl_sec = 120
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
query_log_format = sphinxql
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
# query_log_min_msec = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
# query_log = /var/log/manticore/query.log
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
# query_log_mode = 600
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
# max_connections =
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
# network_timeout = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
# read_buffer = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
# read_buffer_docs = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
# read_buffer_hits = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
# read_unhinted 32K
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
# rt_flush_period =
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
# rt_merge_iops = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
# rt_merge_maxiosize = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
# seamless_rotate = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
# server_id =
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
# shutdown_timeout = 3
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
# shutdown_token =
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
# snippets_file_prefix =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
# sphinxql_state =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
# sphinxql_timeout = 900
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
# ssl_ca =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
# ssl_cert =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
# ssl_key =
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
# subtree_docs_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
# subtree_hits_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
# thread_stack =
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
# unlink_old = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
# watchdog = 1
}
common {
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
# lemmatizer_base = /usr/local/share
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
# progressive_merge =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
# json_autoconv_keynames =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
# json_autoconv_numbers = 0
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
# on_json_attr_error = ignore_attr
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
# plugin_dir =
}
indexer {
lemmatizer_cache = 1024M
max_iops = 0
max_iosize = 0
mem_limit = 1024M
}
EOF

View File

@ -58,8 +58,8 @@ schema = {
"filename": "text", "filename": "text",
# Confederate # Confederate
"flag_name": "string indexed attribute", "flag_name": "string indexed attribute",
#"guild": "text", # LEGACY -> channel # "guild": "text", # LEGACY -> channel
#"guild_id": "string indexed attribute", # LEGACY -> channel_id # "guild_id": "string indexed attribute", # LEGACY -> channel_id
# 36180 # 36180
"guild_member_count": "int", # ? -> channel_member_count "guild_member_count": "int", # ? -> channel_member_count
# 9f7b2e6a0e9b # 9f7b2e6a0e9b
@ -112,7 +112,7 @@ schema = {
"tag": "string indexed attribute", "tag": "string indexed attribute",
# 100 # 100
"tail_size": "int", "tail_size": "int",
#"time": "timestamp", # LEGACY -> ts # "time": "timestamp", # LEGACY -> ts
"tokens": "text", # ??? "tokens": "text", # ???
# 2022-09-02T16:10:36 # 2022-09-02T16:10:36
"ts": "timestamp", "ts": "timestamp",
@ -124,7 +124,7 @@ schema = {
"unix_time": "string indexed attribute", "unix_time": "string indexed attribute",
# Anonymous # Anonymous
"user": "text", "user": "text",
#"user_id": "string indexed attribute", # LEGACY -> nick_id # "user_id": "string indexed attribute", # LEGACY -> nick_id
# 1, 2 # 1, 2
"version_sentiment": "int", "version_sentiment": "int",
# 1, 2 # 1, 2