Compare commits
308 Commits
Commit range: 808ed18b74 through da3ba4ea8c (SHAs only; no author or date data in this view).

The file diffs below are transcribed with old and new lines of each change adjacent; the view does not mark which side of a change a line belongs to.
**.gitignore** (vendored, 3 changes)

```diff
@@ -158,5 +158,4 @@ cython_debug/
docker/data
*.pem
legacy/conf/live/
legacy/conf/cert/
stack.env
legacy/conf/cert/
```
**.pre-commit-config.yaml** (filename inferred from content; paired `rev:` lines are the two sides of a hook version change)

```diff
@@ -1,21 +1,15 @@
repos:
  - repo: https://github.com/psf/black
    rev: 23.1.0
    rev: 22.6.0
    hooks:
      - id: black
        exclude: ^core/migrations
  - repo: https://github.com/PyCQA/isort
    rev: 5.11.5
    rev: 5.10.1
    hooks:
      - id: isort
        args: ["--profile", "black"]
  - repo: https://github.com/PyCQA/flake8
    rev: 6.0.0
    rev: 4.0.1
    hooks:
      - id: flake8
        args: [--max-line-length=88]
        exclude: ^core/migrations
  - repo: https://github.com/sirwart/ripsecrets.git
    rev: v0.1.5
    hooks:
      - id: ripsecrets
```
**Makefile** (deleted, 20 lines)

```makefile
run:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env up -d

build:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env build

stop:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env down

log:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env logs -f --names

run-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env up -d

stop-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env down

log-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env logs -f
```
**db.py** (275 changes; three hunks, old and new lines interleaved)

```diff
@@ -1,52 +1,23 @@
import asyncio
from math import ceil
from os import getenv
from time import sleep
import random

import aiomysql
import aioredis
import manticoresearch
import msgpack
import orjson
from manticoresearch.rest import ApiException
from numpy import array_split

# Kafka
from aiokafka import AIOKafkaProducer
from redis import StrictRedis

import util
from schemas import mc_s

mysql_pool = None


configuration = manticoresearch.Configuration(host="http://127.0.0.1:9308")
api_client = manticoresearch.ApiClient(configuration)
api_instance = manticoresearch.IndexApi(api_client)
# KAFKA_TOPIC = "msg"

log = util.get_logger("db")

# Redis (legacy)
# r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
r = StrictRedis(
    host="127.0.0.1",  # Replace with your Redis server's IP address
    port=1289,  # Replace with your Redis server's port
    db=0,  # Database number
)
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)

# AIORedis
# ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
ar = aioredis.from_url("redis://127.0.0.1:1289", db=0)

# /var/run/neptune-redis.sock
# db = 10
pr = aioredis.from_url("unix://var/run/neptune-redis.sock", db=10)
# fr = aioredis.from_url("unix://var/run/fisk-redis.sock", db=10)
fr = aioredis.from_url("unix://var/run/redis.sock", db=10)
# pr = aioredis.from_url("redis://redis_neptune:6379", db=10, password=getenv("REDIS_PASSWORD"))

KEYNAME = "queue"
MESSAGE_KEY = "messages"

OHLC_MESSAGE_KEY = "ohlc"

ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)

TYPES_MAIN = [
    "msg",
@@ -61,174 +32,89 @@ TYPES_MAIN = [
    "topic",
    "update",
]
MAIN_SRC_MAP = {
    "dis": "main",
    "irc": "restricted",
    "4ch": "main",
}

TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYNAME = "queue"


async def init_mysql_pool():
    """
    Initialize the MySQL connection pool.
    """
    global mysql_pool
    mysql_pool = await aiomysql.create_pool(
        host="127.0.0.1", port=9306, db="Manticore", minsize=1, maxsize=10
async def store_kafka_batch(data):
    # log.debug(f"Storing Kafka batch of {len(data)} messages")
    producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
    await producer.start()
    topicmap = {}
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            # index = "main"
            index = MAIN_SRC_MAP[msg["src"]]
            # schema = mc_s.schema_main
        elif msg["type"] in TYPES_META:
            index = "meta"
            # schema = mc_s.schema_meta
        elif msg["type"] in TYPES_INT:
            index = "internal"
            # schema = mc_s.schema_int

        KAFKA_TOPIC = index

        # if key in schema:
        #     if isinstance(value, int):
        #         if schema[key].startswith("string") or schema[key].startswith(
        #             "text"
        #         ):
        #             msg[key] = str(value)
        body = orjson.dumps(msg)
        if "ts" not in msg:
            raise Exception("No TS in msg")
        if KAFKA_TOPIC not in topicmap:
            topicmap[KAFKA_TOPIC] = [body]
        else:
            topicmap[KAFKA_TOPIC].append(body)

    for topic, messages in topicmap.items():
        batch = producer.create_batch()
        for body in messages:
            metadata = batch.append(key=None, value=body, timestamp=msg["ts"])
            if metadata is None:
                partitions = await producer.partitions_for(topic)
                partition = random.choice(tuple(partitions))
                await producer.send_batch(batch, topic, partition=partition)
                # log.debug(
                #     (
                #         f"{batch.record_count()} messages sent to topic "
                #         f"{topic} partition {partition}"
                #     )
                # )
                batch = producer.create_batch()
                continue

        partitions = await producer.partitions_for(topic)
        partition = random.choice(tuple(partitions))
        await producer.send_batch(batch, topic, partition=partition)
        # log.debug(
        #     (
        #         f"{batch.record_count()} messages sent to topic "
        #         f"{topic} partition {partition}"
        #     )
        # )
    log.debug(
        "Kafka batches sent: "
        + ", ".join([topic + ": " + str(len(topicmap[topic])) for topic in topicmap])
    )


async def rts_store_message(index, data):
    """
    Store a RTS message into MySQL using an existing connection pool.
    Prioritizes instant PubSub delivery, with minimal data storage overhead.
    :param index: str
    :param data: dict
    """
    # Publish to Redis PubSub
    packed_index = msgpack.packb({"index": index, "data": data}, use_bin_type=True)

    try:
        await fr.publish(OHLC_MESSAGE_KEY, packed_index)
    except aioredis.exceptions.ConnectionError as e:
        raise e
        await asyncio.sleep(0.1)

    # Insert data into MySQL
    try:
        async with mysql_pool.acquire() as conn:
            async with conn.cursor() as cur:
                # Insert data into the table
                query = f"""
                INSERT INTO {index} (s, o, c, h, l, v, a, i, t, t2, ts)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
                """
                # Bind the values directly
                await cur.execute(
                    query,
                    (
                        data["s"],  # symbol
                        data["o"],  # open
                        data["c"],  # close
                        data["h"],  # high
                        data["l"],  # low
                        data["v"],  # volume_base
                        data["a"],  # volume_quote
                        data["i"],  # interval
                        data["t"],  # start_time
                        data["t2"],  # end_time
                        data["ts"],  # event_time
                    ),
                )
                await conn.commit()
                log.debug(f"Stored data for {data['s']} in MySQL.")
    except aiomysql.Error as e:
        log.error(f"MySQL error: {e}")


async def store_batch(data):
    """
    Store a message into Manticore
    :param data: list
    """
    if not data:
        return
    # 10000: maximum inserts we can submit to
    # Manticore as of Sept 2022
    split_posts = array_split(data, ceil(len(data) / 10000))
    for messages in split_posts:
        total = []
        indexmap = {}
        for msg in messages:
            if msg["type"] in TYPES_MAIN:
                index = "main"
                schema = mc_s.schema_main
            elif msg["type"] in TYPES_META:
                index = "meta"
                schema = mc_s.schema_meta
            elif msg["type"] in TYPES_INT:
                index = "internal"
                schema = mc_s.schema_int
            # normalise fields
            for key, value in list(msg.items()):
                if value is None:
                    del msg[key]
                if key in schema:
                    if isinstance(value, int):
                        if schema[key].startswith("string") or schema[key].startswith(
                            "text"
                        ):
                            msg[key] = str(value)

            body = {"insert": {"index": index, "doc": msg}}
            total.append(body)
            if "ts" not in msg:
                raise Exception("No TS in msg")
            if index not in indexmap:
                indexmap[index] = [msg]
            else:
                indexmap[index].append(msg)
        # END MSG IN MESSAGES

        # Pack the indexmap with msgpack and publish it to Neptune
        packed_index = msgpack.packb(indexmap, use_bin_type=True)
        completed_publish = False
        for i in range(10):
            if completed_publish:
                break
            try:
                await pr.publish(MESSAGE_KEY, packed_index)
                completed_publish = True
            except aioredis.exceptions.ConnectionError as e:
                raise e
                await asyncio.sleep(0.1)
        if not completed_publish:
            log.error("Failed to publish to Neptune")

        body_post = ""
        for item in total:
            # print("ITEM", item)
            body_post += orjson.dumps(item).decode("utf-8")
            body_post += "\n"

        # print("BODY POST INDEX", index, body_post)

        try:
            # Bulk index operations
            api_response = api_instance.bulk(body_post)  # , async_req=True
        except ApiException as e:
            log.error("Exception when calling IndexApi->bulk: %s\n" % e)
            log.error("body_post attempted to send", body_post)
        log.info(f"Completed ingest to MC of length {len(total)}")
    # END MESSAGES IN SPLIT


def update_schema():
    pass


def create_index(api_client):
    util_instance = manticoresearch.UtilsApi(api_client)
    schemas = {
        "main": mc_s.schema_main,
        "rule_storage": mc_s.schema_rule_storage,
        "meta": mc_s.schema_meta,
        "internal": mc_s.schema_int,
    }
    for name, schema in schemas.items():
        schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])

        create_query = (
            f"create table if not exists {name}({schema_types}) engine='columnar'"
        )
        print("Schema types", create_query)
        util_instance.sql(create_query)
    await producer.stop()


async def queue_message(msg):
    """
    Queue a message on the Redis buffer.
    """
    # TODO: msgpack
    message = orjson.dumps(msg)
    await ar.lpush(KEYNAME, message)
    await ar.sadd(KEYNAME, message)


async def queue_message_bulk(data):
@@ -236,6 +122,5 @@ async def queue_message_bulk(data):
    Queue multiple messages on the Redis buffer.
    """
    for msg in data:
        # TODO: msgpack
        message = orjson.dumps(msg)
        await ar.lpush(KEYNAME, message)
        await ar.sadd(KEYNAME, message)
```
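The explicit batch path in `store_kafka_batch` is the core of the new Kafka writer: `create_batch()` builds a batch client-side, `append()` returns `None` once the batch is full, and `send_batch()` flushes it to a randomly chosen partition. A minimal, self-contained sketch of that pattern follows (the broker address is taken from the diff; the topic is illustrative). Note that the diffed code's `continue` on overflow drops the message that did not fit, while this sketch re-appends it:

```python
# Minimal sketch of the aiokafka explicit-batch pattern used by
# store_kafka_batch above. Broker address mirrors the diff; topic is illustrative.
import asyncio
import random

from aiokafka import AIOKafkaProducer


async def send_batched(bodies: list[bytes], topic: str) -> None:
    producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
    await producer.start()
    try:
        batch = producer.create_batch()
        for body in bodies:
            # append() returns None once the batch is full: flush it and
            # re-append the message that did not fit into a fresh batch.
            if batch.append(key=None, value=body, timestamp=None) is None:
                partitions = await producer.partitions_for(topic)
                await producer.send_batch(
                    batch, topic, partition=random.choice(tuple(partitions))
                )
                batch = producer.create_batch()
                batch.append(key=None, value=body, timestamp=None)
        if batch.record_count():
            partitions = await producer.partitions_for(topic)
            await producer.send_batch(
                batch, topic, partition=random.choice(tuple(partitions))
            )
    finally:
        await producer.stop()


asyncio.run(send_batched([b"one", b"two", b"three"], "main"))
```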
**db_old_ref.py** (deleted, 174 lines)

```python
import asyncio
from os import getenv

import aioredis
import msgpack
import orjson
import redis

# Elasticsearch
from elasticsearch import AsyncElasticsearch

import util

trues = ("true", "1", "t", True)

# INDEX = "msg"

log = util.get_logger("db")

# Redis (legacy)
# r = redis.from_url("redis://ssdb:1289", db=0)

# AIORedis
ar = aioredis.from_url("redis://ssdb:1289", db=0)

# Neptune redis for PubSub
pr = aioredis.from_url("redis://redis_neptune:6379", db=10)

TYPES_MAIN = [
    "msg",
    "notice",
    "action",
    "part",
    "join",
    "kick",
    "quit",
    "nick",
    "mode",
    "topic",
    "update",
]
MAIN_SRC_MAP = {
    "dis": "main",
    "irc": "restricted",
    "4ch": "main",
}

TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYNAME = "queue"
MESSAGE_KEY = "messages"

ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false") in trues

client = None

# These are sometimes numeric, sometimes strings.
# If they are seen to be numeric first, ES will erroneously
# index them as "long" and then subsequently fail to index messages
# with strings in the field.
keyword_fields = ["nick_id", "user_id", "net_id"]

mapping_int = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
        }
    }
}
mapping = dict(mapping_int)
for field in keyword_fields:
    mapping["mappings"]["properties"][field] = {"type": "text"}


del mapping_int["mappings"]["properties"]["file_tim"]


async def initialise_elasticsearch():
    """
    Initialise the Elasticsearch client.
    """
    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
    client = AsyncElasticsearch(ELASTICSEARCH_HOST, http_auth=auth, verify_certs=False)
    for index in ("main", "meta", "restricted", "internal"):
        if index == "internal":
            map_dict = mapping_int
        else:
            map_dict = mapping
        if await client.indices.exists(index=index):
            # update index with mapping
            await client.indices.put_mapping(
                index=index, properties=map_dict["mappings"]["properties"]
            )
        else:
            await client.indices.create(index=index, mappings=map_dict["mappings"])
    return client


async def store_batch(data):
    global client
    if not client:
        client = await initialise_elasticsearch()
    indexmap = {}
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            # index = "main"
            index = MAIN_SRC_MAP[msg["src"]]
            # schema = mc_s.schema_main
        elif msg["type"] in TYPES_META:
            index = "meta"
            # schema = mc_s.schema_meta
        elif msg["type"] in TYPES_INT:
            index = "internal"
            # schema = mc_s.schema_int

        INDEX = index

        # if key in schema:
        #     if isinstance(value, int):
        #         if schema[key].startswith("string") or schema[key].startswith(
        #             "text"
        #         ):
        #             msg[key] = str(value)
        # body = orjson.dumps(msg)
        if "ts" not in msg:
            raise Exception("No TS in msg")
        if INDEX not in indexmap:
            indexmap[INDEX] = [msg]
        else:
            indexmap[INDEX].append(msg)

    # Pack the indexmap with msgpack and publish it to Neptune
    packed_index = msgpack.packb(indexmap, use_bin_type=True)
    completed_publish = False
    for i in range(10):
        if completed_publish:
            break
        try:
            await pr.publish(MESSAGE_KEY, packed_index)
            completed_publish = True
        except aioredis.exceptions.ConnectionError:
            await asyncio.sleep(0.1)
    if not completed_publish:
        log.error("Failed to publish to Neptune")

    for index, index_messages in indexmap.items():
        for message in index_messages:
            result = await client.index(index=index, body=message)
            if not result["result"] == "created":
                log.error(f"Indexing failed: {result}")
    log.debug(f"Indexed {len(data)} messages in ES")


async def queue_message(msg):
    """
    Queue a message on the Redis buffer.
    """
    # TODO: msgpack
    message = orjson.dumps(msg)
    await ar.lpush(KEYNAME, message)


async def queue_message_bulk(data):
    """
    Queue multiple messages on the Redis buffer.
    """
    for msg in data:
        # TODO: msgpack
        message = orjson.dumps(msg)
        await ar.lpush(KEYNAME, message)
```
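The deleted module documents a real Elasticsearch pitfall: under dynamic mapping, the first value seen for `nick_id`/`user_id`/`net_id` fixes the field type, so a numeric-first field later rejects string values. A hedged sketch of the explicit-mapping guard, isolated from the rest of the module (the cluster address and the `demo` index are illustrative assumptions; the client-call style mirrors the deleted code):

```python
# Hedged sketch of the explicit-mapping guard described above. Assumes an
# elasticsearch-py version accepting http_auth/body (as the deleted module
# used) and a cluster at localhost:9200; the "demo" index is illustrative.
import asyncio

from elasticsearch import AsyncElasticsearch

MAPPINGS = {"properties": {"nick_id": {"type": "text"}}}


async def main() -> None:
    client = AsyncElasticsearch(
        "http://localhost:9200",
        http_auth=("elastic", "changeme"),
        verify_certs=False,
    )
    if not await client.indices.exists(index="demo"):
        # Declaring nick_id as text up front means numeric-first data cannot
        # lock the field to "long"; both documents below index cleanly.
        await client.indices.create(index="demo", mappings=MAPPINGS)
    await client.index(index="demo", body={"nick_id": 123})
    await client.index(index="demo", body={"nick_id": "abc"})
    await client.close()


asyncio.run(main())
```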
**Docker Compose file** (deleted, 206 lines; filename not shown in this view)

```yaml
version: "2.2"

services:
  rts:
    image: xf/monolith:latest
    container_name: rts_monolith
    command: sh -c '. /venv/bin/activate && exec python rts.py'
    build: .
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - type: bind
        source: /code/run
        target: /var/run
    environment:
      PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
      MODULES_ENABLED: "${MODULES_ENABLED}"
      MONOLITH_RTS_MEXC_API_ACCESS_KEY: "${MONOLITH_RTS_MEXC_API_ACCESS_KEY}"
      MONOLITH_RTS_MEXC_API_SECRET_KEY: "${MONOLITH_RTS_MEXC_API_SECRET_KEY}"
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 1.0G
    network_mode: host

  app:
    image: xf/monolith:latest
    container_name: monolith
    #command: sh -c '. /venv/bin/activate && exec python -m cProfile -o /tmp/profile.out monolith.py'
    build: .
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - type: bind
        source: /code/run
        target: /var/run
    environment:
      PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
      MODULES_ENABLED: "${MODULES_ENABLED}"
      DISCORD_TOKEN: "${DISCORD_TOKEN}"
      THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
      THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
      THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
      THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
      THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
      THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
      THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
      THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
      THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
      THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
      THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
      #THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
      THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
      # How many messages to ingest at once from Redis
      MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
      # Time to wait between polling Redis again
      MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
      # Number of 4chan threads to request at once
      MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
      # Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
      MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
      # Time to wait after finishing a crawl before starting again
      MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
      # Semaphore value
      MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
      # Threads to use for data processing
      # Leave uncommented to use all available threads
      MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
      # Enable performance metrics after message processing
      MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
      MONOLITH_PROCESS_TARGET_CPU_USAGE: "${MONOLITH_PROCESS_TARGET_CPU_USAGE}"
      MONOLITH_CH4_TARGET_CPU_USAGE: "${MONOLITH_CH4_TARGET_CPU_USAGE}"
      MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
      REDIS_PASSWORD: "${REDIS_PASSWORD}"
      MONOLITH_INGEST_INCREASE_BELOW: "${MONOLITH_INGEST_INCREASE_BELOW}"
      MONOLITH_INGEST_INCREASE_BY: "${MONOLITH_INGEST_INCREASE_BY}"
      MONOLITH_INGEST_DECREASE_ABOVE: "${MONOLITH_INGEST_DECREASE_ABOVE}"
      MONOLITH_INGEST_DECREASE_BY: "${MONOLITH_INGEST_DECREASE_BY}"
      MONOLITH_INGEST_MAX: "${MONOLITH_INGEST_MAX}"
      MONOLITH_INGEST_MIN: "${MONOLITH_INGEST_MIN}"
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 1.0G
    network_mode: host


  threshold:
    image: xf/threshold:latest
    container_name: threshold
    build: legacy/docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
    volumes_from:
      - tmp
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    environment:
      PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
      MODULES_ENABLED: "${MODULES_ENABLED}"
      DISCORD_TOKEN: "${DISCORD_TOKEN}"
      THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
      THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
      THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
      THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
      THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
      THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
      THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
      THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
      THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
      THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
      THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
      #THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
      THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
      # How many messages to ingest at once from Redis
      MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
      # Time to wait between polling Redis again
      MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
      # Number of 4chan threads to request at once
      MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
      # Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
      MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
      # Time to wait after finishing a crawl before starting again
      MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
      # Semaphore value
      MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
      # Threads to use for data processing
      # Leave uncommented to use all available threads
      MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
      # Enable performance metrics after message processing
      MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
      MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
      REDIS_PASSWORD: "${REDIS_PASSWORD}"
    # for development
    extra_hosts:
      - "host.docker.internal:host-gateway"
    network_mode: host

  ssdb:
    image: tsl0922/ssdb
    container_name: ssdb_monolith
    ports:
      - "1289:1289"
    environment:
      - SSDB_PORT=1289
    volumes:
      - monolith_ssdb_data:/var/lib/ssdb
    # networks:
    #   - default
    #   - db
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 1.0G
    network_mode: host

  redis:
    image: redis
    container_name: redis_monolith
    command: redis-server /etc/redis.conf
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
      - monolith_redis_data:/data
      - type: bind
        source: /code/run
        target: /var/run
    # volumes_from:
    #   - tmp
    healthcheck:
      test: "redis-cli ping"
      interval: 2s
      timeout: 2s
      retries: 15
    # networks:
    #   - default
    #   - xf
    #   - db
    deploy:
      resources:
        limits:
          cpus: '0.5'
          memory: 1.0G
    network_mode: host

# networks:
#   default:
#     driver: bridge
#   xf:
#     external: true
#   db:
#     external: true

volumes:
  monolith_redis_data:
  monolith_ssdb_data:
```
**docker-compose.yml** (new file, 351 lines)

```yaml
version: "2.2"

x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
  - db
  - redis_superset
x-superset-volumes: &superset-volumes
  # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
  - ./docker/superset:/app/docker
  - superset_home:/app/superset_home

services:
  app:
    image: pathogen/monolith:latest
    container_name: monolith
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
    env_file:
      - .env
    volumes_from:
      - tmp
    depends_on:
      broker:
        condition: service_started
      kafka:
        condition: service_healthy
      tmp:
        condition: service_started
      redis:
        condition: service_healthy
      # - db

  threshold:
    image: pathogen/threshold:latest
    container_name: threshold
    build: ./legacy/docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - .env
    # for development
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes_from:
      - tmp
    depends_on:
      tmp:
        condition: service_started
      redis:
        condition: service_healthy

  # db:
  #   #image: pathogen/manticore:kibana
  #   image: manticoresearch/manticore:latest
  #   #build:
  #   #  context: ./docker/manticore
  #   #  args:
  #   #    DEV: 1
  #   restart: always

  # turnilo:
  #   container_name: turnilo
  #   image: uchhatre/turnilo:latest
  #   ports:
  #     - 9093:9090
  #   environment:
  #     - DRUID_BROKER_URL=http://broker:8082
  #     - CONFIG_FILE=/config.yaml
  #   volumes:
  #     - ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml
  #   depends_on:
  #     - broker

  # metabase:
  #   container_name: metabase
  #   image: metabase/metabase:latest
  #   ports:
  #     - 3096:3000
  #   environment:
  #     JAVA_OPTS: -Xmx1g
  #     MB_DB_TYPE: postgres
  #     MB_DB_DBNAME: metabase
  #     MB_DB_PORT: 5432
  #     MB_DB_USER: druid
  #     MB_DB_PASS: FoolishPassword
  #     MB_DB_HOST: postgres
  #   depends_on:
  #     - broker

  redis_superset:
    image: redis:latest
    container_name: superset_cache
    restart: unless-stopped
    volumes:
      - redis:/data
  db:
    env_file: docker/.env-non-dev
    image: postgres:10
    container_name: superset_db
    restart: unless-stopped
    volumes:
      - db_home:/var/lib/postgresql/data

  superset:
    env_file: docker/.env-non-dev
    image: *superset-image
    container_name: superset_app
    command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
    user: "root"
    restart: unless-stopped
    ports:
      - 8088:8088
    depends_on: *superset-depends-on
    volumes: *superset-volumes

  superset-init:
    image: *superset-image
    container_name: superset_init
    command: ["/app/docker/docker-init.sh"]
    env_file: docker/.env-non-dev
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  superset-worker:
    image: *superset-image
    container_name: superset_worker
    command: ["/app/docker/docker-bootstrap.sh", "worker"]
    env_file: docker/.env-non-dev
    restart: unless-stopped
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  superset-worker-beat:
    image: *superset-image
    container_name: superset_worker_beat
    command: ["/app/docker/docker-bootstrap.sh", "beat"]
    env_file: docker/.env-non-dev
    restart: unless-stopped
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  postgres:
    container_name: postgres
    image: postgres:latest
    volumes:
      - metadata_data:/var/lib/postgresql/data
    environment:
      POSTGRES_PASSWORD: FoolishPassword
      POSTGRES_USER: druid
      POSTGRES_DB: druid

  # Need 3.5 or later for container nodes
  zookeeper:
    container_name: zookeeper
    image: zookeeper:3.5
    ports:
      - "2181:2181"
    environment:
      - ZOO_MY_ID=1

  kafka:
    image: wurstmeister/kafka:latest
    container_name: kafka
    depends_on:
      - zookeeper
    ports:
      - 9092:9092
      - 29092:29092
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
      KAFKA_MESSAGE_MAX_BYTES: 2000000
      #KAFKA_HEAP_OPTS: -Xmx2g
    healthcheck:
      test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
      start_period: 15s
      interval: 30s
      timeout: 30s
      retries: 45

  coordinator:
    image: apache/druid:0.23.0
    container_name: coordinator
    volumes:
      - druid_shared:/opt/shared
      - coordinator_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
    ports:
      - "8081:8081"
    command:
      - coordinator
    env_file:
      - environment

  broker:
    image: apache/druid:0.23.0
    container_name: broker
    volumes:
      - broker_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8082:8082"
    command:
      - broker
    env_file:
      - environment

  historical:
    image: apache/druid:0.23.0
    container_name: historical
    volumes:
      - druid_shared:/opt/shared
      - historical_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8083:8083"
    command:
      - historical
    env_file:
      - environment

  middlemanager:
    image: apache/druid:0.23.0
    container_name: middlemanager
    volumes:
      - druid_shared:/opt/shared
      - middle_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8091:8091"
      - "8100-8105:8100-8105"
    command:
      - middleManager
    env_file:
      - environment

  router:
    image: apache/druid:0.23.0
    container_name: router
    volumes:
      - router_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8888:8888"
    command:
      - router
    env_file:
      - environment

  # db:
  #   #image: pathogen/manticore:kibana
  #   image: manticoresearch/manticore:dev
  #   #build:
  #   #  context: ./docker/manticore
  #   #  args:
  #   #    DEV: 1
  #   restart: always
  #   ports:
  #     - 9308
  #     - 9312
  #     - 9306
  #   ulimits:
  #     nproc: 65535
  #     nofile:
  #       soft: 65535
  #       hard: 65535
  #     memlock:
  #       soft: -1
  #       hard: -1
  #   environment:
  #     - MCL=1
  #   volumes:
  #     - ./docker/data:/var/lib/manticore
  #     - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf

  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis

  redis:
    image: redis
    command: redis-server /etc/redis.conf
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
      - redis_data:/data
    volumes_from:
      - tmp
    healthcheck:
      test: "redis-cli -s /var/run/redis/redis.sock ping"
      interval: 2s
      timeout: 2s
      retries: 15

networks:
  default:
    external:
      name: pathogen

volumes:
  superset_home:
    external: false
  db_home:
    external: false
  redis_superset:
    external: false
  redis_data: {}
  metadata_data: {}
  middle_var: {}
  historical_var: {}
  broker_var: {}
  coordinator_var: {}
  router_var: {}
  druid_shared: {}
```
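Both `app` and `threshold` above are gated on `redis` reaching `service_healthy`, which the healthcheck defines as `redis-cli -s /var/run/redis/redis.sock ping` succeeding. For a worker started outside Compose, the same gate can be reproduced in-process; a hedged sketch (the socket path mirrors this compose file, while attempt count and delay are arbitrary choices):

```python
# Hedged sketch reproducing the compose healthcheck gate in-process.
# Socket path mirrors the compose file; timings are arbitrary assumptions.
import time

from redis import StrictRedis
from redis.exceptions import ConnectionError as RedisConnectionError


def wait_for_redis(
    socket_path: str = "/var/run/redis/redis.sock",
    attempts: int = 15,
    delay: float = 2.0,
) -> StrictRedis:
    client = StrictRedis(unix_socket_path=socket_path, db=0)
    for _ in range(attempts):
        try:
            if client.ping():  # the same probe the healthcheck runs
                return client
        except RedisConnectionError:
            pass
        time.sleep(delay)
    raise RuntimeError("Redis did not become healthy in time")


r = wait_for_redis()
```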
**docker/.env-non-dev** (new file, 46 lines)

```
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
COMPOSE_PROJECT_NAME=superset

# database configurations (do not modify)
DATABASE_DB=superset
DATABASE_HOST=db
DATABASE_PASSWORD=superset
DATABASE_USER=superset

# database engine specific environment variables
# change the below if you prefers another database engine
DATABASE_PORT=5432
DATABASE_DIALECT=postgresql
POSTGRES_DB=superset
POSTGRES_USER=superset
POSTGRES_PASSWORD=superset
#MYSQL_DATABASE=superset
#MYSQL_USER=superset
#MYSQL_PASSWORD=superset
#MYSQL_RANDOM_ROOT_PASSWORD=yes

# Add the mapped in /app/pythonpath_docker which allows devs to override stuff
PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev
REDIS_HOST=redis
REDIS_PORT=6379

FLASK_ENV=production
SUPERSET_ENV=production
SUPERSET_LOAD_EXAMPLES=yes
CYPRESS_CONFIG=false
SUPERSET_PORT=8088
```
**Dockerfile** (filename inferred from content; old and new lines adjacent)

```diff
@@ -1,19 +1,19 @@
# syntax=docker/dockerfile:1
FROM python:3.10
FROM python:3

RUN useradd -d /code xf
RUN useradd -d /code pathogen
RUN mkdir /code
RUN chown xf:xf /code
RUN chown pathogen:pathogen /code

RUN mkdir /venv
RUN chown xf:xf /venv
RUN chown pathogen:pathogen /venv

USER xf
USER pathogen
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
WORKDIR /code
COPY requirements.txt /code/
COPY docker/discord-patched.tgz /code/
COPY discord-patched.tgz /code/

RUN python -m venv /venv
RUN . /venv/bin/activate && pip install -r requirements.txt
```
**docker/docker-compose.prod.yml** (new file, 348 lines)

```yaml
version: "2.2"

# volumes:
#   metadata_data: {}
#   middle_var: {}
#   historical_var: {}
#   broker_var: {}
#   coordinator_var: {}
#   router_var: {}
#   druid_shared: {}

x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
  - db
  - redis_superset
x-superset-volumes: &superset-volumes
  # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
  - ./docker/superset:/app/docker
  - superset_home:/app/superset_home

services:
  app:
    image: pathogen/monolith:latest
    container_name: monolith
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
    env_file:
      - ../stack.env
    volumes_from:
      - tmp
    depends_on:
      broker:
        condition: service_started
      kafka:
        condition: service_healthy
      tmp:
        condition: service_started
      redis:
        condition: service_healthy
      # - db

  threshold:
    image: pathogen/threshold:latest
    container_name: threshold
    build: ./legacy/docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - ../stack.env
    volumes_from:
      - tmp
    depends_on:
      tmp:
        condition: service_started
      redis:
        condition: service_healthy

  # db:
  #   #image: pathogen/manticore:kibana
  #   image: manticoresearch/manticore:latest
  #   #build:
  #   #  context: ./docker/manticore
  #   #  args:
  #   #    DEV: 1
  #   restart: always


  # turnilo:
  #   container_name: turnilo
  #   image: uchhatre/turnilo:latest
  #   ports:
  #     - 9093:9090
  #   environment:
  #     - DRUID_BROKER_URL=http://broker:8082
  #   depends_on:
  #     - broker

  # metabase:
  #   container_name: metabase
  #   image: metabase/metabase:latest
  #   ports:
  #     - 3096:3000
  #   environment:
  #     JAVA_OPTS: -Xmx1g
  #     MB_DB_TYPE: postgres
  #     MB_DB_DBNAME: metabase
  #     MB_DB_PORT: 5432
  #     MB_DB_USER: druid
  #     MB_DB_PASS: FoolishPassword
  #     MB_DB_HOST: postgres
  #   depends_on:
  #     - broker

  redis_superset:
    image: redis:latest
    container_name: superset_cache
    restart: unless-stopped
    volumes:
      - redis:/data
  db:
    env_file: .env-non-dev
    image: postgres:10
    container_name: superset_db
    restart: unless-stopped
    volumes:
      - db_home:/var/lib/postgresql/data

  superset:
    env_file: .env-non-dev
    image: *superset-image
    container_name: superset_app
    command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
    user: "root"
    restart: unless-stopped
    ports:
      - 8088:8088
    depends_on: *superset-depends-on
    volumes: *superset-volumes

  superset-init:
    image: *superset-image
    container_name: superset_init
    command: ["/app/docker/docker-init.sh"]
    env_file: .env-non-dev
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  superset-worker:
    image: *superset-image
    container_name: superset_worker
    command: ["/app/docker/docker-bootstrap.sh", "worker"]
    env_file: .env-non-dev
    restart: unless-stopped
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  superset-worker-beat:
    image: *superset-image
    container_name: superset_worker_beat
    command: ["/app/docker/docker-bootstrap.sh", "beat"]
    env_file: .env-non-dev
    restart: unless-stopped
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  postgres:
    container_name: postgres
    image: postgres:latest
    volumes:
      - /block/store/metadata_data:/var/lib/postgresql/data
    environment:
      - POSTGRES_PASSWORD=FoolishPassword
      - POSTGRES_USER=druid
      - POSTGRES_DB=druid

  # Need 3.5 or later for container nodes
  zookeeper:
    container_name: zookeeper
    image: zookeeper:3.5
    ports:
      - "2181:2181"
    environment:
      - ZOO_MY_ID=1

  kafka:
    image: wurstmeister/kafka:latest
    container_name: kafka
    depends_on:
      - zookeeper
    ports:
      - 9092:9092
      - 29092:29092
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
      KAFKA_MESSAGE_MAX_BYTES: 2000000
      #KAFKA_HEAP_OPTS: -Xmx2g
    healthcheck:
      test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
      start_period: 15s
      interval: 30s
      timeout: 30s
      retries: 45

  coordinator:
    image: apache/druid:0.23.0
    container_name: coordinator
    volumes:
      - /block/store/druid_shared:/opt/shared
      - /block/store/coordinator_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
    ports:
      - "8081:8081"
    command:
      - coordinator
    env_file:
      - environment

  broker:
    image: apache/druid:0.23.0
    container_name: broker
    volumes:
      - /block/store/broker_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8082:8082"
    command:
      - broker
    env_file:
      - environment

  historical:
    image: apache/druid:0.23.0
    container_name: historical
    volumes:
      - /block/store/druid_shared:/opt/shared
      - /block/store/historical_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8083:8083"
    command:
      - historical
    env_file:
      - environment

  middlemanager:
    image: apache/druid:0.23.0
    container_name: middlemanager
    volumes:
      - /block/store/druid_shared:/opt/shared
      - /block/store/middle_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8091:8091"
      - "8100-8105:8100-8105"
    command:
      - middleManager
    env_file:
      - environment

  router:
    image: apache/druid:0.23.0
    container_name: router
    volumes:
      - /block/store/router_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
      - coordinator
    ports:
      - "8888:8888"
    command:
      - router
    env_file:
      - environment

  # db:
  #   #image: pathogen/manticore:kibana
  #   image: manticoresearch/manticore:dev
  #   #build:
  #   #  context: ./docker/manticore
  #   #  args:
  #   #    DEV: 1
  #   restart: always
  #   ports:
  #     - 9308
  #     - 9312
  #     - 9306
  #   ulimits:
  #     nproc: 65535
  #     nofile:
  #       soft: 65535
  #       hard: 65535
  #     memlock:
  #       soft: -1
  #       hard: -1
  #   environment:
  #     - MCL=1
  #   volumes:
  #     - ./docker/data:/var/lib/manticore
  #     - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf

  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis

  redis:
    image: redis
    command: redis-server /etc/redis.conf
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
      - redis_data:/data
    volumes_from:
      - tmp
    healthcheck:
      test: "redis-cli -s /var/run/redis/redis.sock ping"
      interval: 2s
      timeout: 2s
      retries: 15

networks:
  default:
    external:
      name: pathogen

volumes:
  redis_data: {}
  superset_home:
    external: false
  db_home:
    external: false
  redis:
    external: false
```
**docker/environment** (new file, 87 lines)

```
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g

#druid_emitter_logging_logLevel=debug

#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]

#druid_zk_service_host=zookeeper

#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword

#druid_coordinator_balancer_strategy=cachingCost

#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB

#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs

#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1

#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart

druid_emitter_logging_logLevel=debug

druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]

druid_zk_service_host=zookeeper

druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword

druid_coordinator_balancer_strategy=cachingCost

druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB

druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs

druid_processing_numThreads=2
druid_processing_numMergeBuffers=2

DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
```
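The active half of this file loads `druid-kafka-indexing-service`, so Druid can consume the Kafka topics that `db.py` produces to. Starting ingestion takes one step not shown in this compare: POSTing a supervisor spec to the overlord API, proxied by the router on port 8888 in this stack. A skeleton sketch; the data source, topic, and `ts` timestamp column are illustrative guesses:

```python
# Hedged sketch: a minimal Kafka supervisor spec POSTed to Druid's overlord
# API (reachable via the router at :8888 in this compose stack). dataSource,
# topic, and the "ts" timestamp column are illustrative assumptions.
import json

import requests

spec = {
    "type": "kafka",
    "spec": {
        "ioConfig": {
            "type": "kafka",
            "topic": "main",
            "inputFormat": {"type": "json"},
            "consumerProperties": {"bootstrap.servers": "kafka:9092"},
        },
        "dataSchema": {
            "dataSource": "main",
            "timestampSpec": {"column": "ts", "format": "posix"},
            # An empty dimension list lets Druid discover dimensions from the data.
            "dimensionsSpec": {"dimensions": []},
        },
    },
}

resp = requests.post(
    "http://localhost:8888/druid/indexer/v1/supervisor",
    data=json.dumps(spec),
    headers={"Content-Type": "application/json"},
)
resp.raise_for_status()
print(resp.json())  # {"id": "main"} on success
```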
265
docker/manticore.conf
Normal file
265
docker/manticore.conf
Normal file
@@ -0,0 +1,265 @@
|
||||
#!/bin/sh
|
||||
ip=`hostname -i|rev|cut -d\ -f 1|rev`
|
||||
cat << EOF
|
||||
searchd {
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
|
||||
# access_plain_attrs = mmap_preread
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
|
||||
# access_blob_attrs = mmap_preread
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
|
||||
# access_doclists = file
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
|
||||
# access_hitlists = file
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
|
||||
# agent_connect_timeout =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
|
||||
# agent_query_timeout =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
|
||||
# agent_retry_count = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
|
||||
# agent_retry_delay = 500
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
|
||||
# attr_flush_period = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
|
||||
# binlog_flush = 2
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
|
||||
# binlog_max_log_size = 268435456
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
|
||||
# binlog_path =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
|
||||
# client_timeout = 300
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
|
||||
# collation_libc_locale = C
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
|
||||
# collation_server = libc_ci
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
|
||||
data_dir = /var/lib/manticore
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
|
||||
# docstore_cache_size = 16m
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
|
||||
# expansion_limit = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
|
||||
# grouping_in_utc = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
|
||||
# ha_period_karma = 60
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
|
||||
# ha_ping_interval = 1000
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
|
||||
# hostname_lookup =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
|
||||
# jobs_queue_size =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
|
||||
# listen_backlog = 5
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
|
||||
# listen_env = this directive allows appending listeners from environment variables
|
||||
|
||||
listen = 9306:mysql41
|
||||
listen = /var/run/mysqld/mysqld.sock:mysql41
|
||||
listen = $ip:9312
|
||||
listen = 9308:http
|
||||
listen = $ip:9315-9325:replication
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
|
||||
# listen_tfo = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#log
|
||||
log = /var/log/manticore/searchd.log
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
|
||||
# max_batch_queries = 32
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
|
||||
# threads =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
|
||||
# max_filters = 256
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
|
||||
# max_filter_values = 4096
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
|
||||
# max_open_files = max
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
|
||||
max_packet_size = 128M
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
|
||||
# mysql_version_string =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
|
||||
# net_workers = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
|
||||
# net_wait_tm = -1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
|
||||
# net_throttle_accept = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
|
||||
# net_throttle_action = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
|
||||
# node_address =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
|
||||
# ondisk_attrs_default = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
|
||||
# persistent_connections_limit =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
|
||||
pid_file = /var/run/manticore/searchd.pid
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
|
||||
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
|
||||
# preopen_indexes = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
|
||||
qcache_max_bytes = 128Mb
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
|
||||
qcache_thresh_msec = 150
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
|
||||
qcache_ttl_sec = 120
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
|
||||
query_log_format = sphinxql
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
|
||||
# query_log_min_msec = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
|
||||
# query_log = /var/log/manticore/query.log
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
|
||||
# query_log_mode = 600
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
|
||||
# max_connections =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
|
||||
# network_timeout = 5
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
|
||||
# read_buffer = 256K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
|
||||
# read_buffer_docs = 256K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
|
||||
# read_buffer_hits = 256K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
|
||||
# read_unhinted = 32K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
|
||||
# rt_flush_period =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
|
||||
# rt_merge_iops = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
|
||||
# rt_merge_maxiosize = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
|
||||
# seamless_rotate = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
|
||||
# server_id =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
|
||||
# shutdown_timeout = 3
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
|
||||
# shutdown_token =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
|
||||
# snippets_file_prefix =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
|
||||
# sphinxql_state =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
|
||||
# sphinxql_timeout = 900
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
|
||||
# ssl_ca =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
|
||||
# ssl_cert =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
|
||||
# ssl_key =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
|
||||
# subtree_docs_cache = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
|
||||
# subtree_hits_cache = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
|
||||
# thread_stack =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
|
||||
# unlink_old = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
|
||||
# watchdog = 1
|
||||
}
|
||||
|
||||
common {
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
|
||||
# lemmatizer_base = /usr/local/share
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
|
||||
# progressive_merge =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
|
||||
# json_autoconv_keynames =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
|
||||
# json_autoconv_numbers = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
|
||||
# on_json_attr_error = ignore_attr
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
|
||||
# plugin_dir =
|
||||
}
|
||||
|
||||
# indexer {
|
||||
# lemmatizer_cache = 1024M
|
||||
# max_iops = 0
|
||||
# max_iosize = 0
|
||||
# mem_limit = 1024M
|
||||
# }
|
||||
|
||||
EOF
|
||||
12
docker/manticore/.mysql_history
Normal file
@@ -0,0 +1,12 @@
|
||||
_HiStOrY_V2_
|
||||
SELECT * FROM films WHERE MATCH('"shark monkey boy robot"/2') AND release_year IN(2006,2007) AND rental_rate BETWEEN 2.0 and 3.0;
|
||||
SELECT title, HIGHLIGHT({},'description') FROM films WHERE MATCH('"shark monkey boy robot"/2');
|
||||
SELECT * FROM films WHERE MATCH('" shark monkey boy robot "/2');
|
||||
SELECT * FROM films WHERE MATCH('Emotional drama') FACET release_year FACET category_id;
|
||||
SELECT * FROM films WHERE MATCH('Emotional drama') GROUP BY release_year;
|
||||
SELECT * FROM films WHERE MATCH('Emotional drama -dog -shark');
|
||||
SELECT * FROM films WHERE MATCH('Emotional drama');
|
||||
SELECT * FROM films;
|
||||
DESCRIBE films;
|
||||
SHOW TABLES;
|
||||
SOURCE /sandbox.sql
|
||||
76
docker/manticore/Dockerfile
Normal file
@@ -0,0 +1,76 @@
|
||||
FROM ubuntu:focal
|
||||
|
||||
ARG DEV
|
||||
ARG DAEMON_URL
|
||||
ARG MCL_URL
|
||||
|
||||
RUN groupadd -r manticore && useradd -r -g manticore manticore
|
||||
|
||||
ENV GOSU_VERSION 1.11
|
||||
ENV MCL_URL=${MCL_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_focal/dists/focal/main/binary-amd64/manticore-columnar-lib_1.15.4-220522-2fef34e_amd64.deb"}
|
||||
ENV DAEMON_URL=${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_focal/dists/manticore_5.0.2-220530-348514c86_amd64.tgz"}
|
||||
ENV BETA_URL=${BETA_URL:-"https://repo.manticoresearch.com/repository/kibana_beta/ubuntu/focal.zip"}
|
||||
|
||||
|
||||
RUN set -x \
|
||||
&& apt-get update && apt-get -y install --no-install-recommends ca-certificates binutils wget gnupg dirmngr unzip && rm -rf /var/lib/apt/lists/* \
|
||||
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
|
||||
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc" \
|
||||
&& export GNUPGHOME="$(mktemp -d)" \
|
||||
&& gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
|
||||
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
|
||||
&& { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
|
||||
&& rm -rf "$GNUPGHOME" /usr/local/bin/gosu.asc \
|
||||
&& chmod +x /usr/local/bin/gosu \
|
||||
&& gosu nobody true && \
|
||||
if [ "${DEV}" = "1" ]; then \
|
||||
echo "DEV IS ONE" && \
|
||||
exit && \
|
||||
wget https://repo.manticoresearch.com/manticore-dev-repo.noarch.deb \
|
||||
&& dpkg -i manticore-dev-repo.noarch.deb \
|
||||
&& apt-key adv --fetch-keys 'https://repo.manticoresearch.com/GPG-KEY-manticore' && apt-get -y update && apt-get -y install manticore \
|
||||
&& apt-get update \
|
||||
&& echo $(apt-get -y download --print-uris manticore-columnar-lib | cut -d" " -f1 | cut -d "'" -f 2) > /mcl.url ;\
|
||||
elif [ "${DEV}" = "2" ]; then \
|
||||
echo "DEV IS TWO" && \
|
||||
wget $BETA_URL && unzip focal.zip && rm focal.zip && \
|
||||
dpkg -i build/* && echo $MCL_URL > /mcl.url; rm build/* ;\
|
||||
else \
|
||||
echo "DEV NOT EITHER" && \
|
||||
exit && \
|
||||
wget $DAEMON_URL && ARCHIVE_NAME=$(ls | grep '.tgz' | head -n1 ) && tar -xf $ARCHIVE_NAME && rm $ARCHIVE_NAME && \
|
||||
dpkg -i manticore* && echo $MCL_URL > /mcl.url && rm *.deb ; \
|
||||
fi \
|
||||
&& mkdir -p /var/run/manticore && mkdir -p /var/lib/manticore/replication \
|
||||
&& apt-get update && apt-get -y install libexpat1 libodbc1 libpq5 openssl libcrypto++6 libmysqlclient21 mysql-client \
|
||||
&& apt-get -y purge --auto-remove \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& rm -f /usr/bin/mariabackup /usr/bin/mysqldump /usr/bin/mysqlslap /usr/bin/mysqladmin /usr/bin/mysqlimport \
|
||||
/usr/bin/mysqlshow /usr/bin/mbstream /usr/bin/mysql_waitpid /usr/bin/innotop /usr/bin/mysqlaccess /usr/bin/mytop \
|
||||
/usr/bin/mysqlreport /usr/bin/mysqldumpslow /usr/bin/mysql_find_rows /usr/bin/mysql_fix_extensions \
|
||||
/usr/bin/mysql_embedded /usr/bin/mysqlcheck \
|
||||
&& rm -f /usr/bin/spelldump /usr/bin/wordbreaker \
|
||||
&& mkdir -p /var/run/mysqld/ && chown manticore:manticore /var/run/mysqld/ \
|
||||
&& echo "\n[mysql]\nsilent\nwait\ntable\n" >> /etc/mysql/my.cnf && \
|
||||
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/en.pak.tgz && \
|
||||
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/de.pak.tgz && \
|
||||
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/ru.pak.tgz && \
|
||||
tar -xf /tmp/en.pak.tgz -C /usr/share/manticore/ && \
|
||||
tar -xf /tmp/de.pak.tgz -C /usr/share/manticore/ && \
|
||||
tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/
|
||||
|
||||
|
||||
COPY manticore.conf /etc/manticoresearch/
|
||||
COPY sandbox.sql /sandbox.sql
|
||||
COPY .mysql_history /root/.mysql_history
|
||||
|
||||
COPY docker-entrypoint.sh /usr/local/bin/
|
||||
RUN ln -s usr/local/bin/docker-entrypoint.sh /entrypoint.sh
|
||||
WORKDIR /var/lib/manticore
|
||||
ENTRYPOINT ["docker-entrypoint.sh"]
|
||||
EXPOSE 9306
|
||||
EXPOSE 9308
|
||||
EXPOSE 9312
|
||||
ENV LANG C.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
CMD ["sh", "-c", "(echo 'START WAIT' && sleep 5 && echo 'END WAIT' && mysql -P9306 -h0 -e 'set global log_management = 0; set global log_management = 1;') & searchd --nodetach"]
|
||||
278
docker/manticore/README.md
Normal file
@@ -0,0 +1,278 @@
|
||||
# Manticore Search Docker image
|
||||
|
||||
This is the git repo of official [Docker image](https://hub.docker.com/r/manticoresearch/manticore/) for [Manticore Search](https://github.com/manticoresoftware/manticoresearch).
|
||||
|
||||
Manticore Search is an easy-to-use, open-source, fast database for search. It helps thousands of companies, from small to large (such as Craigslist), search and filter petabytes of text data on a single node or hundreds of nodes, do streaming full-text filtering, and add auto-complete, spell correction, more-like-this, faceting and other search-related features to their websites and applications.
|
||||
|
||||
The default configuration includes a sample Real-Time index and listens on the default ports:
|
||||
* `9306` for connections from a MySQL client
|
||||
* `9308` for connections via HTTP
|
||||
* `9312` for connections via a binary protocol (e.g. in case you run a cluster)
|
||||
|
||||
The image comes with libraries for easily indexing data from MySQL, PostgreSQL, XML and CSV files.
|
||||
|
||||
# How to run Manticore Search Docker image
|
||||
|
||||
## Quick usage
|
||||
|
||||
The simplest way to start Manticore in a container and log in to it via the mysql client is:
|
||||
|
||||
```bash
|
||||
docker run --name manticore --rm -d manticoresearch/manticore && sleep 3 && docker exec -it manticore mysql && docker stop manticore
|
||||
```
|
||||
|
||||
When you exit the mysql client, the container is stopped and removed, so **use this only for testing / sandboxing purposes**. See below for how to use it in production.
|
||||
|
||||
The image comes with a sample index which can be loaded like this:
|
||||
|
||||
```mysql
|
||||
mysql> source /sandbox.sql
|
||||
```
|
||||
|
||||
The mysql client also has several sample queries in its history that you can run against the above index; just use the Up/Down keys in the client to browse and run them.
|
||||
|
||||
## Production use
|
||||
|
||||
|
||||
### Ports and mounting points
|
||||
|
||||
For data persistence, `/var/lib/manticore/` should be mounted to local storage or another desired storage backend.
|
||||
|
||||
```bash
|
||||
docker run --name manticore -v $(pwd)/data:/var/lib/manticore -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
|
||||
```
|
||||
|
||||
The configuration file inside the container is located at `/etc/manticoresearch/manticore.conf`. For custom settings, mount your own configuration file over it.
|
||||
|
||||
The ports are 9306/9308/9312 for SQL/HTTP/binary; expose them depending on how you are going to use Manticore. For example:
|
||||
|
||||
```bash
|
||||
docker run --name manticore -v $(pwd)/manticore.conf:/etc/manticoresearch/manticore.conf -v $(pwd)/data:/var/lib/manticore/ -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
|
||||
```
|
||||
|
||||
Make sure to remove `127.0.0.1:` if you want the ports to be available for external hosts.
|
||||
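For instance, here is a variant of the command above that publishes all three ports on all interfaces (a sketch for a trusted network only, since Manticore ships with no authentication enabled by default):

```bash
# Publish SQL (9306), HTTP (9308) and the binary protocol (9312) externally.
# ./data is an assumed host directory used for segment persistence.
docker run --name manticore -v $(pwd)/data:/var/lib/manticore -p 9306:9306 -p 9308:9308 -p 9312:9312 -d manticoresearch/manticore
```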
|
||||
### Manticore Columnar Library
|
||||
|
||||
The Docker image doesn't include the [Manticore Columnar Library](https://github.com/manticoresoftware/columnar), which is required if you need:
|
||||
* columnar storage
|
||||
* secondary indexes
|
||||
|
||||
You can, however, easily enable it at runtime via the environment variable `MCL=1`, i.e. `docker run -e MCL=1 ... manticoresearch/manticore`. The container will then download the library and put it in the data dir (which is normally mapped as a volume in production). The next time you run the container the library will already be there, so it won't be downloaded again unless you change the Manticore Search version.
|
||||
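A minimal sketch of that, assuming `./data` is the host directory backing the data dir, so that the downloaded library survives container restarts:

```bash
# MCL=1 makes the entrypoint download the Columnar Library into
# /var/lib/manticore/.mcl/ on first start; the volume keeps it across runs.
docker run --name manticore -e MCL=1 -v $(pwd)/data:/var/lib/manticore -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
```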
|
||||
### Docker-compose
|
||||
|
||||
In many cases you might want to use Manticore together with other images specified in a docker-compose YAML file. Here is the minimal recommended specification for Manticore Search in docker-compose.yml:
|
||||
|
||||
```yaml
version: '2.2'

services:
  manticore:
    container_name: manticore
    image: manticoresearch/manticore
    restart: always
    ports:
      - 127.0.0.1:9306:9306
      - 127.0.0.1:9308:9308
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
      memlock:
        soft: -1
        hard: -1
    environment:
      - MCL=1
    volumes:
      - ./data:/var/lib/manticore
      # - ./manticore.conf:/etc/manticoresearch/manticore.conf # uncomment if you use a custom config
```
|
||||
|
||||
Besides using the exposed ports 9306 and 9308, you can log into the instance by running `docker-compose exec manticore mysql`.
|
||||
|
||||
### HTTP protocol
|
||||
|
||||
Manticore is accessible via HTTP on ports 9308 and 9312. You can map either of them locally and connect with curl:
|
||||
|
||||
```bash
|
||||
docker run --name manticore -p 9308:9308 -d manticoresearch/manticore
|
||||
```
|
||||
|
||||
Create a table:
|
||||
```bash
|
||||
curl -X POST 'http://127.0.0.1:9308/sql' -d 'mode=raw&query=CREATE TABLE testrt ( title text, content text, gid integer)'
|
||||
```
|
||||
Insert a document:
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://127.0.0.1:9308/json/insert' -d'{"index":"testrt","id":1,"doc":{"title":"Hello","content":"world","gid":1}}'
|
||||
```
|
||||
|
||||
Perform a simple search:
|
||||
|
||||
```bash
|
||||
curl -X POST 'http://127.0.0.1:9308/json/search' -d '{"index":"testrt","query":{"match":{"*":"hello world"}}}'
|
||||
```
|
||||
|
||||
### Logging
|
||||
|
||||
By default, Manticore logs to `/dev/stdout`, so you can watch the log on the host with:
|
||||
|
||||
```bash
|
||||
docker logs manticore
|
||||
```
|
||||
|
||||
If you want your query log streamed the same way, pass the environment variable `QUERY_LOG_TO_STDOUT=true`.
|
||||
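A minimal sketch, reusing the run command from the Logging example above:

```bash
# QUERY_LOG_TO_STDOUT=true symlinks the query log to /dev/stdout inside the
# container, so queries appear in `docker logs` alongside the daemon log.
docker run --name manticore -e QUERY_LOG_TO_STDOUT=true -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
docker logs -f manticore
```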
|
||||
### Multi-node cluster with replication
|
||||
|
||||
Here is a simple `docker-compose.yml` defining a two-node cluster:
|
||||
|
||||
```yaml
version: '2.2'

services:

  manticore-1:
    image: manticoresearch/manticore
    restart: always
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
      memlock:
        soft: -1
        hard: -1
    environment:
      - MCL=1
    networks:
      - manticore
  manticore-2:
    image: manticoresearch/manticore
    restart: always
    ulimits:
      nproc: 65535
      nofile:
        soft: 65535
        hard: 65535
      memlock:
        soft: -1
        hard: -1
    environment:
      - MCL=1
    networks:
      - manticore
networks:
  manticore:
    driver: bridge
```
|
||||
* Start it: `docker-compose up`
|
||||
* Create a cluster with a table:
|
||||
```mysql
|
||||
$ docker-compose exec manticore-1 mysql
|
||||
|
||||
mysql> CREATE TABLE testrt ( title text, content text, gid integer);
|
||||
|
||||
mysql> CREATE CLUSTER posts;
|
||||
Query OK, 0 rows affected (0.24 sec)
|
||||
|
||||
mysql> ALTER CLUSTER posts ADD testrt;
|
||||
Query OK, 0 rows affected (0.07 sec)
|
||||
|
||||
MySQL [(none)]> exit
|
||||
Bye
|
||||
```
|
||||
* Join the cluster on the 2nd instance and insert something into the table:
|
||||
```mysql
|
||||
$ docker-compose exec manticore-2 mysql
|
||||
|
||||
mysql> JOIN CLUSTER posts AT 'manticore-1:9312';
|
||||
mysql> INSERT INTO posts:testrt(title,content,gid) VALUES('hello','world',1);
|
||||
Query OK, 1 row affected (0.00 sec)
|
||||
|
||||
MySQL [(none)]> exit
|
||||
Bye
|
||||
```
|
||||
|
||||
* If you now go back to the first instance, you'll see the new record:
|
||||
```mysql
|
||||
$ docker-compose exec manticore-1 mysql
|
||||
|
||||
MySQL [(none)]> select * from testrt;
|
||||
+---------------------+------+-------+---------+
|
||||
| id | gid | title | content |
|
||||
+---------------------+------+-------+---------+
|
||||
| 3891565839006040065 | 1 | hello | world |
|
||||
+---------------------+------+-------+---------+
|
||||
1 row in set (0.00 sec)
|
||||
|
||||
MySQL [(none)]> exit
|
||||
Bye
|
||||
```
|
||||
|
||||
## Memory locking and limits
|
||||
|
||||
It's recommended to override Docker's default ulimits for the Manticore instance:
|
||||
|
||||
```bash
|
||||
--ulimit nofile=65536:65536
|
||||
```
|
||||
|
||||
For the best performance, Manticore tables' components can be locked into memory. When Manticore is run under Docker, the instance requires additional privileges to allow memory locking. The following options must be added when running the instance:
|
||||
|
||||
```bash
|
||||
--cap-add=IPC_LOCK --ulimit memlock=-1:-1
|
||||
```
|
||||
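Putting the two together, here is a sketch of a production-style run command with memory locking enabled (volume and port mappings as assumed earlier in this README):

```bash
# IPC_LOCK plus an unlimited memlock ulimit lets searchd lock table files in RAM.
docker run --name manticore --cap-add=IPC_LOCK --ulimit memlock=-1:-1 --ulimit nofile=65536:65536 \
  -v $(pwd)/data:/var/lib/manticore -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
```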
|
||||
## Configuring Manticore Search with Docker
|
||||
|
||||
If you want to run Manticore with your own config containing index definitions, you will need to mount the configuration into the container:
|
||||
|
||||
```bash
|
||||
docker run --name manticore -v $(pwd)/manticore.conf:/etc/manticoresearch/manticore.conf -v $(pwd)/data/:/var/lib/manticore -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
|
||||
```
|
||||
|
||||
Note that Manticore Search inside the container runs as the user `manticore`. Operations with tables (like creating or rotating plain indexes) should also be done as `manticore`; otherwise the files will be created as `root` and the search daemon won't have the rights to open them. For example, here is how you can rotate all plain indexes:
|
||||
|
||||
```bash
|
||||
docker exec -it manticore gosu manticore indexer --all --rotate
|
||||
```
|
||||
|
||||
### Environment variables
|
||||
|
||||
You can also set individual `searchd` and `common` configuration settings using Docker environment variables.
|
||||
|
||||
The settings must be prefixed with their section name; for example, to change the value of the setting `mysql_version_string` in the section `searchd`, the variable must be named `searchd_mysql_version_string`:
|
||||
|
||||
|
||||
```bash
|
||||
docker run --name manticore -p 127.0.0.1:9306:9306 -e searchd_mysql_version_string='5.5.0' -d manticoresearch/manticore
|
||||
```
|
||||
|
||||
In the case of the `listen` directive, you can use the Docker variable `searchd_listen` to pass new listening interfaces in addition to the default ones. Multiple interfaces can be declared, separated by a pipe character ("|").
|
||||
To listen only on the instance's network address, `$ip` (retrieved internally from `hostname -i`) can be used as an address alias.
|
||||
|
||||
For example, `-e searchd_listen='9316:http|9307:mysql|$ip:5443:mysql_vip'` will add an additional SQL interface on port 9307, a SQL VIP on port 5443 listening only on the instance IP, and HTTP on port 9316, besides the defaults on 9306 and 9308.
|
||||
|
||||
```bash
|
||||
$ docker run --rm -p 1188:9307 -e searchd_mysql_version_string='5.5.0' -e searchd_listen='9316:http|9307:mysql|$ip:5443:mysql_vip' manticore
|
||||
[Mon Aug 17 07:31:58.719 2020] [1] using config file '/etc/manticoresearch/manticore.conf' (9130 chars)...
|
||||
listening on all interfaces for http, port=9316
|
||||
listening on all interfaces for mysql, port=9307
|
||||
listening on 172.17.0.17:5443 for VIP mysql
|
||||
listening on all interfaces for mysql, port=9306
|
||||
listening on UNIX socket /var/run/mysqld/mysqld.sock
|
||||
listening on 172.17.0.17:9312 for sphinx
|
||||
listening on all interfaces for http, port=9308
|
||||
prereading 0 indexes
|
||||
prereaded 0 indexes in 0.000 sec
|
||||
accepting connections
|
||||
```
|
||||
|
||||
|
||||
# Issues
|
||||
|
||||
For reporting issues, please use the [issue tracker](https://github.com/manticoresoftware/docker/issues).
|
||||
118
docker/manticore/docker-entrypoint.sh
Executable file
@@ -0,0 +1,118 @@
|
||||
#!/bin/bash
|
||||
set -eo pipefail
|
||||
echo "RUNNING ENTRYPOINT"
|
||||
|
||||
# check to see if this file is being run or sourced from another script
|
||||
_is_sourced() {
|
||||
# https://unix.stackexchange.com/a/215279
|
||||
[ "${#FUNCNAME[@]}" -ge 2 ] &&
|
||||
[ "${FUNCNAME[0]}" = '_is_sourced' ] &&
|
||||
[ "${FUNCNAME[1]}" = 'source' ]
|
||||
}
|
||||
_searchd_want_help() {
|
||||
local arg
|
||||
for arg; do
|
||||
case "$arg" in
|
||||
-'?' | --help | -h | -v)
|
||||
return 0
|
||||
;;
|
||||
esac
|
||||
done
|
||||
return 1
|
||||
}
|
||||
|
||||
# Prepare the environment before the daemon starts: optionally redirect the
# query log to stdout and install the Manticore Columnar Library on demand
docker_setup_env() {
|
||||
if [ -n "$QUERY_LOG_TO_STDOUT" ]; then
|
||||
export searchd_query_log=/var/log/manticore/query.log
|
||||
[ ! -f /var/log/manticore/query.log ] && ln -sf /dev/stdout /var/log/manticore/query.log
|
||||
fi
|
||||
# If MCL=1, link the Columnar Library from the data dir into the modules dir
# and download it on first run (URL taken from /mcl.url baked into the image)
if [[ "${MCL}" == "1" ]]; then
|
||||
LIB_MANTICORE_COLUMNAR="/var/lib/manticore/.mcl/lib_manticore_columnar.so"
|
||||
LIB_MANTICORE_SECONDARY="/var/lib/manticore/.mcl/lib_manticore_secondary.so"
|
||||
|
||||
[ -L /usr/share/manticore/modules/lib_manticore_columnar.so ] || ln -s $LIB_MANTICORE_COLUMNAR /usr/share/manticore/modules/lib_manticore_columnar.so
|
||||
[ -L /usr/share/manticore/modules/lib_manticore_secondary.so ] || ln -s $LIB_MANTICORE_SECONDARY /usr/share/manticore/modules/lib_manticore_secondary.so
|
||||
|
||||
searchd -v|grep -i error|egrep "trying to load" \
|
||||
&& rm $LIB_MANTICORE_COLUMNAR $LIB_MANTICORE_SECONDARY \
|
||||
&& echo "WARNING: wrong MCL version was removed, installing the correct one"
|
||||
|
||||
if [[ ! -f "$LIB_MANTICORE_COLUMNAR" || ! -f "$LIB_MANTICORE_SECONDARY" ]]; then
|
||||
if ! mkdir -p /var/lib/manticore/.mcl/ ; then
|
||||
echo "ERROR: Manticore Columnar Library is inaccessible: couldn't create /var/lib/manticore/.mcl/."
|
||||
exit
|
||||
fi
|
||||
|
||||
MCL_URL=$(cat /mcl.url)
|
||||
wget -P /tmp $MCL_URL
|
||||
|
||||
LAST_PATH=$(pwd)
|
||||
cd /tmp
|
||||
PACKAGE_NAME=$(ls | grep manticore-columnar | head -n 1)
|
||||
ar -x $PACKAGE_NAME
|
||||
tar -xf data.tar.gz
|
||||
find . -name '*.so' -exec cp {} /var/lib/manticore/.mcl/ \;
|
||||
cd $LAST_PATH
|
||||
fi
|
||||
fi
|
||||
}
|
||||
_main() {
|
||||
# first arg is `h` or some `--option`
|
||||
if [ "${1#-}" != "$1" ]; then
|
||||
set -- searchd "$@"
|
||||
fi
|
||||
# Amended from searchd to sh since we're using sh to wait until searchd starts, then set the Kibana-specific options
|
||||
if [ "$1" = 'sh' ] && ! _searchd_want_help "$@"; then
|
||||
docker_setup_env "$@"
|
||||
# allow the container to be started with `--user`
|
||||
if [ "$(id -u)" = '0' ]; then
|
||||
find /var/lib/manticore /var/log/manticore /var/run/manticore /etc/manticoresearch \! -user manticore -exec chown manticore '{}' +
|
||||
exec gosu manticore "$0" "$@"
|
||||
fi
|
||||
fi
|
||||
_replace_conf_from_env
|
||||
exec "$@"
|
||||
}
|
||||
|
||||
# Rewrite /etc/manticoresearch/manticore.conf in place from any searchd_* or
# common_* environment variables, expanding a pipe-separated searchd_listen
# value into multiple listen directives
_replace_conf_from_env() {
|
||||
|
||||
sed_query=""
|
||||
|
||||
while IFS='=' read -r oldname value; do
|
||||
if [[ $oldname == 'searchd_'* || $oldname == 'common_'* ]]; then
|
||||
value=$(echo ${!oldname} | sed 's/\//\\\//g')
|
||||
oldname=$(echo $oldname | sed "s/searchd_//g;s/common_//g;")
|
||||
newname=$oldname
|
||||
|
||||
if [[ $newname == 'listen' ]]; then
|
||||
oldname="listen_env"
|
||||
IFS='|' read -ra ADDR <<<"$value"
|
||||
count=0
|
||||
|
||||
for i in "${ADDR[@]}"; do
|
||||
if [[ $count == 0 ]]; then
|
||||
value=$i
|
||||
else
|
||||
value="$value\n listen = $i"
|
||||
fi
|
||||
count=$((count + 1))
|
||||
done
|
||||
fi
|
||||
|
||||
if [[ -z $sed_query ]]; then
|
||||
sed_query="s/(#\s)*?$oldname\s?=\s?.*?$/$newname = $value/g"
|
||||
else
|
||||
sed_query="$sed_query;s/(#\s)*?$oldname\s?=\s?.*?$/$newname = $value/g"
|
||||
fi
|
||||
|
||||
fi
|
||||
done < <(env)
|
||||
|
||||
if [[ ! -z $sed_query ]]; then
|
||||
sed -i -E "$sed_query" /etc/manticoresearch/manticore.conf
|
||||
fi
|
||||
}
|
||||
# If we are sourced from elsewhere, don't perform any further actions
|
||||
if ! _is_sourced; then
|
||||
_main "$@"
|
||||
fi
|
||||
259
docker/manticore/manticore.conf
Normal file
@@ -0,0 +1,259 @@
|
||||
#!/bin/sh
|
||||
# take the last field of `hostname -i` as this container's IP
ip=`hostname -i|rev|cut -d\ -f 1|rev`
|
||||
cat << EOF
|
||||
searchd {
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
|
||||
# access_plain_attrs = mmap_preread
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
|
||||
# access_blob_attrs = mmap_preread
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
|
||||
# access_doclists = file
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
|
||||
# access_hitlists = file
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
|
||||
# agent_connect_timeout =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
|
||||
# agent_query_timeout =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
|
||||
# agent_retry_count = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
|
||||
# agent_retry_delay = 500
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
|
||||
# attr_flush_period = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
|
||||
# binlog_flush = 2
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
|
||||
# binlog_max_log_size = 268435456
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
|
||||
# binlog_path =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
|
||||
# client_timeout = 300
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
|
||||
# collation_libc_locale = C
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
|
||||
# collation_server = libc_ci
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
|
||||
data_dir = /var/lib/manticore
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
|
||||
# docstore_cache_size = 16m
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
|
||||
# expansion_limit = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
|
||||
# grouping_in_utc = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
|
||||
# ha_period_karma = 60
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
|
||||
# ha_ping_interval = 1000
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
|
||||
# hostname_lookup =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
|
||||
# jobs_queue_size =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
|
||||
# listen_backlog = 5
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
|
||||
# listen_env = this directive allows appending listeners from environment variables
|
||||
|
||||
listen = 9306:mysql41
|
||||
listen = /var/run/mysqld/mysqld.sock:mysql41
|
||||
listen = $ip:9312
|
||||
listen = 9308:http
|
||||
listen = $ip:9315-9325:replication
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
|
||||
# listen_tfo = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#log
|
||||
log = /var/log/manticore/searchd.log
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
|
||||
# max_batch_queries = 32
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
|
||||
# threads =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
|
||||
# max_filters = 256
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
|
||||
# max_filter_values = 4096
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
|
||||
# max_open_files =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
|
||||
max_packet_size = 128M
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
|
||||
# mysql_version_string =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
|
||||
# net_workers = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
|
||||
# net_wait_tm = -1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
|
||||
# net_throttle_accept = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
|
||||
# net_throttle_action = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
|
||||
# node_address =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
|
||||
# ondisk_attrs_default = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
|
||||
# persistent_connections_limit =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
|
||||
pid_file = /var/run/manticore/searchd.pid
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
|
||||
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
|
||||
# preopen_indexes = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
|
||||
# qcache_max_bytes = 16Mb
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
|
||||
# qcache_thresh_msec = 3000
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
|
||||
# qcache_ttl_sec = 60
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
|
||||
query_log_format = sphinxql
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
|
||||
query_log_min_msec = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
|
||||
query_log = /var/log/manticore/query.log
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
|
||||
# query_log_mode = 600
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
|
||||
# max_connections =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
|
||||
# network_timeout = 5
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
|
||||
# read_buffer = 256K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
|
||||
# read_buffer_docs = 256K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
|
||||
# read_buffer_hits = 256K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
|
||||
# read_unhinted = 32K
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
|
||||
# rt_flush_period =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
|
||||
# rt_merge_iops = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
|
||||
# rt_merge_maxiosize = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
|
||||
# seamless_rotate = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
|
||||
# server_id =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
|
||||
# shutdown_timeout = 3
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
|
||||
# shutdown_token =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
|
||||
# snippets_file_prefix =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
|
||||
# sphinxql_state =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
|
||||
# sphinxql_timeout = 900
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
|
||||
# ssl_ca =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
|
||||
# ssl_cert =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
|
||||
# ssl_key =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
|
||||
# subtree_docs_cache = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
|
||||
# subtree_hits_cache = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
|
||||
# thread_stack =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
|
||||
# unlink_old = 1
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
|
||||
# watchdog = 1
|
||||
}
|
||||
|
||||
common {
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
|
||||
# lemmatizer_base = /usr/local/share
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
|
||||
# progressive_merge =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
|
||||
# json_autoconv_keynames =
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
|
||||
# json_autoconv_numbers = 0
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
|
||||
# on_json_attr_error = ignore_attr
|
||||
|
||||
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
|
||||
# plugin_dir =
|
||||
|
||||
}
|
||||
|
||||
EOF
|
||||
9
docker/manticore/sandbox.sql
Normal file
File diff suppressed because one or more lines are too long
@@ -1,5 +1,2 @@
|
||||
unixsocket /var/run/monolith-redis.sock
|
||||
unixsocketperm 777
|
||||
port 0
|
||||
# port 6379
|
||||
# requirepass changeme
|
||||
unixsocket /var/run/redis/redis.sock
|
||||
unixsocketperm 777
|
||||
23
docker/requirements.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
wheel
|
||||
beautifulsoup4
|
||||
redis
|
||||
siphashc
|
||||
aiohttp[speedups]
|
||||
python-dotenv
|
||||
#manticoresearch
|
||||
numpy
|
||||
aioredis[hiredis]
|
||||
aiokafka
|
||||
vaderSentiment
|
||||
polyglot
|
||||
pyicu
|
||||
pycld2
|
||||
morfessor
|
||||
six
|
||||
nltk
|
||||
#spacy
|
||||
gensim
|
||||
python-Levenshtein
|
||||
orjson
|
||||
uvloop
|
||||
numba
|
||||
50
docker/superset/docker-bootstrap.sh
Executable file
@@ -0,0 +1,50 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt"
|
||||
# If this is a Cypress run, overwrite the admin password and export env variables
|
||||
if [ "$CYPRESS_CONFIG" == "true" ]; then
|
||||
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
|
||||
export SUPERSET_TESTENV=true
|
||||
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
|
||||
fi
|
||||
#
|
||||
# Make sure we have dev requirements installed
|
||||
#
|
||||
if [ -f "${REQUIREMENTS_LOCAL}" ]; then
|
||||
echo "Installing local overrides at ${REQUIREMENTS_LOCAL}"
|
||||
pip install -r "${REQUIREMENTS_LOCAL}"
|
||||
else
|
||||
echo "Skipping local overrides"
|
||||
fi
|
||||
|
||||
if [[ "${1}" == "worker" ]]; then
|
||||
echo "Starting Celery worker..."
|
||||
celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO
|
||||
elif [[ "${1}" == "beat" ]]; then
|
||||
echo "Starting Celery beat..."
|
||||
celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule
|
||||
elif [[ "${1}" == "app" ]]; then
|
||||
echo "Starting web app..."
|
||||
flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0
|
||||
elif [[ "${1}" == "app-gunicorn" ]]; then
|
||||
echo "Starting web app..."
|
||||
/usr/bin/run-server.sh
|
||||
fi
|
||||
78
docker/superset/docker-init.sh
Executable file
@@ -0,0 +1,78 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
set -e
|
||||
|
||||
#
|
||||
# Always install local overrides first
|
||||
#
|
||||
/app/docker/docker-bootstrap.sh
|
||||
|
||||
STEP_CNT=4
|
||||
|
||||
echo_step() {
|
||||
cat <<EOF
|
||||
|
||||
######################################################################
|
||||
|
||||
|
||||
Init Step ${1}/${STEP_CNT} [${2}] -- ${3}
|
||||
|
||||
|
||||
######################################################################
|
||||
|
||||
EOF
|
||||
}
|
||||
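# Example: echo_step "1" "Starting" "Applying DB migrations" prints a framed
# banner so each init step is easy to spot in the container logs.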
ADMIN_PASSWORD="admin"
|
||||
# If this is a Cypress run, overwrite the admin password and export env variables
|
||||
if [ "$CYPRESS_CONFIG" == "true" ]; then
|
||||
ADMIN_PASSWORD="general"
|
||||
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
|
||||
export SUPERSET_TESTENV=true
|
||||
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
|
||||
fi
|
||||
# Initialize the database
|
||||
echo_step "1" "Starting" "Applying DB migrations"
|
||||
superset db upgrade
|
||||
echo_step "1" "Complete" "Applying DB migrations"
|
||||
|
||||
# Create an admin user
|
||||
echo_step "2" "Starting" "Setting up admin user ( admin / $ADMIN_PASSWORD )"
|
||||
superset fab create-admin \
|
||||
--username admin \
|
||||
--firstname Superset \
|
||||
--lastname Admin \
|
||||
--email admin@superset.com \
|
||||
--password $ADMIN_PASSWORD
|
||||
echo_step "2" "Complete" "Setting up admin user"
|
||||
# Create default roles and permissions
|
||||
echo_step "3" "Starting" "Setting up roles and perms"
|
||||
superset init
|
||||
echo_step "3" "Complete" "Setting up roles and perms"
|
||||
|
||||
if [ "$SUPERSET_LOAD_EXAMPLES" = "yes" ]; then
|
||||
# Load some data to play with
|
||||
echo_step "4" "Starting" "Loading examples"
|
||||
# If this is a Cypress run (which consumes superset_test_config), load the data required for tests
|
||||
if [ "$CYPRESS_CONFIG" == "true" ]; then
|
||||
superset load_test_users
|
||||
superset load_examples --load-test-data
|
||||
else
|
||||
superset load_examples
|
||||
fi
|
||||
echo_step "4" "Complete" "Loading examples"
|
||||
fi
|
||||
1
docker/superset/requirements-local.txt
Normal file
@@ -0,0 +1 @@
|
||||
pydruid
|
||||
27
env.example
@@ -1,6 +1,6 @@
|
||||
PORTAINER_GIT_DIR=..
|
||||
PORTAINER_GIT_DIR=.
|
||||
MODULES_ENABLED="dis"
|
||||
DISCORD_TOKEN=
|
||||
DISCORD_TOKEN="xx"
|
||||
THRESHOLD_LISTENER_HOST=0.0.0.0
|
||||
THRESHOLD_LISTENER_PORT=13867
|
||||
THRESHOLD_LISTENER_SSL=1
|
||||
@@ -13,16 +13,16 @@ THRESHOLD_RELAY_SSL=1
|
||||
THRESHOLD_API_ENABLED=1
|
||||
THRESHOLD_API_HOST=0.0.0.0
|
||||
THRESHOLD_API_PORT=13869
|
||||
PORTAINER_GIT_DIR=.
|
||||
|
||||
THRESHOLD_CONFIG_DIR=../legacy/conf/live/
|
||||
#THRESHOLD_TEMPLATE_DIR=../legacy/conf/templates/
|
||||
THRESHOLD_CERT_DIR=../legacy/conf/cert/
|
||||
THRESHOLD_CONFIG_DIR=./legacy/conf/live/
|
||||
THRESHOLD_CERT_DIR=./legacy/conf/cert/
|
||||
|
||||
# How many messages to ingest at once from Redis
|
||||
MONOLITH_INGEST_CHUNK_SIZE=70000
|
||||
MONOLITH_INGEST_CHUNK_SIZE=900
|
||||
|
||||
# Time to wait between polling Redis again
|
||||
MONOLITH_INGEST_ITER_DELAY=2
|
||||
MONOLITH_INGEST_ITER_DELAY=0.5
|
||||
|
||||
# Number of 4chan threads to request at once
|
||||
MONOLITH_CH4_THREADS_CONCURRENT=1000
|
||||
@@ -31,20 +31,11 @@ MONOLITH_CH4_THREADS_CONCURRENT=1000
|
||||
MONOLITH_CH4_THREADS_DELAY=0.1
|
||||
|
||||
# Time to wait after finishing a crawl before starting again
|
||||
MONOLITH_CH4_CRAWL_DELAY=60
|
||||
MONOLITH_CH4_CRAWL_DELAY=30
|
||||
|
||||
# Semaphore value
|
||||
MONOLITH_CH4_THREADS_SEMAPHORE=1000
|
||||
|
||||
# Threads to use for data processing
|
||||
# Leave uncommented to use all available threads
|
||||
MONOLITH_PROCESS_THREADS=7
|
||||
|
||||
# Enable performance metrics after message processing
|
||||
MONOLITH_PROCESS_PERFSTATS=0
|
||||
|
||||
# Elasticsearch
|
||||
ELASTICSEARCH_USERNAME=elastic
|
||||
ELASTICSEARCH_PASSWORD=
|
||||
ELASTICSEARCH_HOST=https://es01:9200
|
||||
ELASTICSEARCH_TLS=1
|
||||
# MONOLITH_PROCESS_THREADS=4
|
||||
87
environment
Normal file
@@ -0,0 +1,87 @@
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
|
||||
# Java tuning
|
||||
#DRUID_XMX=1g
|
||||
#DRUID_XMS=1g
|
||||
#DRUID_MAXNEWSIZE=250m
|
||||
#DRUID_NEWSIZE=250m
|
||||
#DRUID_MAXDIRECTMEMORYSIZE=1g
|
||||
|
||||
#druid_emitter_logging_logLevel=debug
|
||||
|
||||
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
|
||||
|
||||
#druid_zk_service_host=zookeeper
|
||||
|
||||
#druid_metadata_storage_host=
|
||||
#druid_metadata_storage_type=postgresql
|
||||
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
|
||||
#druid_metadata_storage_connector_user=druid
|
||||
#druid_metadata_storage_connector_password=FoolishPassword
|
||||
|
||||
#druid_coordinator_balancer_strategy=cachingCost
|
||||
|
||||
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
|
||||
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
|
||||
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
|
||||
|
||||
#druid_storage_type=local
|
||||
#druid_storage_storageDirectory=/opt/shared/segments
|
||||
#druid_indexer_logs_type=file
|
||||
#druid_indexer_logs_directory=/opt/shared/indexing-logs
|
||||
|
||||
#druid_processing_numThreads=1
|
||||
#druid_processing_numMergeBuffers=1
|
||||
|
||||
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
|
||||
# Java tuning
|
||||
#DRUID_XMX=1g
|
||||
#DRUID_XMS=1g
|
||||
#DRUID_MAXNEWSIZE=250m
|
||||
#DRUID_NEWSIZE=250m
|
||||
#DRUID_MAXDIRECTMEMORYSIZE=6172m
|
||||
DRUID_SINGLE_NODE_CONF=nano-quickstart
|
||||
|
||||
druid_emitter_logging_logLevel=debug
|
||||
|
||||
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
|
||||
|
||||
druid_zk_service_host=zookeeper
|
||||
|
||||
druid_metadata_storage_host=
|
||||
druid_metadata_storage_type=postgresql
|
||||
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
|
||||
druid_metadata_storage_connector_user=druid
|
||||
druid_metadata_storage_connector_password=FoolishPassword
|
||||
|
||||
druid_coordinator_balancer_strategy=cachingCost
|
||||
|
||||
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
|
||||
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
|
||||
|
||||
druid_storage_type=local
|
||||
druid_storage_storageDirectory=/opt/shared/segments
|
||||
druid_indexer_logs_type=file
|
||||
druid_indexer_logs_directory=/opt/shared/indexing-logs
|
||||
|
||||
druid_processing_numThreads=2
|
||||
druid_processing_numMergeBuffers=2
|
||||
|
||||
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
|
||||
17
legacy/.pre-commit-config.yaml
Normal file
@@ -0,0 +1,17 @@
|
||||
repos:
  - repo: https://github.com/psf/black
    rev: 22.6.0
    hooks:
      - id: black
        args:
          - --line-length=120
  - repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
      - id: isort
        args: ["--profile", "black"]
  - repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        args: [--max-line-length=120]
|
||||
@@ -17,7 +17,7 @@
|
||||
},
|
||||
"Key": "key.pem",
|
||||
"Certificate": "cert.pem",
|
||||
"RedisSocket": "/var/run/socks/redis.sock",
|
||||
"RedisSocket": "/var/run/redis/redis.sock",
|
||||
"RedisDBEphemeral": 1,
|
||||
"RedisDBPersistent": 0,
|
||||
"UsePassword": false,
|
||||
|
||||
41
legacy/docker-compose.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
version: "2"

services:
  app:
    image: pathogen/threshold:latest
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - .env
    # for development
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes_from:
      - tmp

  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis

  redis:
    image: redis
    command: redis-server /etc/redis.conf
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
    volumes_from:
      - tmp

networks:
  default:
    external:
      name: pathogen
|
||||
38
legacy/docker/docker-compose.prod.yml
Normal file
@@ -0,0 +1,38 @@
|
||||
version: "2"
|
||||
|
||||
services:
|
||||
app:
|
||||
image: pathogen/threshold:latest
|
||||
build: ./docker
|
||||
volumes:
|
||||
- ${PORTAINER_GIT_DIR}:/code
|
||||
- ${THRESHOLD_CONFIG_DIR}:/code/conf/live
|
||||
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
|
||||
- ${THRESHOLD_CERT_DIR}:/code/conf/cert
|
||||
ports:
|
||||
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
|
||||
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
|
||||
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
|
||||
env_file:
|
||||
- ../stack.env
|
||||
volumes_from:
|
||||
- tmp
|
||||
|
||||
tmp:
|
||||
image: busybox
|
||||
command: chmod -R 777 /var/run/redis
|
||||
volumes:
|
||||
- /var/run/redis
|
||||
|
||||
redis:
|
||||
image: redis
|
||||
command: redis-server /etc/redis.conf
|
||||
volumes:
|
||||
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
|
||||
volumes_from:
|
||||
- tmp
|
||||
|
||||
networks:
|
||||
default:
|
||||
external:
|
||||
name: pathogen
|
||||
@@ -4,7 +4,6 @@ from os import urandom
 from os.path import exists
 from string import digits

-import redis
 from redis import StrictRedis

 # List of errors ZNC can give us
@@ -122,7 +121,7 @@ def initConf():


 def initMain():
-    global r, g, x
+    global r, g
     initConf()
     r = StrictRedis(
         unix_socket_path=config["RedisSocket"], db=config["RedisDBEphemeral"]  # noqa
@@ -130,5 +129,3 @@ def initMain():
     g = StrictRedis(
         unix_socket_path=config["RedisSocket"], db=config["RedisDBPersistent"]
     )  # noqa
-    # SSDB for communication with Monolith
-    x = redis.from_url("redis://ssdb:1289", db=0)
@@ -67,7 +67,7 @@ def parsemeta(numName, c):

 def queue_message(c):
     message = json.dumps(c)
-    main.x.lpush("queue", message)
+    main.g.sadd("queue", message)


 def event(
9 legacy/requirements.txt Normal file
@@ -0,0 +1,9 @@
wheel
pre-commit
twisted
pyOpenSSL
redis
pyYaML
service_identity
siphashc
Klein
@@ -98,6 +98,7 @@ class IRCRelayFactory(ReconnectingClientFactory):
         self.relayCommands, self.user, self.stage2 = relayCommands, user, stage2

     def buildProtocol(self, addr):

         entry = IRCRelay(self.num, self.relayCommands, self.user, self.stage2)

+        self.client = entry
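Keeping a reference to the freshly built protocol on the factory (self.client) gives the rest of the code a stable handle on the currently active relay connection, which a ReconnectingClientFactory rebuilds on every reconnect.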
28 monolith.py
@@ -1,10 +1,8 @@
 import asyncio
 from os import getenv
-from time import sleep

 import uvloop

-import db
 import util
 from sources.ch4 import Chan4
 from sources.dis import DiscordClient
@@ -23,28 +21,14 @@ if not token:


 async def main(loop):
+    if "ingest" in modules_enabled:
+        ingest = Ingest()
+        loop.create_task(ingest.run())
-    client = DiscordClient()
-    loop.create_task(client.start(token))

+    if "dis" in modules_enabled:
+        client = DiscordClient()
+        loop.create_task(client.start(token))
-    chan = Chan4()
-    loop.create_task(chan.run())

+    if "ch4" in modules_enabled:
+        chan = Chan4()
+        loop.create_task(chan.run())

-    created = False
-    while not created:
-        try:
-            db.create_index(db.api_client)
-            created = True
-        except Exception as e:
-            print(f"Error creating index: {e}")
-            sleep(1)  # Block the thread, just wait for the DB
-    db.update_schema()
-    ingest = Ingest()
-    loop.create_task(ingest.run())


 loop = asyncio.get_event_loop()
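The rewritten main() gates each subsystem on its name appearing in modules_enabled, so the ingest worker, the Discord client, and the 4chan crawler can be toggled independently instead of always starting together with the index-creation retry loop.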
134 perf/throttle.py
@@ -1,134 +0,0 @@
import asyncio
import time

import psutil

import util


class DynamicThrottle(object):
    def __init__(self, **kwargs):
        self.target_cpu_usage = kwargs.get("target_cpu_usage", 50)
        self.sleep_interval = 0.0

        self.sleep_increment = kwargs.get("sleep_increment", 0.01)
        self.sleep_decrement = kwargs.get("sleep_decrement", 0.01)

        self.sleep_max = kwargs.get("sleep_max", 0.1)
        self.sleep_min = kwargs.get("sleep_min", 0.01)

        self.psutil_interval = kwargs.get("psutil_interval", 0.1)

        self.log = kwargs.get("log", util.get_logger(self.__class__.__name__))

        self.consecutive_increments = 0
        self.consecutive_decrements = 0

        self.consecutive_divisor = kwargs.get("consecutive_divisor", 1)

        self.last_was_increment = kwargs.get("start_increment", True)

        if kwargs.get("use_async"):
            self.wait = self.dynamic_throttle_async
        else:
            self.wait = self.dynamic_throttle

    async def dynamic_throttle_async(self):
        """
        Dynamically sleeps before a request if CPU usage is above our target.
        """
        current_cpu_usage = psutil.cpu_percent(interval=self.psutil_interval)

        if current_cpu_usage > self.target_cpu_usage:
            if self.last_was_increment:
                self.consecutive_increments += 1
                # self.log.debug(f"High CPU consecutive increments: {self.consecutive_increments}")
            else:
                self.consecutive_increments = 0  # ?
                self.consecutive_decrements = 0  # ?
                # self.log.debug(f"High CPU alert reset.")
            self.sleep_interval += self.sleep_increment * (
                max(1, self.consecutive_increments) / self.consecutive_divisor
            )
            self.last_was_increment = True
            if self.sleep_interval > self.sleep_max:
                self.sleep_interval = self.sleep_max
                # self.log.debug(f"High CPU, but not increasing above {self.sleep_max:.3f}s")
            # self.log.debug(
            #     f"High CPU: {current_cpu_usage}% > {self.target_cpu_usage}%, "
            #     f"=> sleep {self.sleep_interval:.3f}s"
            # )
        elif current_cpu_usage < self.target_cpu_usage:
            if not self.last_was_increment:
                self.consecutive_decrements += 1
                # self.log.debug(f"Low CPU consecutive decrements: {self.consecutive_decrements}")
            else:
                self.consecutive_decrements = 0  # ?
                self.consecutive_increments = 0  # ?
                # self.log.debug(f"Low CPU alert reset.")
            self.sleep_interval -= self.sleep_decrement * (
                max(1, self.consecutive_decrements) / self.consecutive_divisor
            )
            self.last_was_increment = False
            if self.sleep_interval < self.sleep_min:
                self.sleep_interval = self.sleep_min
                # self.log.debug(f"Low CPU, but not decreasing below {self.sleep_min:.3f}s")
            # self.log.debug(
            #     f"Low CPU: {current_cpu_usage}% < {self.target_cpu_usage}%, "
            #     f"=> sleep {self.sleep_interval:.3f}s"
            # )

        if self.sleep_interval > 0:
            await asyncio.sleep(self.sleep_interval)
            return self.sleep_interval
        return 0.0

    def dynamic_throttle(self):
        """
        Dynamically sleeps before a request if CPU usage is above our target.
        """
        current_cpu_usage = psutil.cpu_percent(interval=self.psutil_interval)

        if current_cpu_usage > self.target_cpu_usage:
            if self.last_was_increment:
                self.consecutive_increments += 1
                # self.log.debug(f"High CPU consecutive increments: {self.consecutive_increments}")
            else:
                self.consecutive_increments = 0  # ?
                self.consecutive_decrements = 0  # ?
                # self.log.debug(f"High CPU alert reset.")
            self.sleep_interval += self.sleep_increment * (
                max(1, self.consecutive_increments) / self.consecutive_divisor
            )
            self.last_was_increment = True
            if self.sleep_interval > self.sleep_max:
                self.sleep_interval = self.sleep_max
                # self.log.debug(f"High CPU, but not increasing above {self.sleep_max:.3f}s")
            # self.log.debug(
            #     f"High CPU: {current_cpu_usage}% > {self.target_cpu_usage}%, "
            #     f"=> sleep {self.sleep_interval:.3f}s"
            # )
        elif current_cpu_usage < self.target_cpu_usage:
            if not self.last_was_increment:
                self.consecutive_decrements += 1
                # self.log.debug(f"Low CPU consecutive decrements: {self.consecutive_decrements}")
            else:
                self.consecutive_decrements = 0  # ?
                self.consecutive_increments = 0  # ?
                # self.log.debug(f"Low CPU alert reset.")
            self.sleep_interval -= self.sleep_decrement * (
                max(1, self.consecutive_decrements) / self.consecutive_divisor
            )
            self.last_was_increment = False
            if self.sleep_interval < self.sleep_min:
                self.sleep_interval = self.sleep_min
                # self.log.debug(f"Low CPU, but not decreasing below {self.sleep_min:.3f}s")
            # self.log.debug(
            #     f"Low CPU: {current_cpu_usage}% < {self.target_cpu_usage}%, "
            #     f"=> sleep {self.sleep_interval:.3f}s"
            # )

        if self.sleep_interval > 0:
            time.sleep(self.sleep_interval)
            return self.sleep_interval
        return 0.0
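A minimal usage sketch of the deleted helper, assuming the async variant; `fetch` here is a hypothetical coroutine standing in for the real request code:

import asyncio

from perf.throttle import DynamicThrottle

throttle = DynamicThrottle(target_cpu_usage=50, use_async=True)

async def crawl(urls, fetch):
    for url in urls:
        # Sleeps adaptively while CPU usage is above target; returns the
        # interval actually slept (0.0 when no throttling was applied).
        await throttle.wait()
        await fetch(url)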
@@ -1 +0,0 @@
-# Resample 1Min into 5Min, 15Min, 30Min, 1H, 4H, 1D, 1W, 1M, 1Y
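The deleted file held only that note on OHLC downsampling; as an illustration of the idea it described, a pandas sketch (the column names are assumptions, not from the repo):

import pandas as pd

def resample_ohlcv(df: pd.DataFrame, rule: str) -> pd.DataFrame:
    # Aggregate 1Min candles into a coarser window such as "5Min" or "1H".
    return df.resample(rule).agg(
        {"open": "first", "high": "max", "low": "min", "close": "last", "volume": "sum"}
    )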
@@ -14,7 +14,7 @@ from concurrent.futures import ProcessPoolExecutor

 # For timestamp processing
 from datetime import datetime
-from os import getenv
+from math import ceil

 import orjson
 import regex
@@ -34,6 +34,7 @@ from gensim.parsing.preprocessing import ( # stem_text,
     strip_short,
     strip_tags,
 )
+from numpy import array_split
 from polyglot.detect.base import logger as polyglot_logger

 # For NLP
@@ -47,21 +48,9 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
 import db
 import util

-# For throttling
-from perf.throttle import DynamicThrottle

 # 4chan schema
 from schemas.ch4_s import ATTRMAP

 trues = ("true", "1", "t", True)

 KEYNAME = "queue"

-MONOLITH_PROCESS_PERFSTATS = (
-    getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
-)
-TARGET_CPU_USAGE = float(os.getenv("MONOLITH_PROCESS_TARGET_CPU_USAGE", 50.0))

 CUSTOM_FILTERS = [
     lambda x: x.lower(),
     strip_tags,  #
@@ -92,19 +81,6 @@ CPU_THREADS = int(os.getenv("MONOLITH_PROCESS_THREADS", os.cpu_count()))
|
||||
|
||||
p = ProcessPoolExecutor(CPU_THREADS)
|
||||
|
||||
throttle = DynamicThrottle(
|
||||
target_cpu_usage=TARGET_CPU_USAGE,
|
||||
sleep_increment=0.02,
|
||||
sleep_decrement=0.01,
|
||||
sleep_max=0.5,
|
||||
sleep_min=0,
|
||||
psutil_interval=0.1,
|
||||
consecutive_divisor=2,
|
||||
log=log,
|
||||
start_increment=True,
|
||||
use_async=False,
|
||||
)
|
||||
|
||||
|
||||
def get_hash_key():
|
||||
hash_key = db.r.get("hashing_key")
|
||||
@@ -123,44 +99,38 @@ hash_key = get_hash_key()


-@asyncio.coroutine
-async def spawn_processing_threads(chunk, length):
-    log.debug(f"Spawning processing threads for chunk {chunk} of length {length}")
+async def spawn_processing_threads(data):
+    len_data = len(data)

     loop = asyncio.get_event_loop()
     tasks = []

-    if length < CPU_THREADS * 100:
-        cores = 1
-        chunk_size = length
+    if len(data) < CPU_THREADS * 100:
+        split_data = [data]
     else:
-        cores = CPU_THREADS
-        chunk_size = int(length / cores)
-
-    for index in range(cores):
-        log.debug(
-            f"[{chunk}/{index}] Delegating {chunk_size} messages to thread {index}"
-        )
-        task = loop.run_in_executor(p, process_data, chunk, index, chunk_size)
+        msg_per_core = int(len(data) / CPU_THREADS)
+        split_data = array_split(data, ceil(len(data) / msg_per_core))
+    for index, split in enumerate(split_data):
+        log.debug(f"Delegating processing of {len(split)} messages to thread {index}")
+        task = loop.run_in_executor(p, process_data, split)
         tasks.append(task)

     results = [await task for task in tasks]
+    log.debug(
+        (
+            f"Results from processing of {len_data} messages in "
+            f"{len(split_data)} threads: {len(results)}"
+        )
+    )

     # Join the results back from the split list
     flat_list = [item for sublist in results for item in sublist]
-    total_messages = len(flat_list)
-    log.info(
-        (
-            f"[{chunk}/{index}] Results from processing of {length} messages in "
-            f"{cores} threads: {len(flat_list)}"
-        )
-    )
-    await db.store_batch(flat_list)
-    return total_messages
+    await db.store_kafka_batch(flat_list)

+    # log.debug(f"Finished processing {len_data} messages")


-def process_data(chunk, index, chunk_size):
-    log.debug(f"[{chunk}/{index}] Processing {chunk_size} messages")
+def process_data(data):
     to_store = []

     sentiment_time = 0.0
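For reference, the pattern the new spawn_processing_threads uses — numpy.array_split plus a ProcessPoolExecutor, flattened back into one list — in a self-contained sketch (the names here are illustrative, not the repo's):

import asyncio
from concurrent.futures import ProcessPoolExecutor
from math import ceil

from numpy import array_split

def work(batch):
    # Stand-in for process_data: transform a batch, return a list of results.
    return [int(item) * 2 for item in batch]

async def fan_out(data, workers=4):
    loop = asyncio.get_event_loop()
    pool = ProcessPoolExecutor(workers)
    per_worker = max(1, int(len(data) / workers))
    chunks = array_split(data, ceil(len(data) / per_worker))
    tasks = [loop.run_in_executor(pool, work, chunk) for chunk in chunks]
    results = await asyncio.gather(*tasks)
    # Join the per-worker result lists back into one flat list.
    return [item for sub in results for item in sub]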
@@ -169,38 +139,15 @@
     date_time = 0.0
     nlp_time = 0.0
     normalise_time = 0.0
+    hash_time = 0.0
     normal2_time = 0.0
     soup_time = 0.0
-    sleep_time = 0.0

     total_time = 0.0

     # Initialise sentiment analyser
     analyzer = SentimentIntensityAnalyzer()

-    for msg_index in range(chunk_size):
-        # Print percentage of msg_index relative to chunk_size
-        if msg_index % 10 == 0:
-            percentage_done = (msg_index / chunk_size) * 100
-            log.debug(
-                f"[{chunk}/{index}] {percentage_done:.2f}% done ({msg_index}/{chunk_size})"
-            )
-
-        msg = db.r.rpop(KEYNAME)
-        if not msg:
-            return
-        msg = orjson.loads(msg)
-        if msg["src"] == "4ch":
-            board = msg["net"]
-            thread = msg["channel"]
-            redis_key = (
-                f"cache.{board}.{thread}.{msg['no']}.{msg['resto']}.{msg['now']}"
-            )
-            key_content = db.r.get(redis_key)
-            if key_content is not None:
-                continue
-            db.r.set(redis_key, "1")
-
+    for msg in data:
+        total_start = time.process_time()
         # normalise fields
         start = time.process_time()
@@ -226,6 +173,24 @@
         board = msg["net"]
         thread = msg["channel"]

+        # Calculate hash for post
+        start = time.process_time()
+        post_normalised = orjson.dumps(msg, option=orjson.OPT_SORT_KEYS)
+        hash = siphash(hash_key, post_normalised)
+        hash = str(hash)
+        redis_key = f"cache.{board}.{thread}.{msg['no']}"
+        key_content = db.r.get(redis_key)
+        if key_content:
+            key_content = key_content.decode("ascii")
+            if key_content == hash:
+                # This deletes the message since the append at the end won't be hit
+                continue
+            else:
+                msg["type"] = "update"
+        db.r.set(redis_key, hash)
+        time_took = (time.process_time() - start) * 1000
+        hash_time += time_took
+
         start = time.process_time()
         for key2, value in list(msg.items()):
             if key2 in ATTRMAP:
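The added block dedupes posts by a SipHash of their key-sorted JSON form; a standalone sketch of the same hashing step (the 16-character key is a placeholder — the real one is fetched from Redis under "hashing_key"):

import orjson
from siphashc import siphash

hash_key = "0123456789ABCDEF"  # placeholder 16-char key

msg = {"no": 123456789, "net": "g", "channel": "98765", "msg": "hello"}
# OPT_SORT_KEYS makes serialisation stable, so identical posts hash identically.
post_normalised = orjson.dumps(msg, option=orjson.OPT_SORT_KEYS)
digest = str(siphash(hash_key, post_normalised))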
@@ -243,10 +208,9 @@
                 old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M:%S")
             else:
                 old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M")
-            # iso_ts = old_ts.isoformat()
+            # new_ts = old_ts.isoformat()
             new_ts = int(old_ts.timestamp())
             msg["ts"] = new_ts
-            # msg["iso"] = iso_ts
         else:
             raise Exception("No TS in msg")
         time_took = (time.process_time() - start) * 1000
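For example, the first format string parses 4chan's human-readable timestamp like so:

from datetime import datetime

old_time = "01/26/25(Sun)14:39:00"  # 4chan "now" field with seconds
old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M:%S")
new_ts = int(old_ts.timestamp())  # epoch seconds stored as msg["ts"]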
@@ -272,7 +236,7 @@
             msg["lang_code"] = lang_code
             msg["lang_name"] = lang_name
         except cld2_error as e:
-            log.error(f"[{chunk}/{index}] Error detecting language: {e}")
+            log.error(f"Error detecting language: {e}")
             # So below block doesn't fail
             lang_code = None
         time_took = (time.process_time() - start) * 1000
@@ -291,7 +255,7 @@
         # Tokens
         start = time.process_time()
         tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
-        msg["tokens"] = str(tokens)
+        msg["tokens"] = tokens
         # n = nlp(msg["msg"])
         # for tag in TAGS:
         #     tag_name = tag.lower()
@@ -303,25 +267,17 @@
         # Add the mutated message to the return buffer
         to_store.append(msg)
+        total_time += (time.process_time() - total_start) * 1000

-        # Dynamic throttling to reduce CPU usage
-        if msg_index % 5 == 0:
-            sleep_time += throttle.wait()
-
-    if MONOLITH_PROCESS_PERFSTATS:
-        log.info("=====================================")
-        log.info(f"Chunk: {chunk}")
-        log.info(f"Index: {index}")
-        log.info(f"Sentiment: {sentiment_time}")
-        log.info(f"Regex: {regex_time}")
-        log.info(f"Polyglot: {polyglot_time}")
-        log.info(f"Date: {date_time}")
-        log.info(f"NLP: {nlp_time}")
-        log.info(f"Normalise: {normalise_time}")
-        log.info(f"Normal2: {normal2_time}")
-        log.info(f"Soup: {soup_time}")
-        log.info(f"Total: {total_time}")
-        log.info(f"Throttling: {sleep_time}")
-        log.info("=====================================")
+    log.debug("=====================================")
+    log.debug(f"Sentiment: {sentiment_time}")
+    log.debug(f"Regex: {regex_time}")
+    log.debug(f"Polyglot: {polyglot_time}")
+    log.debug(f"Date: {date_time}")
+    log.debug(f"NLP: {nlp_time}")
+    log.debug(f"Normalise: {normalise_time}")
+    log.debug(f"Hash: {hash_time}")
+    log.debug(f"Normal2: {normal2_time}")
+    log.debug(f"Soup: {soup_time}")
+    log.debug(f"Total: {total_time}")
+    log.debug("=====================================")

     return to_store
@@ -1,13 +1,14 @@
 wheel
 pre-commit
 beautifulsoup4
 redis
 siphashc
 aiohttp[speedups]
 python-dotenv
-manticoresearch
+#manticoresearch
 numpy
 aioredis[hiredis]
-#aiokafka
+aiokafka
 vaderSentiment
 polyglot
 pyicu
@@ -20,10 +21,4 @@ gensim
 python-Levenshtein
 orjson
 uvloop
-elasticsearch[async]
-msgpack
-# flpc
-psutil
-pymexc
-websockets
-aiomysql
+numba
186 rts.py
@@ -1,186 +0,0 @@
import asyncio
import logging
from os import getenv

import orjson
import websockets

import db

# Logger setup
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("RTS")

# Environment variables
MONOLITH_RTS_MEXC_API_ACCESS_KEY = getenv("MONOLITH_RTS_MEXC_API_ACCESS_KEY", None)
MONOLITH_RTS_MEXC_API_SECRET_KEY = getenv("MONOLITH_RTS_MEXC_API_SECRET_KEY", None)

# WebSocket endpoint
MEXC_WS_URL = "wss://wbs.mexc.com/ws"

{
    "d": {
        "e": "spot@public.kline.v3.api",
        "k": {
            "t": 1737901140,  # TS
            "o": "684.4",  # Open
            "c": "684.5",  # Close
            "h": "684.5",  # High
            "l": "684.4",  # Low
            "v": "0.173",  # Volume of the base
            "a": "118.41",  # Volume of the quote (Quantity)
            "T": 1737901200,  # ?
            "i": "Min1",  # ?
        },
    },
    "c": "spot@public.kline.v3.api@BNBUSDT@Min1",  # Channel
    "t": 1737901159239,
    "s": "BNBUSDT",  # Symbol
}

# Scan DB for last endtime (T)
# Request Kline data from last endtime (T) to now

# Check Server Time

# Response
# {
#   "serverTime" : 1645539742000
# }

# GET /api/v3/time

# Weight(IP): 1

# Parameter:
# NONE

# Kline/Candlestick Data

# Response
# [
#   [
#     1640804880000,
#     "47482.36",
#     "47482.36",
#     "47416.57",
#     "47436.1",
#     "3.550717",
#     1640804940000,
#     "168387.3"
#   ]
# ]

# GET /api/v3/klines

# Weight(IP): 1

# Kline/candlestick bars for a symbol. Klines are uniquely identified by their open time.

# Parameters:
# Name       Type     Mandatory  Description
# symbol     string   YES
# interval   ENUM     YES        ENUM: Kline Interval
# startTime  long     NO
# endTime    long     NO
# limit      integer  NO         Default 500; max 1000.

# Scrub function:
# For each record, ensure there are no time gaps
# When the 1m window goes over, the next t is always the last T.
# Check for gaps, and request all klines between those gaps to ensure a full DB, even with restarts.


# Idle jitter function - compare our time with server time.
# Compare ts with our time and print jitter. Add jitter warning to log and OHLC.
# High jitter may prevent us from getting the correct data for trading.
async def mex_handle(data):
    message = orjson.loads(data)
    # print(orjson.dumps(message, option=orjson.OPT_INDENT_2).decode("utf-8"))
    if "code" in message:
        if message["code"] == 0:
            log.info("Control message received")
            return

    symbol = message["s"]
    open = message["d"]["k"]["o"]
    close = message["d"]["k"]["c"]
    high = message["d"]["k"]["h"]
    low = message["d"]["k"]["l"]
    volume_base = message["d"]["k"]["v"]  # ERROR IN API DOCS
    volume_quote = message["d"]["k"]["a"]  # > a bigDecimal volume

    interval = message["d"]["k"]["i"]

    start_time = message["d"]["k"]["t"]  # > t long stratTime
    end_time = message["d"]["k"]["T"]  # > T long endTime
    event_time = message["t"]  # t long eventTime

    index = f"mex_ohlc_{symbol.lower()}"

    reformatted = {
        "s": symbol,
        "o": float(open),
        "c": float(close),
        "h": float(high),
        "l": float(low),
        "v": float(volume_base),
        "a": float(volume_quote),
        "i": interval,
        "t": int(start_time),
        "t2": int(end_time),
        "ts": int(event_time),
    }

    await db.rts_store_message(index, reformatted)
    print(index)
    print(orjson.dumps(reformatted, option=orjson.OPT_INDENT_2).decode("utf-8"))
    print()


# Kline WebSocket handler
async def mex_main():
    await db.init_mysql_pool()
    async with websockets.connect(MEXC_WS_URL) as websocket:
        log.info("WebSocket connected")

        # Define symbols and intervals
        symbols = ["BTCUSDT"]  # Add more symbols as needed
        interval = "Min1"  # Kline interval

        # Prepare subscription requests for Kline streams
        # Request: spot@public.kline.v3.api@<symbol>@<interval>
        subscriptions = [
            f"spot@public.kline.v3.api@{symbol}@{interval}" for symbol in symbols
        ]

        # Send subscription requests
        subscribe_request = {
            "method": "SUBSCRIPTION",
            "params": subscriptions,
            # "id": 1,
        }
        await websocket.send(orjson.dumps(subscribe_request).decode("utf-8"))

        log.info(f"Subscribed to: {subscriptions}")

        # Listen for messages
        while True:
            try:
                message = await websocket.recv()
                await mex_handle(message)
            except websockets.exceptions.ConnectionClosed as e:
                log.error(f"WebSocket connection closed: {e}")
                break


# Entry point
if __name__ == "__main__":
    try:
        asyncio.run(mex_main())
    except KeyboardInterrupt:
        log.info("RTS process terminated.")
@@ -129,19 +129,8 @@ schema_main = {
     "version_sentiment": "int",
     # 1, 2
     "version_tokens": "int",
     # en, ru
     "lang_code": "string indexed attribute",
     "lang_name": "text",
-    "match_ts": "timestamp",
-    "batch_id": "bigint",
-    "rule_id": "bigint",
-    "index": "string indexed attribute",
-    "meta": "text",
-    # "iso": "string indexed attribute",
 }

 schema_rule_storage = schema_main

 schema_meta = {
     "id": "bigint",
     # 393598265, #main, Rust Programmer's Club
@@ -10,7 +10,6 @@ from numpy import array_split

 import db
 import util
-from perf.throttle import DynamicThrottle

 # CONFIGURATION #

@@ -26,12 +25,6 @@ CRAWL_DELAY = int(getenv("MONOLITH_CH4_CRAWL_DELAY", 5))
|
||||
# Semaphore value ?
|
||||
THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", 1000))
|
||||
|
||||
# Target CPU usage percentage
|
||||
TARGET_CPU_USAGE = float(getenv("MONOLITH_CH4_TARGET_CPU_USAGE", 50.0))
|
||||
|
||||
# Boards to crawl
|
||||
BOARDS = getenv("MONOLITH_CH4_BOARDS", "").split(",")
|
||||
|
||||
# CONFIGURATION END #
|
||||
|
||||
|
||||
@@ -44,19 +37,6 @@ class Chan4(object):
         name = self.__class__.__name__
         self.log = util.get_logger(name)

-        self.throttle = DynamicThrottle(
-            target_cpu_usage=TARGET_CPU_USAGE,
-            sleep_increment=0.01,
-            sleep_decrement=0.01,
-            sleep_max=0.1,
-            sleep_min=0,
-            psutil_interval=0.1,
-            log=self.log,
-            start_increment=False,
-            use_async=True,
-        )
-        self.wait = self.throttle.wait
-
         self.api_endpoint = "https://a.4cdn.org"
         # self.boards = ["out", "g", "a", "3", "pol"] #
         self.boards = []
@@ -73,14 +53,12 @@ class Chan4(object):
            self.log.debug(f"Created new hash key: {self.hash_key}")
            db.r.set("hashing_key", self.hash_key)
        else:

            self.hash_key = self.hash_key.decode("ascii")
            self.log.debug(f"Decoded hash key: {self.hash_key}")

    async def run(self):
        if "ALL" in BOARDS:
            await self.get_board_list()
        else:
            self.boards = BOARDS
            await self.get_board_list()
        while True:
            await self.get_thread_lists(self.boards)
            await asyncio.sleep(CRAWL_DELAY)
@@ -93,37 +71,29 @@ class Chan4(object):
         for board in response["boards"]:
             self.boards.append(board["board"])
         self.log.debug(f"Got boards: {self.boards}")
-        # await self.dynamic_throttle()
-        # TODO

     async def get_thread_lists(self, boards):
-        # self.log.debug(f"Getting thread list for {boards}")
-        board_urls = {board: f"{board}/threads.json" for board in boards}
+        board_urls = {board: f"{board}/catalog.json" for board in boards}
         responses = await self.api_call(board_urls)
         to_get = []
         flat_map = [board for board, thread in responses]
-        self.log.debug(f"Got thread list for {len(responses)} boards: {flat_map}")
-        for board, response in responses:
+        self.log.debug(f"Got thread list for {flat_map}: {len(responses)}")
+        for mapped, response in responses:
             if not response:
                 continue
             for page in response:
                 for threads in page["threads"]:
                     no = threads["no"]
-                    to_get.append((board, no))
-                    # await self.dynamic_throttle()
-                    # TODO
+                    to_get.append((mapped, no))

         if not to_get:
             return
-        self.log.debug(f"Got {len(to_get)} threads to fetch")
         split_threads = array_split(to_get, ceil(len(to_get) / THREADS_CONCURRENT))
-        self.log.debug(f"Split threads into {len(split_threads)} series")
-        for index, thr in enumerate(split_threads):
-            self.log.debug(f"Series {index} - getting {len(thr)} threads")
-            await self.get_threads_content(thr)
-            # await self.dynamic_throttle()
-            # TODO
+        for threads in split_threads:
+            await self.get_threads_content(threads)
             await asyncio.sleep(THREADS_DELAY)
         # await self.get_threads_content(to_get)

     def take_items(self, dict_list, n):
         i = 0
@@ -153,8 +123,6 @@
                 continue
             board, thread = mapped
             all_posts[mapped] = response["posts"]
-            # await self.dynamic_throttle()
-            # TODO

         if not all_posts:
             return
@@ -164,16 +132,14 @@
         to_store = []
         for key, post_list in posts.items():
             board, thread = key
-            for post in post_list:
-                post["type"] = "msg"
+            for index, post in enumerate(post_list):
+                posts[key][index]["type"] = "msg"

-                post["src"] = "4ch"
-                post["net"] = board
-                post["channel"] = thread
+                posts[key][index]["src"] = "4ch"
+                posts[key][index]["net"] = board
+                posts[key][index]["channel"] = thread

-                to_store.append(post)
-                # await self.dynamic_throttle()
-                # TODO
+                to_store.append(posts[key][index])

         if to_store:
             await db.queue_message_bulk(to_store)
@@ -188,7 +154,6 @@
     async def bound_fetch(self, sem, url, session, mapped):
         # Getter function with semaphore.
         async with sem:
-            await self.wait()
             try:
                 return await self.fetch(url, session, mapped)
             except:  # noqa
@@ -1,6 +1,8 @@
 import asyncio
 from os import getenv

+import orjson
+
 import db
 import util
 from processing import process
@@ -11,22 +13,13 @@
 CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))
 ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5"))

-INGEST_INCREASE_BELOW = int(getenv("MONOLITH_INGEST_INCREASE_BELOW", "2500"))
-INGEST_DECREASE_ABOVE = int(getenv("MONOLITH_INGEST_DECREASE_ABOVE", "10000"))
-
-INGEST_INCREASE_BY = int(getenv("MONOLITH_INGEST_INCREASE_BY", "100"))
-INGEST_DECREASE_BY = int(getenv("MONOLITH_INGEST_DECREASE_BY", "100"))
 log = util.get_logger("ingest")

-INGEST_MAX = int(getenv("MONOLITH_INGEST_MAX", "1000000"))
-INGEST_MIN = int(getenv("MONOLITH_INGEST_MIN", "100"))


 class Ingest(object):
     def __init__(self):
         name = self.__class__.__name__
         self.log = util.get_logger(name)
-        self.current_chunk = 0
         self.log.info(
             (
                 "Starting ingest handler for chunk size of "
@@ -37,45 +30,17 @@
     async def run(self):
         while True:
             await self.get_chunk()
-            self.log.debug(f"Ingest chunk {self.current_chunk} complete")
-            self.current_chunk += 1
             await asyncio.sleep(ITER_DELAY)

     async def get_chunk(self):
-        global CHUNK_SIZE
-        length = await db.ar.llen(KEYNAME)
-        if length > CHUNK_SIZE:
-            length = CHUNK_SIZE
-        if not length:
+        items = []
+        # for source in SOURCES:
+        #     key = f"{KEYPREFIX}{source}"
+        chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE)
+        if not chunk:
             return
-        ingested = await process.spawn_processing_threads(self.current_chunk, length)
-
-        if ingested < INGEST_INCREASE_BELOW:
-            if CHUNK_SIZE + INGEST_INCREASE_BY < INGEST_MAX:
-                self.log.debug(
-                    (
-                        f"Increasing chunk size to "
-                        f"{CHUNK_SIZE + INGEST_INCREASE_BY} "
-                        f"due to low ingestion ({ingested})"
-                    )
-                )
-                CHUNK_SIZE += INGEST_INCREASE_BY
-            else:
-                log.debug(
-                    f"Chunk size ({CHUNK_SIZE}) at maximum, not increasing above: {INGEST_MAX}"
-                )
-
-        elif ingested > INGEST_DECREASE_ABOVE:
-            if CHUNK_SIZE - INGEST_DECREASE_BY > INGEST_MIN:
-                self.log.debug(
-                    (
-                        f"Decreasing chunk size to "
-                        f"{CHUNK_SIZE - INGEST_DECREASE_BY}"
-                        f"due to high ingestion ({ingested})"
-                    )
-                )
-                CHUNK_SIZE -= INGEST_DECREASE_BY
-            else:
-                log.debug(
-                    f"Chunk size ({CHUNK_SIZE}) at minimum, not decreasing below: {INGEST_MIN}"
-                )
+        for item in chunk:
+            item = orjson.loads(item)
+            items.append(item)
+        if items:
+            await process.spawn_processing_threads(items)
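The adaptive sizing removed here nudged CHUNK_SIZE up by INGEST_INCREASE_BY whenever a pass ingested fewer than INGEST_INCREASE_BELOW messages and down once it exceeded INGEST_DECREASE_ABOVE, clamped to [INGEST_MIN, INGEST_MAX]; the replacement simply spops up to a fixed CHUNK_SIZE of items per pass and hands them to the processing pool.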