Compare commits

365 Commits
prod ... master

Author SHA1 Message Date
81f05d4263 Begin implementing RTS 2026-02-17 12:14:29 +00:00
dc533f266f Add psutil to requirements 2025-01-24 12:18:17 +00:00
ea4b5e6321 Begin implementation of dynamic throttling framework 2025-01-24 12:17:43 +00:00
54ecfbae64 Add throttling for performance 2025-01-24 12:17:22 +00:00
352909bec0 Begin implementing RTS process 2025-01-23 11:30:01 +00:00
1cc2ef629e Automatic optimisation for ingest chunking based on processed messages 2025-01-23 11:29:48 +00:00
1aeadaf3b7 Stringify tokens and return message number from processing 2025-01-23 11:29:07 +00:00
ba8c33d8fc Update socket and database URLs 2025-01-23 11:27:51 +00:00
2ef2249be7 Update container files to work with Podman 2025-01-23 11:26:58 +00:00
054e9caca0 Update to run with Podman 2024-12-29 17:35:57 +00:00
5ea4e5f460 Bump versions in pre-commit config 2023-02-09 07:20:13 +00:00
210237b50a Update pre-commit versions 2023-02-09 07:20:13 +00:00
87b81ac236 Retry Redis ingest if it failed 2023-01-13 07:20:27 +00:00
02071758b5 Send messages to Neptune Redis via PubSub 2023-01-12 07:20:43 +00:00
0ab67becff Give option for only crawling some boards 2022-12-22 07:20:26 +00:00
ebaf8c765d Allow disabling modules in environment variables 2022-12-22 07:20:26 +00:00
ce2d7684bc Run ingest task first 2022-12-22 10:11:48 +00:00
508b00e471 Pre-create meta index 2022-11-23 19:02:31 +00:00
9d1e4b44e8 Add pathogen network 2022-11-23 18:23:20 +00:00
371bce1094 Remove print statements 2022-11-22 21:43:56 +00:00
be0cf231b4 Fix mapping and make Threshold talk to SSDB 2022-11-22 21:42:35 +00:00
1993c8f1d2 Use Portainer Git directory for Redis config 2022-11-22 21:19:13 +00:00
9d35930b3b Re-add Portainer Git dir 2022-11-22 21:15:19 +00:00
34346006ab Remove leftover Docker files in legacy 2022-11-22 21:11:46 +00:00
42a3f5da03 Remove tarred Docker definition 2022-11-22 07:20:52 +00:00
42657aeee0 Lower Python version due to gensim incompatibility 2022-11-22 21:07:34 +00:00
1c34aa4a01 Clean up legacy and debugging code 2022-11-22 07:20:27 +00:00
6f3db61532 Add debugging statements for Portainer 2022-11-22 20:37:58 +00:00
3c18858c48 Use relative path for code 2022-11-22 20:35:13 +00:00
78c6ef96d2 Use relative path for build directory 2022-11-22 20:30:41 +00:00
d2a174c1c4 Remove Portainer Git volume 2022-11-22 20:28:44 +00:00
c53438d07b Remove port variable 2022-11-22 20:17:51 +00:00
93353f34e7 Update env example file 2022-11-22 20:17:40 +00:00
6b1604b724 Remove old compose file 2022-11-22 20:17:28 +00:00
49f46c33ba Fully implement Elasticsearch indexing 2022-11-22 20:15:02 +00:00
052631c71f Remove infrastructure Docker definition 2022-11-22 18:41:29 +00:00
7edc231ea9 Remove Manticore and Superset stuff 2022-11-22 18:37:49 +00:00
44d6d90325 Update Druid spec 2022-11-21 18:59:53 +00:00
1c2ff41b56 Add ripsecrets to pre-commit hook 2022-11-03 07:20:30 +00:00
51a9b2af79 Improve memory usage and fix 4chan crawler 2022-10-21 07:20:30 +01:00
2d7b6268dd Don't shadow previous iterator variable 2022-10-21 07:20:30 +01:00
e5b5268f5c Add example Druid spec 2022-10-21 07:20:30 +01:00
dc1ed1fe10 Print the length of the flattened list in debug message 2022-10-21 07:20:30 +01:00
eaf9a3c937 Remove unused ssdb_data volume 2022-10-21 07:20:30 +01:00
054a7a3ccf Don't mount the template directory 2022-10-21 07:20:30 +01:00
f774f4c2d2 Add some environment variables to control debug output 2022-10-21 07:20:30 +01:00
e32b330ef4 Switch to SSDB for message queueing 2022-10-21 11:53:29 +01:00
8c596ec516 Update gitignore 2022-10-21 11:53:28 +01:00
ab5e85c5c6 Begin switching away from Redis 2022-10-21 11:14:51 +01:00
7482064aee Clean up docker environment 2022-10-19 16:45:18 +01:00
dccbc6b158 Remove dependencies on infra stuff 2022-10-11 11:16:24 +01:00
8cc1a48a25 Separate out infra in production 2022-10-11 11:04:03 +01:00
83e8fb0e38 Remove event log file 2022-10-05 12:52:30 +01:00
64cf7d0d4a Set Superset directory relative to Portainer Git root 2022-10-04 21:43:16 +01:00
ae12e37e9b Set Superset path properly 2022-10-04 21:41:22 +01:00
5bb9bd3998 Use local storage in production 2022-10-04 21:33:08 +01:00
d96dc573c5 Update production compose 2022-10-04 21:32:14 +01:00
aea1c7faf6 Use one image for all the Druid services 2022-10-04 21:30:17 +01:00
2d6b3bb090 Set Superset volume relative to docker folder 2022-10-04 20:54:38 +01:00
83ffd6517c Switch quickstart setting to nano 2022-10-04 20:37:02 +01:00
8465e8fb77 Set Superset env file relative to docker directory 2022-10-04 20:30:14 +01:00
d7d9958e54 Add persistent Redis data store and copy over Druid config to production 2022-10-04 20:26:58 +01:00
464c831686 Add Apache Superset and fix Druid resource usage 2022-10-04 20:17:04 +01:00
5ad6cd0354 Add postgres config to Metabase 2022-10-02 14:29:40 +01:00
06e80a9759 Time stuff and switch to gensim for tokenisation 2022-10-01 14:46:45 +01:00
5c91f1af87 Remove commented debug code 2022-09-30 07:22:22 +01:00
02ff44a6f5 Use only one Redis key for the queue to make chunk size more precise for thread allocation 2022-09-30 07:22:22 +01:00
a5d29606e9 Remove ujson 2022-09-30 15:30:34 +01:00
6b549dee6a Reformat 2022-09-30 15:23:00 +01:00
2dd2360b4f Add config file to Turnilo 2022-09-27 08:30:28 +01:00
a2f88e29e6 Implement uvloop 2022-09-23 07:20:30 +01:00
f0df3e80fd Print Ingest settings on start 2022-09-23 08:32:29 +01:00
09fc63d0ad Make debug output cleaner 2022-09-22 17:39:29 +01:00
e9ae499ce8 Fix indexer options 2022-09-22 17:39:18 +01:00
b6f8dabccd Fix Java variable in indexer parameters 2022-09-22 08:41:59 +01:00
395dfb1e7b Decrease memory requirements further and switch Kafka image 2022-09-21 21:11:13 +01:00
ee79762c73 Set Kafka max heap size 2022-09-21 20:26:05 +01:00
e58b9960b2 Set max memory for Metabase 2022-09-21 14:39:11 +01:00
4a60dec964 Remove debugging code and fix regex substitution 2022-09-21 12:48:54 +01:00
9ee55a720b Change dev container names 2022-09-21 12:09:18 +01:00
799286ca76 Change prod container names 2022-09-21 12:08:29 +01:00
0e62a5b4b8 Remove prod compose comment 2022-09-21 12:04:54 +01:00
5ebae02bf2 Remove commented code for debugging 2022-09-21 10:02:05 +01:00
ced3a251b2 Normalise fields in processing and remove invalid characters 2022-09-21 10:01:12 +01:00
740f93208b Make production volumes point to external storage 2022-09-21 10:00:48 +01:00
2763e52e6b Don't muddle up the topics when sending Kafka batches 2022-09-20 23:03:02 +01:00
869af451e5 Document new PROCESS_THREADS setting in example file 2022-09-20 22:43:04 +01:00
31c58dd85b Make CPU threads configurable 2022-09-20 22:29:13 +01:00
40a0c2d22e Make performance settings configurable 2022-09-20 22:22:13 +01:00
9f4d4784af Set memory size to 2.5GB 2022-09-08 07:20:30 +01:00
72c22ed91e Update DirectMemorySize to be 1.5GB 2022-09-19 21:51:07 +01:00
ce62a84cec Make MaxDirectMemory 0.5*cores 2022-09-19 19:15:57 +01:00
41b5ca6afd Make max memory size 512m 2022-09-19 19:10:33 +01:00
7db3504251 Further decrease Druid memory requirements 2022-09-19 17:07:15 +01:00
1284700e61 Bump production Kafka healthcheck timeout 2022-09-19 11:18:52 +01:00
a9803fc79c Decrease production Druid max memory size 2022-09-19 10:51:34 +01:00
d4861811e5 Increase Kafka retries 2022-09-19 10:48:29 +01:00
3c2e8e8e67 Change Metabase port 2022-09-18 13:15:10 +01:00
f60c08918e Add docker environment file 2022-09-18 13:05:08 +01:00
0d6b3763f9 Update production compose 2022-09-18 13:04:08 +01:00
d4b8e11525 Reformat comment 2022-09-18 13:02:06 +01:00
38d00f2c21 Implement restricted sources 2022-09-18 13:01:19 +01:00
cb11ce9b12 Fix merge conflict 2022-09-16 17:45:24 +01:00
a89b5a8b6f Implement sentiment/NLP annotation and optimise processing 2022-09-16 17:09:49 +01:00
f432e9b29e Properly process Redis buffered messages and ingest into Kafka 2022-09-14 18:32:32 +01:00
c5f01c3084 Ingest into Kafka and queue messages better 2022-09-13 22:17:46 +01:00
47c5f89914 Implement Apache Druid/Kafka and Metabase 2022-09-13 22:17:32 +01:00
68fd5fa230 Switch to latest image for dev docker-compose 2022-09-13 09:20:43 +01:00
fd90c233c2 Begin implementing Apache Druid 2022-09-08 07:20:30 +01:00
0eb4a04b89 Use stable after all 2022-09-08 07:20:30 +01:00
e196172e04 Switch production image back to dev 2022-09-08 07:20:30 +01:00
41a8cea873 Lower memory requirements to prevent crashes 2022-09-08 07:20:30 +01:00
9cf4e945d1 Set dev image back to the default 2022-09-12 08:43:18 +01:00
04b5dec843 Treat text fields as string and try beta Kibana image 2022-09-12 08:27:13 +01:00
40492b1595 Add Mysql port to ports instead of expose 2022-09-10 13:20:06 +01:00
90fed411e6 Expose the Mysql port 2022-09-10 13:16:19 +01:00
0dde7d6f30 Use dev image of manticore 2022-09-10 12:03:45 +01:00
fed3cdbf5a Remove indexer block to attempt to prevent Manticore DB crash 2022-09-08 07:20:30 +01:00
c2bdb3fd15 Reformat 2022-09-07 07:20:30 +01:00
5d042c1259 Raise open files limit for Redis 2022-09-07 07:20:30 +01:00
92475ee9a9 Add 4chan update message type to main types 2022-09-07 07:20:30 +01:00
5c3b338017 Implement threshold writing to Redis and manticore ingesting from Redis 2022-09-07 07:20:30 +01:00
54ea5fa8e9 Add config directories to gitignore 2022-09-08 09:45:18 +01:00
e79de2b377 Add aioredis 2022-09-08 09:44:27 +01:00
79b1bee9e4 Implement ingesting to Redis from Threshold 2022-09-07 07:20:30 +01:00
e3b2e1f36d Config relative to Git dir 2022-09-05 07:20:30 +01:00
eb71dd76f8 Store persistent database elsewhere 2022-09-05 07:20:30 +01:00
cd5eb61455 Improve DB performance with caching 2022-09-05 07:20:30 +01:00
a198f2a487 Reformat legacy project 2022-09-05 07:20:30 +01:00
19ee7071f5 Merge branch 'threshold' 2022-09-06 12:50:25 +01:00
122fdca5db Moved files to subdirectory 2022-09-06 12:50:09 +01:00
7bb2264d91 Increase thread delay time 2022-09-05 07:20:30 +01:00
1858e06c4b Alter schemas and 4chan performance settings 2022-09-05 07:20:30 +01:00
ddcfa614ad Remove some debugging code 2022-09-05 07:20:30 +01:00
a1b193c6da Change Python to 3.10 2022-09-05 07:20:30 +01:00
7606d77c2b Update production env file path 2022-09-05 07:20:30 +01:00
aaf2d58d86 Remove development dotenv loading 2022-09-05 07:20:30 +01:00
d7e49777ef Add debug statement 2022-09-05 07:20:30 +01:00
d1c6bd1fb5 Reformat and set the net and channel for 4chan 2022-09-05 07:20:30 +01:00
b8d2ecc009 Make crawler more efficient and implement configurable parameters 2022-09-05 07:20:30 +01:00
f8fc5e1a1b Split thread list into chunks to save memory 2022-09-05 07:20:30 +01:00
6e00f70184 Reformat code 2022-09-04 21:40:04 +01:00
0f717b987d Reinstate Redis cache 2022-09-04 21:38:53 +01:00
60c43b4eb5 Run processing in thread 2022-09-04 21:29:00 +01:00
db23b31f30 Implement aiohttp 2022-09-04 19:44:25 +01:00
f7860bf08b Begin implementing aiohttp 2022-09-04 13:47:32 +01:00
734a2b7879 Implement running Discord and 4chan gathering simultaneously 2022-09-02 22:30:45 +01:00
2731713ede Fix error when no email can be found 2022-08-27 11:19:28 +01:00
2a2f24f570 Fix getting first relay when they are not sequential 2022-08-26 22:17:12 +01:00
c7941bfcda Log authentication messages 2022-08-16 23:01:42 +01:00
49b0b9db46 Implement deduplicating channels 2022-08-16 22:01:35 +01:00
07f1fff125 Switch to siphash 2022-08-18 07:20:30 +01:00
8816024d90 Re-add fake messages 2022-08-15 19:49:21 +01:00
b61316d805 Detect queries if nick and channel are the same 2022-08-15 19:24:42 +01:00
a65098c222 Add sinst fetch and fix message send logic 2022-08-15 19:15:12 +01:00
ed3c8497bc Switch debugging statements to trace in ChanKeep 2022-08-15 19:15:00 +01:00
0b69893e17 Fix query handling and don't send a fake message 2022-08-15 17:59:31 +01:00
e4c1d80250 Only run pingmsg after negative has completed 2022-08-18 07:20:30 +01:00
415a0b1135 Fix debug statements and amend function names 2022-08-18 07:20:30 +01:00
d026881086 Properly format string 2022-08-18 07:20:30 +01:00
ce32ab4722 Improve regPing debugging 2022-08-18 07:20:30 +01:00
2942929478 Improve regPing negative handling logic 2022-08-18 07:20:30 +01:00
53ee69540f Fix double messages and regPing logic 2022-08-18 07:20:30 +01:00
b25cb1699f Set the channel limit on connected relays, not active 2022-08-18 07:20:30 +01:00
7efde28d99 Look before you leap to confirming registrations 2022-08-18 07:20:30 +01:00
659162ebc6 Fix IRC config mutation 2022-08-18 07:20:30 +01:00
d0ea3bb221 Change authentication endpoint 2022-08-18 07:20:30 +01:00
e64aaf99d8 Reorder API endpoints to prevent clashing 2022-08-18 07:20:30 +01:00
f4225b622f Add more debugging information 2022-08-15 00:39:22 +01:00
6f44921647 Figure out the channel parsing logic 2022-08-15 00:36:36 +01:00
731c6a2fd1 Pass a list instead of listinfo 2022-08-15 00:29:08 +01:00
ffed420c11 Fix variable placement 2022-08-15 00:27:16 +01:00
66e046e15f Fix list parsing 2022-08-15 00:26:11 +01:00
8f44f34d0e Fix debugging code in keepChannels 2022-08-15 00:08:11 +01:00
1b68568fb7 Add debugging code in keepChannels 2022-08-15 00:07:29 +01:00
bdb2949d17 Subtract one from length of list for indices 2022-08-15 00:04:49 +01:00
560af8aeb0 Lower max_chans to length of LIST if it's shorter 2022-08-15 00:03:12 +01:00
153d3dd847 Reset negative pass status when requesting recheck 2022-08-14 23:58:35 +01:00
6cdadd23a0 Implement initial WHO loop delay 2022-08-14 20:58:41 +01:00
4fa5c25e94 Fix getting all unregistered relays 2022-08-14 20:58:30 +01:00
1b39b46121 Blacklist channels we are kicked from 2022-08-14 20:44:04 +01:00
c55a4058b1 Use JSON for sending messages 2022-08-14 16:45:40 +01:00
b62200d410 Implement API call to register 2022-08-14 16:26:09 +01:00
e30250603b Convert num to number in registration confirmation 2022-08-14 16:09:32 +01:00
02739abaf4 Allow current nick substitution in IRC commands 2022-08-14 15:53:18 +01:00
281eb75b26 Fix variable shadowing 2022-08-14 15:43:48 +01:00
559e1f4afd Print identification message 2022-08-14 13:51:13 +01:00
060ee4f0d5 Implement manual authentication mode 2022-08-14 13:13:05 +01:00
f7d390da32 Implement API for authentication management actions 2022-08-14 12:43:33 +01:00
0b20a05b19 More debugging for reg tests and getstr command 2022-08-14 11:41:29 +01:00
39059084ef Add allRelaysActive output to network info 2022-08-14 10:58:28 +01:00
feecf48b9b Add debug statements and only check if network is connected when parting channels 2022-08-14 09:25:54 +01:00
9b14979f29 Use JSON for joining channels and don't shadow auth variable when getting network info 2022-08-14 09:25:01 +01:00
a204be25c5 Make channel deletion endpoint accept JSON 2022-08-14 00:01:14 +01:00
a42c6be1b7 LBYL 2022-08-13 23:38:13 +01:00
a82355b660 Add more information to relay API return 2022-08-13 23:36:39 +01:00
2a3c9f80a3 Add even more debugging 2022-08-13 23:18:56 +01:00
3ca5a3452c Extra debugging for getting active relays 2022-08-13 23:17:26 +01:00
5f33ba7f1d Fix typo in module name 2022-08-13 23:14:51 +01:00
d9d3faf860 Extra debugging for get_first_relay 2022-08-13 23:14:17 +01:00
4c91b6ad2c Filter queries more carefully 2022-08-13 22:46:10 +01:00
abeba6bc06 Update CHANLIMIT on all instances when set via API 2022-08-13 22:36:52 +01:00
406b3d77f4 Add helper to get all active relays 2022-08-13 22:36:18 +01:00
047e9148aa Implement API endpoint to enable authentication 2022-08-13 22:25:29 +01:00
5db659b9af Filter AUTH channel (OFTC fix) 2022-08-13 22:15:50 +01:00
fced2b7d75 Use ChanKeep system for joining channels with joinSingle 2022-08-13 21:54:14 +01:00
16133fb7b4 Fully make use of ECA for multiple channels 2022-08-13 21:40:53 +01:00
5c95f35c61 Return chanlimit for each relay 2022-08-13 21:22:43 +01:00
ad7a5cfe49 Check token before attempting to confirm 2022-08-13 20:55:36 +01:00
92df4fb9a3 Implement API endpoint for provisioning relays 2022-08-13 20:51:31 +01:00
28c1a33615 Implement configurable chanlimit and add more fields about LIST output to Redis 2022-08-13 20:37:21 +01:00
9470f0d0d9 Implement updating registration via API 2022-08-13 20:36:51 +01:00
496a3d0374 Implement ChanKeep without requiring persistent chanlimits on all networks 2022-08-13 19:20:29 +01:00
75965497be Add some debug statements and statistics for chanlimits 2022-08-13 18:40:13 +01:00
1e9dd1b223 Print message if relay is unauthenticated/disconnected 2022-08-13 14:06:34 +01:00
df6b9e34a3 Return relay numbers with channel list 2022-08-13 13:47:42 +01:00
facf58ec2c Add connected status to IRC info return and check when getting active relays 2022-08-13 13:40:33 +01:00
21ed66bc00 Reformat code 2022-08-13 13:32:22 +01:00
5c63fb5048 Implement getting LIST information from API 2022-08-13 13:27:20 +01:00
c3fd8a97f7 Provision relay on creation 2022-08-13 00:18:06 +01:00
acc363d207 Add docstrings to chankeep 2022-08-12 23:53:02 +01:00
49214644ff Implement migrating networks 2022-08-12 23:32:00 +01:00
20f59362ff Subtract allocated channel slots from total 2022-08-12 22:31:12 +01:00
065fe94cbd Improve channel allocation and write basic tests for it 2022-08-12 22:27:49 +01:00
6306231098 Make channel join notification a TRACE 2022-08-12 20:19:39 +01:00
5c2ef740e6 Fix email command 2022-08-12 20:19:33 +01:00
7e51178a10 Add endpoint to get the bot's nickname 2022-08-09 07:20:30 +01:00
a2b6ebd912 Properly implement querying with API 2022-08-09 07:20:30 +01:00
ec943203d0 Get our hostname from WHO when we create fake events 2022-08-09 07:20:30 +01:00
8dc176aa54 Fire a fake event when we send a message 2022-08-09 07:20:30 +01:00
8ba4831d9c Implement best effort allocation 2022-08-11 21:44:19 +01:00
4c040bbf78 Simplify variable names and reformat 2022-08-11 20:51:41 +01:00
5a4ae2153e Use ceil instead of round for relay number rounding 2022-08-11 20:46:44 +01:00
8c3a75b3c8 Expand ECA secondary allocation algorithm 2022-08-11 20:43:34 +01:00
dc13515aa8 Adding more debug statements in ECA system 2022-08-11 20:36:24 +01:00
d38f7ba1ba Print information about received LIST 2022-08-11 20:32:49 +01:00
7c9903bca2 Return correct data type for provisioning relays 2022-08-11 20:29:01 +01:00
22e853a3f7 Simplify is_first_relay 2022-08-11 20:26:19 +01:00
b5326e92a1 Add even more debugging 2022-08-11 20:21:39 +01:00
604bee1b78 Add more LIST handling debugging 2022-08-11 20:18:49 +01:00
87ee96dd26 Don't add 1 to current relays when iterating 2022-08-11 20:13:30 +01:00
cc0e3b872b Add extra debug call for allRelaysActive 2022-08-11 20:12:38 +01:00
16d268ca90 Reformat helpers 2022-08-11 20:09:14 +01:00
6193502f2e Enable debug mode with env vars 2022-08-11 20:09:01 +01:00
b16289cded Update IRC template 2022-08-11 19:49:58 +01:00
502b45cda5 Allow gaps in relay numbering 2022-08-11 19:22:09 +01:00
4c8b584ef4 Implement deleting networks 2022-08-02 09:01:34 +01:00
b42c82eac2 More error handling when joining channels with ChanKeep 2022-08-02 09:01:24 +01:00
4c9ac3ec42 Implement adding networks 2022-08-01 23:02:20 +01:00
db4b6cc6f9 Implement requesting channel list for network 2022-08-01 21:38:46 +01:00
dae62ea544 Remove debugging code 2022-08-01 21:31:48 +01:00
e8870e95e7 Implement automatic provisioning 2022-08-01 19:34:35 +01:00
0dedb545f0 Implement updating aliases 2022-08-01 19:05:12 +01:00
6909fb68f7 Implement API endpoint to add next relay 2022-07-29 22:39:08 +01:00
54b5561a75 Implement deleting relays and fix adding 2022-07-29 22:11:43 +01:00
d51e87b09f Reformat code 2022-07-29 17:28:19 +01:00
6359918639 Fix joining channels with inactive relays 2022-07-29 17:28:09 +01:00
ba1f8407d1 Implement creating relays via the API 2022-07-29 17:27:40 +01:00
78f3f4520d Fix Redis config path 2022-07-29 22:22:22 +01:00
deb89e9202 Use proper port for SSL listener 2022-07-29 22:22:22 +01:00
f88551f926 Use Git dir to make redis config absolute path 2022-07-29 09:06:13 +01:00
dc6dcd79db Use paths relative to root in production compose 2022-07-29 09:04:18 +01:00
1d8bb73645 Switch paths 2022-07-29 09:00:08 +01:00
9de0b0919d Use relative paths 2022-07-29 08:59:02 +01:00
bf79c013d5 Fix redis.conf location in prod compose 2022-07-29 08:48:30 +01:00
e1fc59f636 Don't pass template directory 2022-07-29 08:35:56 +01:00
e662d36542 Fix path issue 2022-07-29 08:32:39 +01:00
cd38aab318 Pass through configuration directories to compose 2022-07-29 08:31:01 +01:00
6e99605701 Fix environment variable path on production compose 2022-07-29 08:11:37 +01:00
248273648d Properly configure production compose file 2022-07-29 08:02:10 +01:00
479e5072d2 Create separate production configuration 2022-07-29 08:01:48 +01:00
a9f499ec67 Remove print statements 2022-07-28 21:30:23 +01:00
ef61145671 Add trailing slash to example directory 2022-07-28 21:29:08 +01:00
3818308b75 Add Portainer Git directory to env file 2022-07-28 21:27:26 +01:00
2f74d79bc4 Seamlessly handle nonexistent configurations 2022-07-28 21:11:01 +01:00
3d67578179 Add stack.env file 2022-07-28 19:57:26 +01:00
9715b28f47 Move env file to example 2022-07-28 19:50:48 +01:00
a258ec8ad1 Properly pass environment variables to the process 2022-07-28 19:50:07 +01:00
f66f998f54 Make some addresses and hosts configurable with environment variables 2022-07-28 19:38:37 +01:00
e3700e309d Lower compose version 2022-07-28 19:25:15 +01:00
4e195b2954 Add docker definitions 2022-07-28 19:21:08 +01:00
8409a39e57 Implement relay, channel and alias management 2022-07-27 22:03:42 +01:00
b30a3a535d Implement editing networks via the API 2022-07-27 08:59:17 +01:00
b9c1470410 Implement network and channels view 2022-07-26 22:16:35 +01:00
5aebf63c2e Implement API endpoint for network listing 2022-07-25 18:05:53 +01:00
b149886128 Don't send to Logstash if it's disabled 2022-07-21 13:40:40 +01:00
4b33559e65 Implement getting number of channels and users 2022-07-21 13:40:18 +01:00
f589c7fc16 Implement more API functions 2022-07-21 13:40:17 +01:00
47a3f84c1c Update config 2022-07-21 13:40:15 +01:00
f942e94ee5 Implement API 2022-07-21 13:40:13 +01:00
f0acbdbfa3 Begin work on API endpoint 2022-07-21 13:40:11 +01:00
e5a14b2c91 Reformat again 2022-07-21 13:40:09 +01:00
a5fd7d60fd Remove some legacy code 2022-07-21 13:40:07 +01:00
f4c5323de1 Reformat project 2022-07-21 13:40:05 +01:00
6c7d0d5c45 Reformat and fix circular import 2022-07-21 13:40:03 +01:00
3229d9b806 Revert "Reformat project"
This reverts commit 64e3e1160aa76d191740342ab3edc68807f890fb.
2022-07-21 13:40:01 +01:00
760e43b59a Reformat project 2022-07-21 13:39:59 +01:00
9d4d31fdc2 Don't attempt secondary registration if it is disabled 2022-07-21 13:39:57 +01:00
e4a6e0d3c2 Don't attempt to register if it is disabled 2022-07-21 13:39:56 +01:00
7ffdc63eeb Rename time to ts 2022-07-21 13:39:54 +01:00
757b22c4a1 Extra error handling around emails 2022-07-21 13:39:52 +01:00
1532cf482c Make Redis DBs configurable 2022-07-21 13:39:50 +01:00
4b2a1f2735 Add Redis DB numbers to configuration 2022-07-21 13:39:48 +01:00
5c7d71dc99 Fix provisioning with emails 2022-07-21 13:39:46 +01:00
745c7caa12 Fix some issues with the default config 2022-07-21 13:39:44 +01:00
e5685286ae Improve email command 2022-07-21 13:39:43 +01:00
ff1ee63900 Reformat code with pre-commit 2022-07-21 13:39:41 +01:00
Mark Veidemanis 0777a55264 Start implementing email command 2021-08-25 07:47:54 +00:00
Mark Veidemanis 152bc08970 Add Logstash file 2021-08-24 20:08:18 +00:00
Mark Veidemanis edc5f85ba6 Implement modifying emails for aliases 2021-06-06 10:31:13 +00:00
Mark Veidemanis c389094365 Finish Logstash implementation 2021-06-06 10:16:04 +00:00
Mark Veidemanis 5d63d7a1e9 Update requirements without versions 2021-06-06 10:13:43 +00:00
0959d978b3 Merge branch 'master' into datarestructure 2020-11-02 20:18:36 +00:00
9c95fa8eaf Implement relay-independent join 2020-11-02 20:14:02 +00:00
14daa9dfef Don't discard server messages 2020-11-02 20:13:36 +00:00
45fa21fea3 Use substitutions in registration tests 2020-11-01 22:19:03 +00:00
0473c57291 Additional error handling for command parsing 2020-11-01 22:18:48 +00:00
735fee9286 Fix bug with reg command 2020-11-01 20:43:51 +00:00
d405a4cd10 Add example file for blacklist 2020-11-01 19:55:32 +00:00
399075afd1 Implement channel blacklisting 2020-11-01 19:54:24 +00:00
a0bea0b18a Fix bug with using muser attribute when absent 2020-11-01 19:03:56 +00:00
5d09e1ade7 Fix syntax error in reg command 2020-11-01 18:50:17 +00:00
19e04dbf36 Implement setting modes in ZNC 2020-11-01 03:39:32 +00:00
abdfc48b95 Prepare command loader for reloading commands 2020-11-01 03:38:47 +00:00
f7e1f2d221 Implement registration at net-level 2020-11-01 03:37:29 +00:00
a78e05c0c3 Clarify message output on confirm command 2020-11-01 03:36:23 +00:00
e22349802b Log error when ZNC says a channel can't be joined 2020-10-31 23:58:51 +00:00
b652b11335 Fix registration cancellation bug in regproc 2020-10-31 23:58:03 +00:00
49fd03304d Fix various bugs and off by one with provisioning 2020-10-31 23:55:11 +00:00
b0eaa7fd47 Move WHO and NAMES logging to trace 2020-10-31 16:52:00 +00:00
9e17223258 Don't deduplicate global messages (NICK/QUIT) 2020-10-31 16:51:24 +00:00
d60d89dbf6 Improve authentication detection
Add a negative check in the event we are authenticated and registered,
but not confirmed, as this fools other checks.
2020-10-31 16:49:37 +00:00
eaeb4b72c2 Use zero-padded numbers to maximise usable ports 2020-10-31 00:13:59 +00:00
388cd1e4b9 Error checking in testing for registration message 2020-10-31 00:13:09 +00:00
b986d6ac45 Deauth bot when disconnected and lowercase user 2020-10-31 00:12:06 +00:00
c06e922749 Clarify error message to be more helpful 2020-10-31 00:11:28 +00:00
8deac2ab17 Implement another level of logging for tracing 2020-10-31 00:10:33 +00:00
4d25505625 Note that arguments to list are optional 2020-10-31 00:06:35 +00:00
69fbe180f1 Implement authentication checking on connection 2020-10-28 22:50:12 +00:00
812db95995 Add checks in dedup for time-less messages 2020-10-28 22:46:22 +00:00
b16b5d690b Fix decoding issue with some Redis keys 2020-10-28 22:30:49 +00:00
6acb106761 Provision users with lowercase names 2020-10-28 22:30:04 +00:00
7d9a45ee91 Add the time field to some notifications 2020-10-28 22:26:41 +00:00
913009ab71 Fix circular import in ChanKeep/provisioning modules 2020-10-28 18:38:27 +00:00
82c5c2d163 Start implementing prefixes 2020-07-09 19:43:47 +01:00
3acf182171 Fixes to auth detection and message parsing
* don't check authentication if the network doesn't need to
  register
* don't pass through muser for ZNC type messages
* avoid duplicate message for queries containing highlights
* make a copy of the cast for metadata analysis to avoid poisoning it
* set up callback for when the instance is authenticated, so we can
  request a LIST immediately if so desired
* separate out seeding functions to populate CHANLIMIT to ease future
  work involving other options, such as PREFIX
2020-06-07 17:26:53 +01:00
2a9869d0f9 Remove condition-based monitoring system 2020-06-07 15:31:43 +01:00
1640955e5c Fix various bugs in the event system
Squash many bugs in the event notification system and simplify the
code.
2020-06-02 21:34:15 +01:00
290e0b5f87 Fix syntax error in redis query 2020-05-31 21:54:43 +01:00
097f100ec5 Implement authentication detection
* pending command to see which instances have never authenticated
* authcheck command to see which instances are not currently
  authenticated
2020-05-31 21:52:56 +01:00
586a337ea4 Add help for pending command 2020-05-31 16:40:51 +01:00
5ee53ace4c Add additional error handling in user queries 2020-05-31 13:44:34 +01:00
81b0450904 Function to select and merge IRC network defs 2020-05-31 13:23:09 +01:00
5c6b626396 Check registration status before joining channels
Do not join channels if any relay for a network is unregistered.
2020-05-31 13:09:58 +01:00
4f9ca6088b Allow sending LIST to all networks at once 2020-05-31 13:08:00 +01:00
efb9666b6a Add confirm command
Confirm command to check which relays need manual
confirmation.
2020-05-31 12:32:12 +01:00
aec683ccce Remove leftover irc.json file 2020-05-30 21:42:26 +01:00
a3cdb35e05 Implement registration and confirmation of nicks 2020-05-30 21:40:10 +01:00
d99c3c394f Restructure provisioning into fewer functions 2020-05-30 21:37:22 +01:00
1ac1061348 Add irc.json to gitignore 2020-05-30 21:35:50 +01:00
690bf93676 Fix variable scope in LIST error handling 2020-04-21 23:32:17 +01:00
f4e5d248d5 Separate provisioning into user and auth info 2019-12-28 17:51:03 +00:00
97a25334aa Add IRC network definitions 2019-12-28 17:50:38 +00:00
06903d872e Add more comments and remove obsolete code 2019-12-07 16:35:29 +00:00
e3e522ad1e Add requirements 2019-11-17 19:09:17 +00:00
46 changed files with 1200 additions and 2391 deletions

.gitignore (vendored, 1 changed line)

@@ -159,3 +159,4 @@ docker/data
 *.pem
 legacy/conf/live/
 legacy/conf/cert/
+stack.env


@@ -1,15 +1,21 @@
 repos:
 - repo: https://github.com/psf/black
-  rev: 22.6.0
+  rev: 23.1.0
   hooks:
   - id: black
+    exclude: ^core/migrations
 - repo: https://github.com/PyCQA/isort
-  rev: 5.10.1
+  rev: 5.11.5
   hooks:
   - id: isort
     args: ["--profile", "black"]
 - repo: https://github.com/PyCQA/flake8
-  rev: 4.0.1
+  rev: 6.0.0
   hooks:
   - id: flake8
     args: [--max-line-length=88]
+    exclude: ^core/migrations
+- repo: https://github.com/sirwart/ripsecrets.git
+  rev: v0.1.5
+  hooks:
+  - id: ripsecrets


@@ -1,19 +1,19 @@
 # syntax=docker/dockerfile:1
-FROM python:3
-RUN useradd -d /code pathogen
+FROM python:3.10
+RUN useradd -d /code xf
 RUN mkdir /code
-RUN chown pathogen:pathogen /code
+RUN chown xf:xf /code
 RUN mkdir /venv
-RUN chown pathogen:pathogen /venv
-USER pathogen
+RUN chown xf:xf /venv
+USER xf
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PYTHONUNBUFFERED=1
 WORKDIR /code
 COPY requirements.txt /code/
-COPY discord-patched.tgz /code/
+COPY docker/discord-patched.tgz /code/
 RUN python -m venv /venv
 RUN . /venv/bin/activate && pip install -r requirements.txt

Makefile (new file, 20 lines)

@@ -0,0 +1,20 @@
run:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env up -d

build:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env build

stop:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env down

log:
	docker-compose -f docker-compose.prod.yml --env-file=stack.env logs -f --names

run-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env up -d

stop-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env down

log-infra:
	docker-compose -f docker-compose.infra.yml --env-file=stack.env logs -f

db.py (253 changed lines)

@@ -1,23 +1,52 @@
-import random
+import asyncio
+from math import ceil
+from os import getenv
+from time import sleep
+
+import aiomysql
 import aioredis
+import manticoresearch
+import msgpack
 import orjson
+from manticoresearch.rest import ApiException
-# Kafka
-from aiokafka import AIOKafkaProducer
+from numpy import array_split
 from redis import StrictRedis

 import util
+from schemas import mc_s

-# KAFKA_TOPIC = "msg"
+mysql_pool = None
+
+configuration = manticoresearch.Configuration(host="http://127.0.0.1:9308")
+api_client = manticoresearch.ApiClient(configuration)
+api_instance = manticoresearch.IndexApi(api_client)

 log = util.get_logger("db")

 # Redis (legacy)
-r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
+# r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
+r = StrictRedis(
+    host="127.0.0.1",  # Replace with your Redis server's IP address
+    port=1289,  # Replace with your Redis server's port
+    db=0,  # Database number
+)

 # AIORedis
-ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
+# ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
+ar = aioredis.from_url("redis://127.0.0.1:1289", db=0)
+
+# /var/run/neptune-redis.sock
+# db = 10
+pr = aioredis.from_url("unix://var/run/neptune-redis.sock", db=10)
+# fr = aioredis.from_url("unix://var/run/fisk-redis.sock", db=10)
+fr = aioredis.from_url("unix://var/run/redis.sock", db=10)
+# pr = aioredis.from_url("redis://redis_neptune:6379", db=10, password=getenv("REDIS_PASSWORD"))
+
+KEYNAME = "queue"
+MESSAGE_KEY = "messages"
+OHLC_MESSAGE_KEY = "ohlc"

 TYPES_MAIN = [
     "msg",
@@ -32,89 +61,174 @@ TYPES_MAIN = [
     "topic",
     "update",
 ]
-MAIN_SRC_MAP = {
-    "dis": "main",
-    "irc": "restricted",
-    "4ch": "main",
-}
 TYPES_META = ["who"]
 TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
-KEYNAME = "queue"
-
-
-async def store_kafka_batch(data):
-    # log.debug(f"Storing Kafka batch of {len(data)} messages")
-    producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
-    await producer.start()
-    topicmap = {}
-    for msg in data:
-        if msg["type"] in TYPES_MAIN:
-            # index = "main"
-            index = MAIN_SRC_MAP[msg["src"]]
-            # schema = mc_s.schema_main
-        elif msg["type"] in TYPES_META:
-            index = "meta"
-            # schema = mc_s.schema_meta
-        elif msg["type"] in TYPES_INT:
-            index = "internal"
-            # schema = mc_s.schema_int
-        KAFKA_TOPIC = index
-        # if key in schema:
-        #     if isinstance(value, int):
-        #         if schema[key].startswith("string") or schema[key].startswith(
-        #             "text"
-        #         ):
-        #             msg[key] = str(value)
-        body = orjson.dumps(msg)
-        if "ts" not in msg:
-            raise Exception("No TS in msg")
-        if KAFKA_TOPIC not in topicmap:
-            topicmap[KAFKA_TOPIC] = [body]
-        else:
-            topicmap[KAFKA_TOPIC].append(body)
-    for topic, messages in topicmap.items():
-        batch = producer.create_batch()
-        for body in messages:
-            metadata = batch.append(key=None, value=body, timestamp=msg["ts"])
-            if metadata is None:
-                partitions = await producer.partitions_for(topic)
-                partition = random.choice(tuple(partitions))
-                await producer.send_batch(batch, topic, partition=partition)
-                # log.debug(
-                #     (
-                #         f"{batch.record_count()} messages sent to topic "
-                #         f"{topic} partition {partition}"
-                #     )
-                # )
-                batch = producer.create_batch()
-                continue
-        partitions = await producer.partitions_for(topic)
-        partition = random.choice(tuple(partitions))
-        await producer.send_batch(batch, topic, partition=partition)
-        # log.debug(
-        #     (
-        #         f"{batch.record_count()} messages sent to topic "
-        #         f"{topic} partition {partition}"
-        #     )
-        # )
-    log.debug(
-        "Kafka batches sent: "
-        + ", ".join([topic + ": " + str(len(topicmap[topic])) for topic in topicmap])
-    )
-    await producer.stop()
+async def init_mysql_pool():
+    """
+    Initialize the MySQL connection pool.
+    """
+    global mysql_pool
+    mysql_pool = await aiomysql.create_pool(
+        host="127.0.0.1", port=9306, db="Manticore", minsize=1, maxsize=10
+    )
+
+
+async def rts_store_message(index, data):
+    """
+    Store a RTS message into MySQL using an existing connection pool.
+    Prioritizes instant PubSub delivery, with minimal data storage overhead.
+    :param index: str
+    :param data: dict
+    """
+    # Publish to Redis PubSub
+    packed_index = msgpack.packb({"index": index, "data": data}, use_bin_type=True)
+    try:
+        await fr.publish(OHLC_MESSAGE_KEY, packed_index)
+    except aioredis.exceptions.ConnectionError as e:
+        raise e
+    await asyncio.sleep(0.1)
+
+    # Insert data into MySQL
+    try:
+        async with mysql_pool.acquire() as conn:
+            async with conn.cursor() as cur:
+                # Insert data into the table
+                query = f"""
+                    INSERT INTO {index} (s, o, c, h, l, v, a, i, t, t2, ts)
+                    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
+                """
+                # Bind the values directly
+                await cur.execute(
+                    query,
+                    (
+                        data["s"],  # symbol
+                        data["o"],  # open
+                        data["c"],  # close
+                        data["h"],  # high
+                        data["l"],  # low
+                        data["v"],  # volume_base
+                        data["a"],  # volume_quote
+                        data["i"],  # interval
+                        data["t"],  # start_time
+                        data["t2"],  # end_time
+                        data["ts"],  # event_time
+                    ),
+                )
+                await conn.commit()
+        log.debug(f"Stored data for {data['s']} in MySQL.")
+    except aiomysql.Error as e:
+        log.error(f"MySQL error: {e}")
+
+
+async def store_batch(data):
+    """
+    Store a message into Manticore
+    :param data: list
+    """
+    if not data:
+        return
+    # 10000: maximum inserts we can submit to
+    # Manticore as of Sept 2022
+    split_posts = array_split(data, ceil(len(data) / 10000))
+    for messages in split_posts:
+        total = []
+        indexmap = {}
+        for msg in messages:
+            if msg["type"] in TYPES_MAIN:
+                index = "main"
+                schema = mc_s.schema_main
+            elif msg["type"] in TYPES_META:
+                index = "meta"
+                schema = mc_s.schema_meta
+            elif msg["type"] in TYPES_INT:
+                index = "internal"
+                schema = mc_s.schema_int
+            # normalise fields
+            for key, value in list(msg.items()):
+                if value is None:
+                    del msg[key]
+                if key in schema:
+                    if isinstance(value, int):
+                        if schema[key].startswith("string") or schema[key].startswith(
+                            "text"
+                        ):
+                            msg[key] = str(value)
+            body = {"insert": {"index": index, "doc": msg}}
+            total.append(body)
+            if "ts" not in msg:
+                raise Exception("No TS in msg")
+            if index not in indexmap:
+                indexmap[index] = [msg]
+            else:
+                indexmap[index].append(msg)
+        # END MSG IN MESSAGES
+
+        # Pack the indexmap with msgpack and publish it to Neptune
+        packed_index = msgpack.packb(indexmap, use_bin_type=True)
+        completed_publish = False
+        for i in range(10):
+            if completed_publish:
+                break
+            try:
+                await pr.publish(MESSAGE_KEY, packed_index)
+                completed_publish = True
+            except aioredis.exceptions.ConnectionError as e:
+                raise e
+            await asyncio.sleep(0.1)
+        if not completed_publish:
+            log.error("Failed to publish to Neptune")
+
+        body_post = ""
+        for item in total:
+            # print("ITEM", item)
+            body_post += orjson.dumps(item).decode("utf-8")
+            body_post += "\n"
+
+        # print("BODY POST INDEX", index, body_post)
+        try:
+            # Bulk index operations
+            api_response = api_instance.bulk(body_post)  # , async_req=True
+        except ApiException as e:
+            log.error("Exception when calling IndexApi->bulk: %s\n" % e)
+            log.error("body_post attempted to send", body_post)
+        log.info(f"Completed ingest to MC of length {len(total)}")
+    # END MESSAGES IN SPLIT
+
+
+def update_schema():
+    pass
+
+
+def create_index(api_client):
+    util_instance = manticoresearch.UtilsApi(api_client)
+    schemas = {
+        "main": mc_s.schema_main,
+        "rule_storage": mc_s.schema_rule_storage,
+        "meta": mc_s.schema_meta,
+        "internal": mc_s.schema_int,
+    }
+    for name, schema in schemas.items():
+        schema_types = ", ".join([f"{k} {v}" for k, v in schema.items()])
+        create_query = (
+            f"create table if not exists {name}({schema_types}) engine='columnar'"
+        )
+        print("Schema types", create_query)
+        util_instance.sql(create_query)


 async def queue_message(msg):
     """
     Queue a message on the Redis buffer.
     """
+    # TODO: msgpack
     message = orjson.dumps(msg)
-    await ar.sadd(KEYNAME, message)
+    await ar.lpush(KEYNAME, message)


 async def queue_message_bulk(data):
@@ -122,5 +236,6 @@ async def queue_message_bulk(data):
     Queue multiple messages on the Redis buffer.
     """
     for msg in data:
+        # TODO: msgpack
         message = orjson.dumps(msg)
-        await ar.sadd(KEYNAME, message)
+        await ar.lpush(KEYNAME, message)
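
For orientation: in the new db.py, queue_message() and queue_message_bulk() LPUSH orjson-encoded events onto the Redis list named by KEYNAME, and a separate ingest task later drains that list and hands decoded batches to store_batch(), which publishes them to Neptune over PubSub and bulk-inserts them into Manticore. A minimal consumer sketch under those assumptions (the chunk size, delay and rpop-based draining are illustrative only, not the repository's actual ingest module):

import asyncio

import orjson

import db  # the new db.py shown in the diff above

CHUNK_SIZE = 1000  # illustrative; the real value comes from MONOLITH_INGEST_CHUNK_SIZE
ITER_DELAY = 0.5   # illustrative; the real value comes from MONOLITH_INGEST_ITER_DELAY


async def ingest_loop():
    while True:
        # Drain up to CHUNK_SIZE JSON-encoded messages from the Redis list
        # that queue_message()/queue_message_bulk() push onto.
        raw = [await db.ar.rpop(db.KEYNAME) for _ in range(CHUNK_SIZE)]
        batch = [orjson.loads(item) for item in raw if item is not None]
        if batch:
            # Publish to Neptune and bulk-insert into Manticore.
            await db.store_batch(batch)
        await asyncio.sleep(ITER_DELAY)


if __name__ == "__main__":
    asyncio.run(ingest_loop())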

db_old_ref.py (new file, 174 lines)

@@ -0,0 +1,174 @@
import asyncio
from os import getenv

import aioredis
import msgpack
import orjson
import redis

# Elasticsearch
from elasticsearch import AsyncElasticsearch

import util

trues = ("true", "1", "t", True)

# INDEX = "msg"

log = util.get_logger("db")

# Redis (legacy)
# r = redis.from_url("redis://ssdb:1289", db=0)

# AIORedis
ar = aioredis.from_url("redis://ssdb:1289", db=0)

# Neptune redis for PubSub
pr = aioredis.from_url("redis://redis_neptune:6379", db=10)

TYPES_MAIN = [
    "msg",
    "notice",
    "action",
    "part",
    "join",
    "kick",
    "quit",
    "nick",
    "mode",
    "topic",
    "update",
]
MAIN_SRC_MAP = {
    "dis": "main",
    "irc": "restricted",
    "4ch": "main",
}
TYPES_META = ["who"]
TYPES_INT = ["conn", "highlight", "znc", "query", "self"]
KEYNAME = "queue"
MESSAGE_KEY = "messages"

ELASTICSEARCH_USERNAME = getenv("ELASTICSEARCH_USERNAME", "elastic")
ELASTICSEARCH_PASSWORD = getenv("ELASTICSEARCH_PASSWORD", "changeme")
ELASTICSEARCH_HOST = getenv("ELASTICSEARCH_HOST", "localhost")
ELASTICSEARCH_TLS = getenv("ELASTICSEARCH_TLS", "false") in trues

client = None

# These are sometimes numeric, sometimes strings.
# If they are seen to be numeric first, ES will erroneously
# index them as "long" and then subsequently fail to index messages
# with strings in the field.
keyword_fields = ["nick_id", "user_id", "net_id"]

mapping_int = {
    "mappings": {
        "properties": {
            "ts": {"type": "date", "format": "epoch_second"},
            "file_tim": {"type": "date", "format": "epoch_millis"},
        }
    }
}
mapping = dict(mapping_int)
for field in keyword_fields:
    mapping["mappings"]["properties"][field] = {"type": "text"}
del mapping_int["mappings"]["properties"]["file_tim"]


async def initialise_elasticsearch():
    """
    Initialise the Elasticsearch client.
    """
    auth = (ELASTICSEARCH_USERNAME, ELASTICSEARCH_PASSWORD)
    client = AsyncElasticsearch(ELASTICSEARCH_HOST, http_auth=auth, verify_certs=False)
    for index in ("main", "meta", "restricted", "internal"):
        if index == "internal":
            map_dict = mapping_int
        else:
            map_dict = mapping
        if await client.indices.exists(index=index):
            # update index with mapping
            await client.indices.put_mapping(
                index=index, properties=map_dict["mappings"]["properties"]
            )
        else:
            await client.indices.create(index=index, mappings=map_dict["mappings"])
    return client


async def store_batch(data):
    global client
    if not client:
        client = await initialise_elasticsearch()

    indexmap = {}
    for msg in data:
        if msg["type"] in TYPES_MAIN:
            # index = "main"
            index = MAIN_SRC_MAP[msg["src"]]
            # schema = mc_s.schema_main
        elif msg["type"] in TYPES_META:
            index = "meta"
            # schema = mc_s.schema_meta
        elif msg["type"] in TYPES_INT:
            index = "internal"
            # schema = mc_s.schema_int
        INDEX = index

        # if key in schema:
        #     if isinstance(value, int):
        #         if schema[key].startswith("string") or schema[key].startswith(
        #             "text"
        #         ):
        #             msg[key] = str(value)
        # body = orjson.dumps(msg)
        if "ts" not in msg:
            raise Exception("No TS in msg")
        if INDEX not in indexmap:
            indexmap[INDEX] = [msg]
        else:
            indexmap[INDEX].append(msg)

    # Pack the indexmap with msgpack and publish it to Neptune
    packed_index = msgpack.packb(indexmap, use_bin_type=True)
    completed_publish = False
    for i in range(10):
        if completed_publish:
            break
        try:
            await pr.publish(MESSAGE_KEY, packed_index)
            completed_publish = True
        except aioredis.exceptions.ConnectionError:
            await asyncio.sleep(0.1)
    if not completed_publish:
        log.error("Failed to publish to Neptune")

    for index, index_messages in indexmap.items():
        for message in index_messages:
            result = await client.index(index=index, body=message)
            if not result["result"] == "created":
                log.error(f"Indexing failed: {result}")
    log.debug(f"Indexed {len(data)} messages in ES")


async def queue_message(msg):
    """
    Queue a message on the Redis buffer.
    """
    # TODO: msgpack
    message = orjson.dumps(msg)
    await ar.lpush(KEYNAME, message)


async def queue_message_bulk(data):
    """
    Queue multiple messages on the Redis buffer.
    """
    for msg in data:
        # TODO: msgpack
        message = orjson.dumps(msg)
        await ar.lpush(KEYNAME, message)
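
The keyword_fields comment in db_old_ref.py above encodes a real Elasticsearch dynamic-mapping pitfall; a small illustration of the failure it guards against (the index name and values here are hypothetical):

from elasticsearch import AsyncElasticsearch


async def demo(client: AsyncElasticsearch):
    # Without an explicit mapping, the first document seen decides the type:
    # a numeric nick_id would be dynamically mapped as "long".
    await client.index(index="main", body={"nick_id": 12345, "ts": 1600000000})
    # A later document with a non-numeric nick_id then fails to index
    # (mapper_parsing_exception), which is why initialise_elasticsearch()
    # forces these fields to "text" before any data arrives.
    await client.index(index="main", body={"nick_id": "abc123", "ts": 1600000001})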

docker-compose.prod.yml (new file, 206 lines)

@@ -0,0 +1,206 @@
version: "2.2"
services:
rts:
image: xf/monolith:latest
container_name: rts_monolith
command: sh -c '. /venv/bin/activate && exec python rts.py'
build: .
volumes:
- ${PORTAINER_GIT_DIR}:/code
- type: bind
source: /code/run
target: /var/run
environment:
PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
MODULES_ENABLED: "${MODULES_ENABLED}"
MONOLITH_RTS_MEXC_API_ACCESS_KEY: "${MONOLITH_RTS_MEXC_API_ACCESS_KEY}"
MONOLITH_RTS_MEXC_API_SECRET_KEY: "${MONOLITH_RTS_MEXC_API_SECRET_KEY}"
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
app:
image: xf/monolith:latest
container_name: monolith
#command: sh -c '. /venv/bin/activate && exec python -m cProfile -o /tmp/profile.out monolith.py'
build: .
volumes:
- ${PORTAINER_GIT_DIR}:/code
- type: bind
source: /code/run
target: /var/run
environment:
PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
MODULES_ENABLED: "${MODULES_ENABLED}"
DISCORD_TOKEN: "${DISCORD_TOKEN}"
THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
#THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
# How many messages to ingest at once from Redis
MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
# Time to wait between polling Redis again
MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
# Number of 4chan threads to request at once
MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
# Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
# Time to wait after finishing a crawl before starting again
MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
# Semaphore value
MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
# Threads to use for data processing
# Leave uncommented to use all available threads
MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
# Enable performance metrics after message processing
MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
MONOLITH_PROCESS_TARGET_CPU_USAGE: "${MONOLITH_PROCESS_TARGET_CPU_USAGE}"
MONOLITH_CH4_TARGET_CPU_USAGE: "${MONOLITH_CH4_TARGET_CPU_USAGE}"
MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
REDIS_PASSWORD: "${REDIS_PASSWORD}"
MONOLITH_INGEST_INCREASE_BELOW: "${MONOLITH_INGEST_INCREASE_BELOW}"
MONOLITH_INGEST_INCREASE_BY: "${MONOLITH_INGEST_INCREASE_BY}"
MONOLITH_INGEST_DECREASE_ABOVE: "${MONOLITH_INGEST_DECREASE_ABOVE}"
MONOLITH_INGEST_DECREASE_BY: "${MONOLITH_INGEST_DECREASE_BY}"
MONOLITH_INGEST_MAX: "${MONOLITH_INGEST_MAX}"
MONOLITH_INGEST_MIN: "${MONOLITH_INGEST_MIN}"
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
threshold:
image: xf/threshold:latest
container_name: threshold
build: legacy/docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
volumes_from:
- tmp
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
environment:
PORTAINER_GIT_DIR: "${PORTAINER_GIT_DIR}"
MODULES_ENABLED: "${MODULES_ENABLED}"
DISCORD_TOKEN: "${DISCORD_TOKEN}"
THRESHOLD_LISTENER_HOST: "${THRESHOLD_LISTENER_HOST}"
THRESHOLD_LISTENER_PORT: "${THRESHOLD_LISTENER_PORT}"
THRESHOLD_LISTENER_SSL: "${THRESHOLD_LISTENER_SSL}"
THRESHOLD_RELAY_ENABLED: "${THRESHOLD_RELAY_ENABLED}"
THRESHOLD_RELAY_HOST: "${THRESHOLD_RELAY_HOST}"
THRESHOLD_RELAY_PORT: "${THRESHOLD_RELAY_PORT}"
THRESHOLD_RELAY_SSL: "${THRESHOLD_RELAY_SSL}"
THRESHOLD_API_ENABLED: "${THRESHOLD_API_ENABLED}"
THRESHOLD_API_HOST: "${THRESHOLD_API_HOST}"
THRESHOLD_API_PORT: "${THRESHOLD_API_PORT}"
THRESHOLD_CONFIG_DIR: "${THRESHOLD_CONFIG_DIR}"
#THRESHOLD_TEMPLATE_DIR: "${#THRESHOLD_TEMPLATE_DIR}"
THRESHOLD_CERT_DIR: "${THRESHOLD_CERT_DIR}"
# How many messages to ingest at once from Redis
MONOLITH_INGEST_CHUNK_SIZE: "${MONOLITH_INGEST_CHUNK_SIZE}"
# Time to wait between polling Redis again
MONOLITH_INGEST_ITER_DELAY: "${MONOLITH_INGEST_ITER_DELAY}"
# Number of 4chan threads to request at once
MONOLITH_CH4_THREADS_CONCURRENT: "${MONOLITH_CH4_THREADS_CONCURRENT}"
# Time to wait between every MONOLITH_CH4_THREADS_CONCURRENT threads
MONOLITH_CH4_THREADS_DELAY: "${MONOLITH_CH4_THREADS_DELAY}"
# Time to wait after finishing a crawl before starting again
MONOLITH_CH4_CRAWL_DELAY: "${MONOLITH_CH4_CRAWL_DELAY}"
# Semaphore value
MONOLITH_CH4_THREADS_SEMAPHORE: "${MONOLITH_CH4_THREADS_SEMAPHORE}"
# Threads to use for data processing
# Leave uncommented to use all available threads
MONOLITH_PROCESS_THREADS: "${MONOLITH_PROCESS_THREADS}"
# Enable performance metrics after message processing
MONOLITH_PROCESS_PERFSTATS: "${MONOLITH_PROCESS_PERFSTATS}"
MONOLITH_CH4_BOARDS: "${MONOLITH_CH4_BOARDS}"
REDIS_PASSWORD: "${REDIS_PASSWORD}"
# for development
extra_hosts:
- "host.docker.internal:host-gateway"
network_mode: host
ssdb:
image: tsl0922/ssdb
container_name: ssdb_monolith
ports:
- "1289:1289"
environment:
- SSDB_PORT=1289
volumes:
- monolith_ssdb_data:/var/lib/ssdb
# networks:
# - default
# - db
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
redis:
image: redis
container_name: redis_monolith
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- monolith_redis_data:/data
- type: bind
source: /code/run
target: /var/run
# volumes_from:
# - tmp
healthcheck:
test: "redis-cli ping"
interval: 2s
timeout: 2s
retries: 15
# networks:
# - default
# - xf
# - db
deploy:
resources:
limits:
cpus: '0.5'
memory: 1.0G
network_mode: host
# networks:
# default:
# driver: bridge
# xf:
# external: true
# db:
# external: true
volumes:
monolith_redis_data:
monolith_ssdb_data:
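
The MONOLITH_INGEST_INCREASE_BELOW / INCREASE_BY / DECREASE_ABOVE / DECREASE_BY / MAX / MIN variables above, together with the "Add psutil to requirements" and "Begin implementation of dynamic throttling framework" commits in the list, point at CPU-driven chunk throttling. A sketch of how those variables might drive it, with hypothetical defaults (an assumption for illustration, not the repository's actual throttling code):

from os import getenv

import psutil  # added to requirements by commit dc533f266f

INCREASE_BELOW = float(getenv("MONOLITH_INGEST_INCREASE_BELOW", "50"))  # CPU %, default is hypothetical
INCREASE_BY = int(getenv("MONOLITH_INGEST_INCREASE_BY", "100"))
DECREASE_ABOVE = float(getenv("MONOLITH_INGEST_DECREASE_ABOVE", "80"))  # CPU %, default is hypothetical
DECREASE_BY = int(getenv("MONOLITH_INGEST_DECREASE_BY", "100"))
CHUNK_MAX = int(getenv("MONOLITH_INGEST_MAX", "5000"))
CHUNK_MIN = int(getenv("MONOLITH_INGEST_MIN", "100"))


def adjust_chunk_size(current: int) -> int:
    """Grow the ingest chunk while CPU is idle, shrink it when CPU is saturated."""
    cpu = psutil.cpu_percent(interval=None)  # non-blocking snapshot since last call
    if cpu < INCREASE_BELOW:
        current += INCREASE_BY
    elif cpu > DECREASE_ABOVE:
        current -= DECREASE_BY
    return max(CHUNK_MIN, min(CHUNK_MAX, current))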


@@ -1,351 +0,0 @@
version: "2.2"
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services:
app:
image: pathogen/monolith:latest
container_name: monolith
build: ./docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
env_file:
- .env
volumes_from:
- tmp
depends_on:
broker:
condition: service_started
kafka:
condition: service_healthy
tmp:
condition: service_started
redis:
condition: service_healthy
# - db
threshold:
image: pathogen/threshold:latest
container_name: threshold
build: ./legacy/docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
env_file:
- .env
# for development
extra_hosts:
- "host.docker.internal:host-gateway"
volumes_from:
- tmp
depends_on:
tmp:
condition: service_started
redis:
condition: service_healthy
# db:
#image: pathogen/manticore:kibana
# image: manticoresearch/manticore:latest
#build:
# context: ./docker/manticore
# args:
# DEV: 1
# restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# - CONFIG_FILE=/config.yaml
# volumes:
# - ${PORTAINER_GIT_DIR}/docker/turnilo.yaml:/config.yaml
# depends_on:
# - broker
# metabase:
# container_name: metabase
# image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- redis:/data
db:
env_file: docker/.env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: docker/.env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports:
- 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: docker/.env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: docker/.env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- metadata_data:/var/lib/postgresql/data
environment:
POSTGRES_PASSWORD: FoolishPassword
POSTGRES_USER: druid
POSTGRES_DB: druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- druid_shared:/opt/shared
- coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
command:
- coordinator
env_file:
- environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- druid_shared:/opt/shared
- historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- druid_shared:/opt/shared
- middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
# image: manticoresearch/manticore:dev
# #build:
# # context: ./docker/manticore
# # args:
# # DEV: 1
# restart: always
# ports:
# - 9308
# - 9312
# - 9306
# ulimits:
# nproc: 65535
# nofile:
# soft: 65535
# hard: 65535
# memlock:
# soft: -1
# hard: -1
# environment:
# - MCL=1
# volumes:
# - ./docker/data:/var/lib/manticore
# - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- redis_data:/data
volumes_from:
- tmp
healthcheck:
test: "redis-cli -s /var/run/redis/redis.sock ping"
interval: 2s
timeout: 2s
retries: 15
networks:
default:
external:
name: pathogen
volumes:
superset_home:
external: false
db_home:
external: false
redis_superset:
external: false
redis_data: {}
metadata_data: {}
middle_var: {}
historical_var: {}
broker_var: {}
coordinator_var: {}
router_var: {}
druid_shared: {}


@@ -1,46 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
COMPOSE_PROJECT_NAME=superset
# database configurations (do not modify)
DATABASE_DB=superset
DATABASE_HOST=db
DATABASE_PASSWORD=superset
DATABASE_USER=superset
# database engine specific environment variables
# change the below if you prefer another database engine
DATABASE_PORT=5432
DATABASE_DIALECT=postgresql
POSTGRES_DB=superset
POSTGRES_USER=superset
POSTGRES_PASSWORD=superset
#MYSQL_DATABASE=superset
#MYSQL_USER=superset
#MYSQL_PASSWORD=superset
#MYSQL_RANDOM_ROOT_PASSWORD=yes
# Add the mapped in /app/pythonpath_docker which allows devs to override stuff
PYTHONPATH=/app/pythonpath:/app/docker/pythonpath_dev
REDIS_HOST=redis
REDIS_PORT=6379
FLASK_ENV=production
SUPERSET_ENV=production
SUPERSET_LOAD_EXAMPLES=yes
CYPRESS_CONFIG=false
SUPERSET_PORT=8088


@@ -1,348 +0,0 @@
version: "2.2"
# volumes:
# metadata_data: {}
# middle_var: {}
# historical_var: {}
# broker_var: {}
# coordinator_var: {}
# router_var: {}
# druid_shared: {}
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services:
app:
image: pathogen/monolith:latest
container_name: monolith
build: ./docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
env_file:
- ../stack.env
volumes_from:
- tmp
depends_on:
broker:
condition: service_started
kafka:
condition: service_healthy
tmp:
condition: service_started
redis:
condition: service_healthy
# - db
threshold:
image: pathogen/threshold:latest
container_name: threshold
build: ./legacy/docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
- "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
env_file:
- ../stack.env
volumes_from:
- tmp
depends_on:
tmp:
condition: service_started
redis:
condition: service_healthy
# db:
#image: pathogen/manticore:kibana
# image: manticoresearch/manticore:latest
#build:
# context: ./docker/manticore
# args:
# DEV: 1
# restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# depends_on:
# - broker
# metabase:
# container_name: metabase
# image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- redis:/data
db:
env_file: .env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: .env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports:
- 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: .env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- /block/store/metadata_data:/var/lib/postgresql/data
environment:
- POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
command:
- coordinator
env_file:
- environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- /block/store/broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- /block/store/router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
# image: manticoresearch/manticore:dev
# #build:
# # context: ./docker/manticore
# # args:
# # DEV: 1
# restart: always
# ports:
# - 9308
# - 9312
# - 9306
# ulimits:
# nproc: 65535
# nofile:
# soft: 65535
# hard: 65535
# memlock:
# soft: -1
# hard: -1
# environment:
# - MCL=1
# volumes:
# - ./docker/data:/var/lib/manticore
# - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- redis_data:/data
volumes_from:
- tmp
healthcheck:
test: "redis-cli -s /var/run/redis/redis.sock ping"
interval: 2s
timeout: 2s
retries: 15
networks:
default:
external:
name: pathogen
volumes:
redis_data: {}
superset_home:
external: false
db_home:
external: false
redis:
external: false


@@ -1,87 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=2
druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>


@@ -1,265 +0,0 @@
#!/bin/sh
ip=`hostname -i|rev|cut -d\ -f 1|rev`
cat << EOF
searchd {
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
# access_plain_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
# access_blob_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
# access_doclists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
# access_hitlists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
# agent_connect_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
# agent_query_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
# agent_retry_count = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
# agent_retry_delay = 500
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
# attr_flush_period = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
# binlog_flush = 2
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
# binlog_max_log_size = 268435456
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
# binlog_path =
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
# client_timeout = 300
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
# collation_libc_locale = C
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
# collation_server = libc_ci
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
data_dir = /var/lib/manticore
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
# docstore_cache_size = 16m
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
# expansion_limit = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
# grouping_in_utc = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
# ha_period_karma = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
# ha_ping_interval = 1000
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
# hostname_lookup =
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
# jobs_queue_size =
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
# listen_backlog = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
# listen_env = this directive allows to append listeners from environment variables
listen = 9306:mysql41
listen = /var/run/mysqld/mysqld.sock:mysql41
listen = $ip:9312
listen = 9308:http
listen = $ip:9315-9325:replication
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
# listen_tfo = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#log
log = /var/log/manticore/searchd.log
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
# max_batch_queries = 32
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
# threads =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
# max_filters = 256
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
# max_filter_values = 4096
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
# max_open_files = max
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
max_packet_size = 128M
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
# mysql_version_string =
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
# net_workers = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
# net_wait_tm = -1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
# net_throttle_accept = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
# net_throttle_action = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
# node_address =
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
# ondisk_attrs_default = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
# persistent_connections_limit =
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
pid_file = /var/run/manticore/searchd.pid
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
# preopen_indexes = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
qcache_max_bytes = 128Mb
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
qcache_thresh_msec = 150
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
qcache_ttl_sec = 120
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
query_log_format = sphinxql
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
# query_log_min_msec = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
# query_log = /var/log/manticore/query.log
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
# query_log_mode = 600
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
# max_connections =
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
# network_timeout = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
# read_buffer = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
# read_buffer_docs = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
# read_buffer_hits = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
# read_unhinted 32K
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
# rt_flush_period =
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
# rt_merge_iops = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
# rt_merge_maxiosize = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
# seamless_rotate = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
# server_id =
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
# shutdown_timeout = 3
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
# shutdown_token =
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
# snippets_file_prefix =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
# sphinxql_state =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
# sphinxql_timeout = 900
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
# ssl_ca =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
# ssl_cert =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
# ssl_key =
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
# subtree_docs_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
# subtree_hits_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
# thread_stack =
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
# unlink_old = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
# watchdog = 1
}
common {
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
# lemmatizer_base = /usr/local/share
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
# progressive_merge =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
# json_autoconv_keynames =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
# json_autoconv_numbers = 0
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
# on_json_attr_error = ignore_attr
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
# plugin_dir =
}
# indexer {
# lemmatizer_cache = 1024M
# max_iops = 0
# max_iosize = 0
# mem_limit = 1024M
# }
EOF


@@ -1,12 +0,0 @@
_HiStOrY_V2_
SELECT * FROM films WHERE MATCH('"shark monkey boy robot"/2') AND release_year IN(2006,2007) AND rental_rate BETWEEN 2.0 and 3.0;
SELECT title, HIGHLIGHT({},'description') FROM films WHERE MATCH('"shark monkey boy robot"/2');
SELECT * FROM films WHERE MATCH('" shark monkey boy robot "/2');
SELECT * FROM films WHERE MATCH('Emotional drama') FACET release_year FACET category_id;
SELECT * FROM films WHERE MATCH('Emotional drama') GROUP BY release_year;
SELECT * FROM films WHERE MATCH('Emotional drama -dog -shark');
SELECT * FROM films WHERE MATCH('Emotional drama');
SELECT * FROM films;
DESCRIBE films;
SHOW TABLES;
SOURCE /sandbox.sql


@@ -1,76 +0,0 @@
FROM ubuntu:focal
ARG DEV
ARG DAEMON_URL
ARG MCL_URL
RUN groupadd -r manticore && useradd -r -g manticore manticore
ENV GOSU_VERSION 1.11
ENV MCL_URL=${MCL_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_focal/dists/focal/main/binary-amd64/manticore-columnar-lib_1.15.4-220522-2fef34e_amd64.deb"}
ENV DAEMON_URL=${DAEMON_URL:-"https://repo.manticoresearch.com/repository/manticoresearch_focal/dists/manticore_5.0.2-220530-348514c86_amd64.tgz"}
ENV BETA_URL=${BETA_URL:-"https://repo.manticoresearch.com/repository/kibana_beta/ubuntu/focal.zip"}
RUN set -x \
&& apt-get update && apt-get -y install --no-install-recommends ca-certificates binutils wget gnupg dirmngr unzip && rm -rf /var/lib/apt/lists/* \
&& wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture)" \
&& wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$(dpkg --print-architecture).asc" \
&& export GNUPGHOME="$(mktemp -d)" \
&& gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4 \
&& gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu \
&& { command -v gpgconf > /dev/null && gpgconf --kill all || :; } \
&& rm -rf "$GNUPGHOME" /usr/local/bin/gosu.asc \
&& chmod +x /usr/local/bin/gosu \
&& gosu nobody true && \
if [ "${DEV}" = "1" ]; then \
echo "DEV IS ONE" && \
exit && \
wget https://repo.manticoresearch.com/manticore-dev-repo.noarch.deb \
&& dpkg -i manticore-dev-repo.noarch.deb \
&& apt-key adv --fetch-keys 'https://repo.manticoresearch.com/GPG-KEY-manticore' && apt-get -y update && apt-get -y install manticore \
&& apt-get update \
&& echo $(apt-get -y download --print-uris manticore-columnar-lib | cut -d" " -f1 | cut -d "'" -f 2) > /mcl.url ;\
elif [ "${DEV}" = "2" ]; then \
echo "DEV IS TWO" && \
wget $BETA_URL && unzip focal.zip && rm focal.zip && \
dpkg -i build/* && echo $MCL_URL > /mcl.url; rm build/* ;\
else \
echo "DEV NOT EITHER" && \
exit && \
wget $DAEMON_URL && ARCHIVE_NAME=$(ls | grep '.tgz' | head -n1 ) && tar -xf $ARCHIVE_NAME && rm $ARCHIVE_NAME && \
dpkg -i manticore* && echo $MCL_URL > /mcl.url && rm *.deb ; \
fi \
&& mkdir -p /var/run/manticore && mkdir -p /var/lib/manticore/replication \
&& apt-get update && apt-get -y install libexpat1 libodbc1 libpq5 openssl libcrypto++6 libmysqlclient21 mysql-client \
&& apt-get -y purge --auto-remove \
&& rm -rf /var/lib/apt/lists/* \
&& rm -f /usr/bin/mariabackup /usr/bin/mysqldump /usr/bin/mysqlslap /usr/bin/mysqladmin /usr/bin/mysqlimport \
/usr/bin/mysqlshow /usr/bin/mbstream /usr/bin/mysql_waitpid /usr/bin/innotop /usr/bin/mysqlaccess /usr/bin/mytop \
/usr/bin/mysqlreport /usr/bin/mysqldumpslow /usr/bin/mysql_find_rows /usr/bin/mysql_fix_extensions \
/usr/bin/mysql_embedded /usr/bin/mysqlcheck \
&& rm -f /usr/bin/spelldump /usr/bin/wordbreaker \
&& mkdir -p /var/run/mysqld/ && chown manticore:manticore /var/run/mysqld/ \
&& echo "\n[mysql]\nsilent\nwait\ntable\n" >> /etc/mysql/my.cnf && \
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/en.pak.tgz && \
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/de.pak.tgz && \
wget -P /tmp https://repo.manticoresearch.com/repository/morphology/ru.pak.tgz && \
tar -xf /tmp/en.pak.tgz -C /usr/share/manticore/ && \
tar -xf /tmp/de.pak.tgz -C /usr/share/manticore/ && \
tar -xf /tmp/ru.pak.tgz -C /usr/share/manticore/
COPY manticore.conf /etc/manticoresearch/
COPY sandbox.sql /sandbox.sql
COPY .mysql_history /root/.mysql_history
COPY docker-entrypoint.sh /usr/local/bin/
RUN ln -s usr/local/bin/docker-entrypoint.sh /entrypoint.sh
WORKDIR /var/lib/manticore
ENTRYPOINT ["docker-entrypoint.sh"]
EXPOSE 9306
EXPOSE 9308
EXPOSE 9312
ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8
CMD ["sh", "-c", "(echo 'START WAIT' && sleep 5 && echo 'END WAIT' && mysql -P9306 -h0 -e 'set global log_management = 0; set global log_management = 1;') & searchd --nodetach"]


@@ -1,278 +0,0 @@
# Manticore Search Docker image
This is the git repo of official [Docker image](https://hub.docker.com/r/manticoresearch/manticore/) for [Manticore Search](https://github.com/manticoresoftware/manticoresearch).
Manticore Search is an easy-to-use, fast, open source database for search. It helps thousands of companies, from small to large (such as Craigslist), search and filter petabytes of text data on a single node or hundreds of nodes, do streaming full-text filtering, and add auto-complete, spell correction, more-like-this, faceting and other search-related features to their websites and applications.
The default configuration includes a sample Real-Time index and listens on the default ports:
* `9306` for connections from a MySQL client
* `9308` for connections via HTTP
* `9312` for connections via a binary protocol (e.g. in case you run a cluster)
The image comes with libraries for easy indexing of data from MySQL, PostgreSQL, XML and CSV files.
# How to run Manticore Search Docker image
## Quick usage
The simplest way to start Manticore in a container and log in to it via the mysql client is:
```bash
docker run --name manticore --rm -d manticoresearch/manticore && sleep 3 && docker exec -it manticore mysql && docker stop manticore
```
When you exit the mysql client, the container is stopped and removed, so **use this only for testing / sandboxing purposes**. See below for how to use it in production.
The image comes with a sample index which can be loaded like this:
```mysql
mysql> source /sandbox.sql
```
The mysql client also has several sample queries in its history that you can run on the above index; just use the Up/Down keys in the client to see and run them.
## Production use
### Ports and mounting points
For data persistence `/var/lib/manticore/` should be mounted to local storage or other desired storage engine.
```bash
docker run --name manticore -v $(pwd)/data:/var/lib/manticore -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
```
The configuration file inside the instance is located at `/etc/manticoresearch/manticore.conf`. For custom settings, mount your own configuration file to this path.
The ports are 9306/9308/9312 for SQL/HTTP/binary; expose them depending on how you are going to use Manticore. For example:
```bash
docker run --name manticore -v $(pwd)/manticore.conf:/etc/manticoresearch/manticore.conf -v $(pwd)/data:/var/lib/manticore/ -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 -d manticoresearch/manticore
```
Make sure to remove `127.0.0.1:` if you want the ports to be available for external hosts.
### Manticore Columnar Library
The Docker image doesn't include the [Manticore Columnar Library](https://github.com/manticoresoftware/columnar), which is required if you need:
* columnar storage
* secondary indexes
but you can easily enable it at runtime by setting the environment variable `MCL=1`, i.e. `docker run -e MCL=1 ... manticoresearch/manticore`. The container will then download and install the library and put it in the data dir (which is normally mapped as a volume in production). The next time you run the container the library will already be there, so it won't be downloaded again unless you change the Manticore Search version.
### Docker-compose
In many cases you might want to use Manticore together with other images specified in a docker-compose YAML file. Here is the minimal recommended specification for Manticore Search in docker-compose.yml:
```yaml
version: '2.2'
services:
manticore:
container_name: manticore
image: manticoresearch/manticore
restart: always
ports:
- 127.0.0.1:9306:9306
- 127.0.0.1:9308:9308
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
environment:
- MCL=1
volumes:
- ./data:/var/lib/manticore
# - ./manticore.conf:/etc/manticoresearch/manticore.conf # uncomment if you use a custom config
```
Besides using the exposed ports 9306 and 9308 you can log into the instance by running `docker-compose exec manticore mysql`.
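If you prefer connecting from the host, any MySQL-protocol client pointed at the mapped SQL port should work; a minimal sketch, assuming the 9306 mapping from the compose example above:
```bash
# Connect to Manticore's SQL interface from the host (9306 is mapped to 127.0.0.1 in the compose example above)
mysql -h 127.0.0.1 -P 9306
```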
### HTTP protocol
Manticore is accessible via HTTP on ports 9308 and 9312. You can map either of them locally and connect with curl:
```bash
docker run --name manticore -p 9308:9308 -d manticoresearch/manticore
```
Create a table:
```bash
curl -X POST 'http://127.0.0.1:9308/sql' -d 'mode=raw&query=CREATE TABLE testrt ( title text, content text, gid integer)'
```
Insert a document:
```bash
curl -X POST 'http://127.0.0.1:9308/json/insert' -d'{"index":"testrt","id":1,"doc":{"title":"Hello","content":"world","gid":1}}'
```
Perform a simple search:
```bash
curl -X POST 'http://127.0.0.1:9308/json/search' -d '{"index":"testrt","query":{"match":{"*":"hello world"}}}'
```
### Logging
By default, Manticore logs to `/dev/stdout`, so you can watch the log on the host with:
```bash
docker logs manticore
```
If you want to get a log of your queries the same way, pass the environment variable `QUERY_LOG_TO_STDOUT=true`.
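For example, a minimal sketch that enables query logging and then follows the output (the container name and port mapping are illustrative):
```bash
# Run Manticore with query logging sent to stdout, then follow the combined daemon and query log
docker run --name manticore -e QUERY_LOG_TO_STDOUT=true -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
docker logs -f manticore
```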
### Multi-node cluster with replication
Here is a simple `docker-compose.yml` for defining a two node cluster:
```yaml
version: '2.2'
services:
manticore-1:
image: manticoresearch/manticore
restart: always
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
environment:
- MCL=1
networks:
- manticore
manticore-2:
image: manticoresearch/manticore
restart: always
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
memlock:
soft: -1
hard: -1
environment:
- MCL=1
networks:
- manticore
networks:
manticore:
driver: bridge
```
* Start it: `docker-compose up`
* Create a cluster with a table:
```mysql
$ docker-compose exec manticore-1 mysql
mysql> CREATE TABLE testrt ( title text, content text, gid integer);
mysql> CREATE CLUSTER posts;
Query OK, 0 rows affected (0.24 sec)
mysql> ALTER CLUSTER posts ADD testrt;
Query OK, 0 rows affected (0.07 sec)
MySQL [(none)]> exit
Bye
```
* Join the cluster on the 2nd instance and insert something into the table:
```mysql
$ docker-compose exec manticore-2 mysql
mysql> JOIN CLUSTER posts AT 'manticore-1:9312';
mysql> INSERT INTO posts:testrt(title,content,gid) VALUES('hello','world',1);
Query OK, 1 row affected (0.00 sec)
MySQL [(none)]> exit
Bye
```
* If you now go back to the first instance you'll see the new record:
```mysql
$ docker-compose exec manticore-1 mysql
MySQL [(none)]> select * from testrt;
+---------------------+------+-------+---------+
| id | gid | title | content |
+---------------------+------+-------+---------+
| 3891565839006040065 | 1 | hello | world |
+---------------------+------+-------+---------+
1 row in set (0.00 sec)
MySQL [(none)]> exit
Bye
```
## Memory locking and limits
It's recommended to override Docker's default ulimits for the Manticore instance:
```bash
--ulimit nofile=65536:65536
```
For the best performance, Manticore tables' components can be locked into memory. When Manticore is run under Docker, the instance requires additional privileges to allow memory locking. The following options must be added when running the instance:
```bash
--cap-add=IPC_LOCK --ulimit memlock=-1:-1
```
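Putting both recommendations together, a minimal sketch of a production-style run (the data path and port mappings are illustrative):
```bash
# Raise the open-file limit, allow memory locking, and persist data under ./data
docker run --name manticore \
  --ulimit nofile=65536:65536 \
  --ulimit memlock=-1:-1 --cap-add=IPC_LOCK \
  -v $(pwd)/data:/var/lib/manticore \
  -p 127.0.0.1:9306:9306 -p 127.0.0.1:9308:9308 \
  -d manticoresearch/manticore
```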
## Configuring Manticore Search with Docker
If you want to run Manticore with a custom config containing index definitions, you will need to mount the configuration into the instance:
```bash
docker run --name manticore -v $(pwd)/manticore.conf:/etc/manticoresearch/manticore.conf -v $(pwd)/data/:/var/lib/manticore -p 127.0.0.1:9306:9306 -d manticoresearch/manticore
```
Note that Manticore Search inside the container runs as user `manticore`. Operations on tables (like creating or rotating plain indexes) should also be performed as `manticore`; otherwise the files will be created as `root` and the search daemon won't have the rights to open them. For example, here is how you can rotate all plain indexes:
```bash
docker exec -it manticore gosu manticore indexer --all --rotate
```
### Environment variables
You can also set individual `searchd` and `common` configuration settings using Docker environment variables.
The settings must be prefixed with their section name; for example, to change the value of the setting `mysql_version_string` in the `searchd` section, the variable must be named `searchd_mysql_version_string`:
```bash
docker run --name manticore -p 127.0.0.1:9306:9306 -e searchd_mysql_version_string='5.5.0' -d manticoresearch/manticore
```
In the case of the `listen` directive, you can use the Docker variable `searchd_listen` to pass new listening interfaces in addition to the default ones. Multiple interfaces can be declared, separated by a pipe character ("|").
To listen only on the instance's network address, `$ip` (retrieved internally from `hostname -i`) can be used as an address alias.
For example, `-e searchd_listen='9316:http|9307:mysql|$ip:5443:mysql_vip'` will add an additional SQL interface on port 9307, a SQL VIP on port 5443 bound only to the instance IP, and HTTP on port 9316, besides the defaults on 9306 and 9308.
```bash
$ docker run --rm -p 1188:9307 -e searchd_mysql_version_string='5.5.0' -e searchd_listen='9316:http|9307:mysql|$ip:5443:mysql_vip' manticore
[Mon Aug 17 07:31:58.719 2020] [1] using config file '/etc/manticoresearch/manticore.conf' (9130 chars)...
listening on all interfaces for http, port=9316
listening on all interfaces for mysql, port=9307
listening on 172.17.0.17:5443 for VIP mysql
listening on all interfaces for mysql, port=9306
listening on UNIX socket /var/run/mysqld/mysqld.sock
listening on 172.17.0.17:9312 for sphinx
listening on all interfaces for http, port=9308
prereading 0 indexes
prereaded 0 indexes in 0.000 sec
accepting connections
```
# Issues
For reporting issues, please use the [issue tracker](https://github.com/manticoresoftware/docker/issues).


@@ -1,118 +0,0 @@
#!/bin/bash
set -eo pipefail
echo "RUNNING ENTRYPOINT"
# check to see if this file is being run or sourced from another script
_is_sourced() {
# https://unix.stackexchange.com/a/215279
[ "${#FUNCNAME[@]}" -ge 2 ] &&
[ "${FUNCNAME[0]}" = '_is_sourced' ] &&
[ "${FUNCNAME[1]}" = 'source' ]
}
_searchd_want_help() {
local arg
for arg; do
case "$arg" in
-'?' | --help | -h | -v)
return 0
;;
esac
done
return 1
}
docker_setup_env() {
if [ -n "$QUERY_LOG_TO_STDOUT" ]; then
export searchd_query_log=/var/log/manticore/query.log
[ ! -f /var/log/manticore/query.log ] && ln -sf /dev/stdout /var/log/manticore/query.log
fi
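# When MCL=1, link the Manticore Columnar Library from /var/lib/manticore/.mcl/ and, on first run or after a version mismatch, download the matching build listed in /mcl.url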
if [[ "${MCL}" == "1" ]]; then
LIB_MANTICORE_COLUMNAR="/var/lib/manticore/.mcl/lib_manticore_columnar.so"
LIB_MANTICORE_SECONDARY="/var/lib/manticore/.mcl/lib_manticore_secondary.so"
[ -L /usr/share/manticore/modules/lib_manticore_columnar.so ] || ln -s $LIB_MANTICORE_COLUMNAR /usr/share/manticore/modules/lib_manticore_columnar.so
[ -L /usr/share/manticore/modules/lib_manticore_secondary.so ] || ln -s $LIB_MANTICORE_SECONDARY /usr/share/manticore/modules/lib_manticore_secondary.so
searchd -v|grep -i error|egrep "trying to load" \
&& rm $LIB_MANTICORE_COLUMNAR $LIB_MANTICORE_SECONDARY \
&& echo "WARNING: wrong MCL version was removed, installing the correct one"
if [[ ! -f "$LIB_MANTICORE_COLUMNAR" || ! -f "$LIB_MANTICORE_SECONDARY" ]]; then
if ! mkdir -p /var/lib/manticore/.mcl/ ; then
echo "ERROR: Manticore Columnar Library is inaccessible: couldn't create /var/lib/manticore/.mcl/."
exit
fi
MCL_URL=$(cat /mcl.url)
wget -P /tmp $MCL_URL
LAST_PATH=$(pwd)
cd /tmp
PACKAGE_NAME=$(ls | grep manticore-columnar | head -n 1)
ar -x $PACKAGE_NAME
tar -xf data.tar.gz
find . -name '*.so' -exec cp {} /var/lib/manticore/.mcl/ \;
cd $LAST_PATH
fi
fi
}
_main() {
# first arg is `h` or some `--option`
if [ "${1#-}" != "$1" ]; then
set -- searchd "$@"
fi
# Amended from searchd to sh since we're using sh to wait until searchd starts, then set the Kibana-specific options
if [ "$1" = 'sh' ] && ! _searchd_want_help "@"; then
docker_setup_env "$@"
# allow the container to be started with `--user`
if [ "$(id -u)" = '0' ]; then
find /var/lib/manticore /var/log/manticore /var/run/manticore /etc/manticoresearch \! -user manticore -exec chown manticore '{}' +
exec gosu manticore "$0" "$@"
fi
fi
_replace_conf_from_env
exec "$@"
}
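# Build a sed script from all searchd_*/common_* environment variables and apply it to /etc/manticoresearch/manticore.conf, so each variable overrides (or un-comments) the matching directive; a searchd_listen value may carry several listeners separated by "|"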
_replace_conf_from_env() {
sed_query=""
while IFS='=' read -r oldname value; do
if [[ $oldname == 'searchd_'* || $oldname == 'common_'* ]]; then
value=$(echo ${!oldname} | sed 's/\//\\\//g')
oldname=$(echo $oldname | sed "s/searchd_//g;s/common_//g;")
newname=$oldname
if [[ $newname == 'listen' ]]; then
oldname="listen_env"
IFS='|' read -ra ADDR <<<"$value"
count=0
for i in "${ADDR[@]}"; do
if [[ $count == 0 ]]; then
value=$i
else
value="$value\n listen = $i"
fi
count=$((count + 1))
done
fi
if [[ -z $sed_query ]]; then
sed_query="s/(#\s)*?$oldname\s?=\s?.*?$/$newname = $value/g"
else
sed_query="$sed_query;s/(#\s)*?$oldname\s?=\s?.*?$/$newname = $value/g"
fi
fi
done < <(env)
if [[ ! -z $sed_query ]]; then
sed -i -E "$sed_query" /etc/manticoresearch/manticore.conf
fi
}
# If we are sourced from elsewhere, don't perform any further actions
if ! _is_sourced; then
_main "$@"
fi


@@ -1,259 +0,0 @@
#!/bin/sh
ip=`hostname -i|rev|cut -d\ -f 1|rev`
cat << EOF
searchd {
# https://manual.manticoresearch.com/Server_settings/Searchd#access_plain_attrs
# access_plain_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_blob_attrs
# access_blob_attrs = mmap_preread
# https://manual.manticoresearch.com/Server_settings/Searchd#access_doclists
# access_doclists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#access_hitlists
# access_hitlists = file
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_connect_timeout
# agent_connect_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_query_timeout
# agent_query_timeout =
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_count
# agent_retry_count = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#agent_retry_delay
# agent_retry_delay = 500
# https://manual.manticoresearch.com/Server_settings/Searchd#attr_flush_period
# attr_flush_period = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_flush
# binlog_flush = 2
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_max_log_size
# binlog_max_log_size = 268435456
# https://manual.manticoresearch.com/Server_settings/Searchd#binlog_path
# binlog_path =
# https://manual.manticoresearch.com/Server_settings/Searchd#client_timeout
# client_timeout = 300
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_libc_locale
# collation_libc_locale = C
# https://manual.manticoresearch.com/Server_settings/Searchd#collation_server
# collation_server = libc_ci
# https://manual.manticoresearch.com/Server_settings/Searchd#data_dir
data_dir = /var/lib/manticore
# https://manual.manticoresearch.com/Server_settings/Searchd#docstore_cache_size
# docstore_cache_size = 16m
# https://manual.manticoresearch.com/Server_settings/Searchd#expansion_limit
# expansion_limit = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#grouping_in_utc
# grouping_in_utc = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_period_karma
# ha_period_karma = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#ha_ping_interval
# ha_ping_interval = 1000
# https://manual.manticoresearch.com/Server_settings/Searchd#hostname_lookup
# hostname_lookup =
# https://manual.manticoresearch.com/Server_settings/Searchd#jobs_queue_size
# jobs_queue_size =
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_backlog
# listen_backlog = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#listen
# listen_env = this directive allows to append listeners from environment variables
listen = 9306:mysql41
listen = /var/run/mysqld/mysqld.sock:mysql41
listen = $ip:9312
listen = 9308:http
listen = $ip:9315-9325:replication
# https://manual.manticoresearch.com/Server_settings/Searchd#listen_tfo
# listen_tfo = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#log
log = /var/log/manticore/searchd.log
# https://manual.manticoresearch.com/Server_settings/Searchd#max_batch_queries
# max_batch_queries = 32
# https://manual.manticoresearch.com/Server_settings/Searchd#threads
# threads =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filters
# max_filters = 256
# https://manual.manticoresearch.com/Server_settings/Searchd#max_filter_values
# max_filter_values = 4096
# https://manual.manticoresearch.com/Server_settings/Searchd#max_open_files
# max_open_files =
# https://manual.manticoresearch.com/Server_settings/Searchd#max_packet_size
max_packet_size = 128M
# https://manual.manticoresearch.com/Server_settings/Searchd#mysql_version_string
# mysql_version_string =
# https://manual.manticoresearch.com/Server_settings/Searchd#net_workers
# net_workers = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_wait_tm
# net_wait_tm = -1
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_accept
# net_throttle_accept = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#net_throttle_action
# net_throttle_action = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#node_address
# node_address =
# https://manual.manticoresearch.com/Server_settings/Searchd#ondisk_attrs_default
# ondisk_attrs_default = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#persistent_connections_limit
# persistent_connections_limit =
# https://manual.manticoresearch.com/Server_settings/Searchd#pid_file
pid_file = /var/run/manticore/searchd.pid
# https://manual.manticoresearch.com/Server_settings/Searchd#predicted_time_costs
# predicted_time_costs = doc=64, hit=48, skip=2048, match=64
# https://manual.manticoresearch.com/Server_settings/Searchd#preopen_indexes
# preopen_indexes = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_max_bytes
# qcache_max_bytes = 16Mb
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_thresh_msec
# qcache_thresh_msec = 3000
# https://manual.manticoresearch.com/Server_settings/Searchd#qcache_ttl_sec
# qcache_ttl_sec = 60
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_format
query_log_format = sphinxql
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_min_msec
query_log_min_msec = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log
query_log = /var/log/manticore/query.log
# https://manual.manticoresearch.com/Server_settings/Searchd#query_log_mode
# query_log_mode = 600
# https://manual.manticoresearch.com/Server_settings/Searchd#max_connections
# max_connections =
# https://manual.manticoresearch.com/Server_settings/Searchd#network_timeout
# network_timeout = 5
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer
# read_buffer = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_docs
# read_buffer_docs = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_buffer_hits
# read_buffer_hits = 256K
# https://manual.manticoresearch.com/Server_settings/Searchd#read_unhinted
# read_unhinted 32K
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_flush_period
# rt_flush_period =
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_iops
# rt_merge_iops = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#rt_merge_maxiosize
# rt_merge_maxiosize = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#seamless_rotate
# seamless_rotate = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#server_id
# server_id =
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_timeout
# shutdown_timeout = 3
# https://manual.manticoresearch.com/Server_settings/Searchd#shutdown_token
# shutdown_token =
# https://manual.manticoresearch.com/Server_settings/Searchd#snippets_file_prefix
# snippets_file_prefix =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_state
# sphinxql_state =
# https://manual.manticoresearch.com/Server_settings/Searchd#sphinxql_timeout
# sphinxql_timeout = 900
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_ca
# ssl_ca =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_cert
# ssl_cert =
# https://manual.manticoresearch.com/Server_settings/Searchd#ssl_key
# ssl_key =
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_docs_cache
# subtree_docs_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#subtree_hits_cache
# subtree_hits_cache = 0
# https://manual.manticoresearch.com/Server_settings/Searchd#thread_stack
# thread_stack =
# https://manual.manticoresearch.com/Server_settings/Searchd#unlink_old
# unlink_old = 1
# https://manual.manticoresearch.com/Server_settings/Searchd#watchdog
# watchdog = 1
}
common {
# https://manual.manticoresearch.com/Server_settings/Common#lemmatizer_base
# lemmatizer_base = /usr/local/share
# https://manual.manticoresearch.com/Server_settings/Common#progressive_merge
# progressive_merge =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_keynames
# json_autoconv_keynames =
# https://manual.manticoresearch.com/Server_settings/Common#json_autoconv_numbers
# json_autoconv_numbers = 0
# https://manual.manticoresearch.com/Server_settings/Common#on_json_attr_error
# on_json_attr_error = ignore_attr
# https://manual.manticoresearch.com/Server_settings/Common#plugin_dir
# plugin_dir =
}
EOF

File diff suppressed because one or more lines are too long


@@ -1,2 +1,5 @@
-unixsocket /var/run/redis/redis.sock
+unixsocket /var/run/monolith-redis.sock
unixsocketperm 777
+port 0
+# port 6379
+# requirepass changeme


@@ -1,23 +0,0 @@
wheel
beautifulsoup4
redis
siphashc
aiohttp[speedups]
python-dotenv
#manticoresearch
numpy
aioredis[hiredis]
aiokafka
vaderSentiment
polyglot
pyicu
pycld2
morfessor
six
nltk
#spacy
gensim
python-Levenshtein
orjson
uvloop
numba


@@ -1,50 +0,0 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -eo pipefail
REQUIREMENTS_LOCAL="/app/docker/requirements-local.txt"
# If this is a Cypress run, overwrite the password for admin and export env variables
if [ "$CYPRESS_CONFIG" == "true" ]; then
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
export SUPERSET_TESTENV=true
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
fi
#
# Make sure we have dev requirements installed
#
if [ -f "${REQUIREMENTS_LOCAL}" ]; then
echo "Installing local overrides at ${REQUIREMENTS_LOCAL}"
pip install -r "${REQUIREMENTS_LOCAL}"
else
echo "Skipping local overrides"
fi
if [[ "${1}" == "worker" ]]; then
echo "Starting Celery worker..."
celery --app=superset.tasks.celery_app:app worker -Ofair -l INFO
elif [[ "${1}" == "beat" ]]; then
echo "Starting Celery beat..."
celery --app=superset.tasks.celery_app:app beat --pidfile /tmp/celerybeat.pid -l INFO -s "${SUPERSET_HOME}"/celerybeat-schedule
elif [[ "${1}" == "app" ]]; then
echo "Starting web app..."
flask run -p 8088 --with-threads --reload --debugger --host=0.0.0.0
elif [[ "${1}" == "app-gunicorn" ]]; then
echo "Starting web app..."
/usr/bin/run-server.sh
fi


@@ -1,78 +0,0 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
set -e
#
# Always install local overrides first
#
/app/docker/docker-bootstrap.sh
STEP_CNT=4
echo_step() {
cat <<EOF
######################################################################
Init Step ${1}/${STEP_CNT} [${2}] -- ${3}
######################################################################
EOF
}
ADMIN_PASSWORD="admin"
# If this is a Cypress run, overwrite the password for admin and export env variables
if [ "$CYPRESS_CONFIG" == "true" ]; then
ADMIN_PASSWORD="general"
export SUPERSET_CONFIG=tests.integration_tests.superset_test_config
export SUPERSET_TESTENV=true
export SUPERSET__SQLALCHEMY_DATABASE_URI=postgresql+psycopg2://superset:superset@db:5432/superset
fi
# Initialize the database
echo_step "1" "Starting" "Applying DB migrations"
superset db upgrade
echo_step "1" "Complete" "Applying DB migrations"
# Create an admin user
echo_step "2" "Starting" "Setting up admin user ( admin / $ADMIN_PASSWORD )"
superset fab create-admin \
--username admin \
--firstname Superset \
--lastname Admin \
--email admin@superset.com \
--password $ADMIN_PASSWORD
echo_step "2" "Complete" "Setting up admin user"
# Create default roles and permissions
echo_step "3" "Starting" "Setting up roles and perms"
superset init
echo_step "3" "Complete" "Setting up roles and perms"
if [ "$SUPERSET_LOAD_EXAMPLES" = "yes" ]; then
# Load some data to play with
echo_step "4" "Starting" "Loading examples"
# If this is a Cypress run (which consumes superset_test_config), load the data required for tests
if [ "$CYPRESS_CONFIG" == "true" ]; then
superset load_test_users
superset load_examples --load-test-data
else
superset load_examples
fi
echo_step "4" "Complete" "Loading examples"
fi


@@ -1 +0,0 @@
pydruid


@@ -1,6 +1,6 @@
-PORTAINER_GIT_DIR=.
+PORTAINER_GIT_DIR=..
MODULES_ENABLED="dis"
-DISCORD_TOKEN="xx"
+DISCORD_TOKEN=
THRESHOLD_LISTENER_HOST=0.0.0.0
THRESHOLD_LISTENER_PORT=13867
THRESHOLD_LISTENER_SSL=1
@@ -13,16 +13,16 @@ THRESHOLD_RELAY_SSL=1
THRESHOLD_API_ENABLED=1
THRESHOLD_API_HOST=0.0.0.0
THRESHOLD_API_PORT=13869
-PORTAINER_GIT_DIR=.
-THRESHOLD_CONFIG_DIR=./legacy/conf/live/
+THRESHOLD_CONFIG_DIR=../legacy/conf/live/
-THRESHOLD_CERT_DIR=./legacy/conf/cert/
+#THRESHOLD_TEMPLATE_DIR=../legacy/conf/templates/
+THRESHOLD_CERT_DIR=../legacy/conf/cert/
# How many messages to ingest at once from Redis
-MONOLITH_INGEST_CHUNK_SIZE=900
+MONOLITH_INGEST_CHUNK_SIZE=70000
# Time to wait between polling Redis again
-MONOLITH_INGEST_ITER_DELAY=0.5
+MONOLITH_INGEST_ITER_DELAY=2
# Number of 4chan threads to request at once
MONOLITH_CH4_THREADS_CONCURRENT=1000
@@ -31,11 +31,20 @@ MONOLITH_CH4_THREADS_CONCURRENT=1000
MONOLITH_CH4_THREADS_DELAY=0.1
# Time to wait after finishing a crawl before starting again
-MONOLITH_CH4_CRAWL_DELAY=30
+MONOLITH_CH4_CRAWL_DELAY=60
# Semaphore value
MONOLITH_CH4_THREADS_SEMAPHORE=1000
# Threads to use for data processing
# Leave uncommented to use all available threads
-# MONOLITH_PROCESS_THREADS=4
+MONOLITH_PROCESS_THREADS=7
+# Enable performance metrics after message processing
+MONOLITH_PROCESS_PERFSTATS=0
+# Elasticsearch
+ELASTICSEARCH_USERNAME=elastic
+ELASTICSEARCH_PASSWORD=
+ELASTICSEARCH_HOST=https://es01:9200
+ELASTICSEARCH_TLS=1


@@ -1,87 +0,0 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=2
druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>

View File

@@ -1,17 +0,0 @@
repos:
  - repo: https://github.com/psf/black
    rev: 22.6.0
    hooks:
      - id: black
        args:
          - --line-length=120
  - repo: https://github.com/PyCQA/isort
    rev: 5.10.1
    hooks:
      - id: isort
        args: ["--profile", "black"]
  - repo: https://github.com/PyCQA/flake8
    rev: 4.0.1
    hooks:
      - id: flake8
        args: [--max-line-length=120]

View File

@@ -17,7 +17,7 @@
}, },
"Key": "key.pem", "Key": "key.pem",
"Certificate": "cert.pem", "Certificate": "cert.pem",
"RedisSocket": "/var/run/redis/redis.sock", "RedisSocket": "/var/run/socks/redis.sock",
"RedisDBEphemeral": 1, "RedisDBEphemeral": 1,
"RedisDBPersistent": 0, "RedisDBPersistent": 0,
"UsePassword": false, "UsePassword": false,

View File

@@ -1,41 +0,0 @@
version: "2"
services:
  app:
    image: pathogen/threshold:latest
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - .env
    # for development
    extra_hosts:
      - "host.docker.internal:host-gateway"
    volumes_from:
      - tmp
  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis
  redis:
    image: redis
    command: redis-server /etc/redis.conf
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
    volumes_from:
      - tmp
networks:
  default:
    external:
      name: pathogen

View File

@@ -1,38 +0,0 @@
version: "2"
services:
  app:
    image: pathogen/threshold:latest
    build: ./docker
    volumes:
      - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/conf/live
      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
      - ${THRESHOLD_CERT_DIR}:/code/conf/cert
    ports:
      - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
      - "${THRESHOLD_RELAY_PORT}:${THRESHOLD_RELAY_PORT}"
      - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
    env_file:
      - ../stack.env
    volumes_from:
      - tmp
  tmp:
    image: busybox
    command: chmod -R 777 /var/run/redis
    volumes:
      - /var/run/redis
  redis:
    image: redis
    command: redis-server /etc/redis.conf
    volumes:
      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
    volumes_from:
      - tmp
networks:
  default:
    external:
      name: pathogen

View File

@@ -4,6 +4,7 @@ from os import urandom
from os.path import exists from os.path import exists
from string import digits from string import digits
import redis
from redis import StrictRedis from redis import StrictRedis
# List of errors ZNC can give us # List of errors ZNC can give us
@@ -121,7 +122,7 @@ def initConf():
def initMain(): def initMain():
global r, g global r, g, x
initConf() initConf()
r = StrictRedis( r = StrictRedis(
unix_socket_path=config["RedisSocket"], db=config["RedisDBEphemeral"] # noqa unix_socket_path=config["RedisSocket"], db=config["RedisDBEphemeral"] # noqa
@@ -129,3 +130,5 @@ def initMain():
g = StrictRedis( g = StrictRedis(
unix_socket_path=config["RedisSocket"], db=config["RedisDBPersistent"] unix_socket_path=config["RedisSocket"], db=config["RedisDBPersistent"]
) # noqa ) # noqa
# SSDB for communication with Monolith
x = redis.from_url("redis://ssdb:1289", db=0)

View File

@@ -67,7 +67,7 @@ def parsemeta(numName, c):
def queue_message(c): def queue_message(c):
message = json.dumps(c) message = json.dumps(c)
main.g.sadd("queue", message) main.x.lpush("queue", message)
def event( def event(
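
With this change Threshold hands messages to Monolith through a Redis-protocol list on SSDB (LPUSH) rather than a Redis set. A minimal sketch of that producer/consumer handoff, using a local Redis URL as a stand-in for the real socket and SSDB endpoints:

import json

import redis

# Stand-in connection; the real code reaches SSDB via redis.from_url("redis://ssdb:1289").
x = redis.Redis(host="localhost", port=6379, db=0)

# Producer side, as in queue_message above: push onto the left of the list.
x.lpush("queue", json.dumps({"src": "irc", "msg": "hello"}))

# Consumer side (Monolith): pop from the right so messages come out oldest-first.
raw = x.rpop("queue")
if raw:
    message = json.loads(raw)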

View File

@@ -1,9 +0,0 @@
wheel
pre-commit
twisted
pyOpenSSL
redis
pyYaML
service_identity
siphashc
Klein

View File

@@ -98,7 +98,6 @@ class IRCRelayFactory(ReconnectingClientFactory):
self.relayCommands, self.user, self.stage2 = relayCommands, user, stage2 self.relayCommands, self.user, self.stage2 = relayCommands, user, stage2
def buildProtocol(self, addr): def buildProtocol(self, addr):
entry = IRCRelay(self.num, self.relayCommands, self.user, self.stage2) entry = IRCRelay(self.num, self.relayCommands, self.user, self.stage2)
self.client = entry self.client = entry

View File

@@ -1,8 +1,10 @@
import asyncio import asyncio
from os import getenv from os import getenv
from time import sleep
import uvloop import uvloop
import db
import util import util
from sources.ch4 import Chan4 from sources.ch4 import Chan4
from sources.dis import DiscordClient from sources.dis import DiscordClient
@@ -21,14 +23,28 @@ if not token:
async def main(loop): async def main(loop):
if "ingest" in modules_enabled:
ingest = Ingest()
loop.create_task(ingest.run())
if "dis" in modules_enabled:
client = DiscordClient() client = DiscordClient()
loop.create_task(client.start(token)) loop.create_task(client.start(token))
if "ch4" in modules_enabled:
chan = Chan4() chan = Chan4()
loop.create_task(chan.run()) loop.create_task(chan.run())
ingest = Ingest()
loop.create_task(ingest.run()) created = False
while not created:
try:
db.create_index(db.api_client)
created = True
except Exception as e:
print(f"Error creating index: {e}")
sleep(1) # Block the thread, just wait for the DB
db.update_schema()
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
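
The startup sequence above now gates each module behind modules_enabled and blocks until the index can be created before the event loop starts. The same wait-for-dependency pattern in isolation; unlike the loop above it gives up after a fixed number of attempts, and the setup callable is a placeholder for db.create_index:

from time import sleep


def wait_for(setup, retry_delay=1.0, attempts=30):
    # Call setup() until it stops raising, blocking between attempts.
    for _ in range(attempts):
        try:
            setup()
            return True
        except Exception as e:  # broad catch mirrors the loop above
            print(f"Error creating index: {e}")
            sleep(retry_delay)
    return False


# Usage sketch: wait_for(lambda: db.create_index(db.api_client))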

0
oom Normal file
View File

0
perf/__init__.py Normal file
View File

134
perf/throttle.py Normal file
View File

@@ -0,0 +1,134 @@
import asyncio
import time
import psutil
import util
class DynamicThrottle(object):
def __init__(self, **kwargs):
self.target_cpu_usage = kwargs.get("target_cpu_usage", 50)
self.sleep_interval = 0.0
self.sleep_increment = kwargs.get("sleep_increment", 0.01)
self.sleep_decrement = kwargs.get("sleep_decrement", 0.01)
self.sleep_max = kwargs.get("sleep_max", 0.1)
self.sleep_min = kwargs.get("sleep_min", 0.01)
self.psutil_interval = kwargs.get("psutil_interval", 0.1)
self.log = kwargs.get("log", util.get_logger(self.__class__.__name__))
self.consecutive_increments = 0
self.consecutive_decrements = 0
self.consecutive_divisor = kwargs.get("consecutive_divisor", 1)
self.last_was_increment = kwargs.get("start_increment", True)
if kwargs.get("use_async"):
self.wait = self.dynamic_throttle_async
else:
self.wait = self.dynamic_throttle
async def dynamic_throttle_async(self):
"""
Dynamically sleeps before a request if CPU usage is above our target.
"""
current_cpu_usage = psutil.cpu_percent(interval=self.psutil_interval)
if current_cpu_usage > self.target_cpu_usage:
if self.last_was_increment:
self.consecutive_increments += 1
# self.log.debug(f"High CPU consecutive increments: {self.consecutive_increments}")
else:
self.consecutive_increments = 0 # ?
self.consecutive_decrements = 0 # ?
# self.log.debug(f"High CPU alert reset.")
self.sleep_interval += self.sleep_increment * (
max(1, self.consecutive_increments) / self.consecutive_divisor
)
self.last_was_increment = True
if self.sleep_interval > self.sleep_max:
self.sleep_interval = self.sleep_max
# self.log.debug(f"High CPU, but not increasing above {self.sleep_max:.3f}s")
# self.log.debug(
# f"High CPU: {current_cpu_usage}% > {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
elif current_cpu_usage < self.target_cpu_usage:
if not self.last_was_increment:
self.consecutive_decrements += 1
# self.log.debug(f"Low CPU consecutive decrements: {self.consecutive_decrements}")
else:
self.consecutive_decrements = 0 # ?
self.consecutive_increments = 0 # ?
# self.log.debug(f"Low CPU alert reset.")
self.sleep_interval -= self.sleep_decrement * (
max(1, self.consecutive_decrements) / self.consecutive_divisor
)
self.last_was_increment = False
if self.sleep_interval < self.sleep_min:
self.sleep_interval = self.sleep_min
# self.log.debug(f"Low CPU, but not decreasing below {self.sleep_min:.3f}s")
# self.log.debug(
# f"Low CPU: {current_cpu_usage}% < {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
if self.sleep_interval > 0:
await asyncio.sleep(self.sleep_interval)
return self.sleep_interval
return 0.0
def dynamic_throttle(self):
"""
Dynamically sleeps before a request if CPU usage is above our target.
"""
current_cpu_usage = psutil.cpu_percent(interval=self.psutil_interval)
if current_cpu_usage > self.target_cpu_usage:
if self.last_was_increment:
self.consecutive_increments += 1
# self.log.debug(f"High CPU consecutive increments: {self.consecutive_increments}")
else:
self.consecutive_increments = 0 # ?
self.consecutive_decrements = 0 # ?
# self.log.debug(f"High CPU alert reset.")
self.sleep_interval += self.sleep_increment * (
max(1, self.consecutive_increments) / self.consecutive_divisor
)
self.last_was_increment = True
if self.sleep_interval > self.sleep_max:
self.sleep_interval = self.sleep_max
# self.log.debug(f"High CPU, but not increasing above {self.sleep_max:.3f}s")
# self.log.debug(
# f"High CPU: {current_cpu_usage}% > {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
elif current_cpu_usage < self.target_cpu_usage:
if not self.last_was_increment:
self.consecutive_decrements += 1
# self.log.debug(f"Low CPU consecutive decrements: {self.consecutive_decrements}")
else:
self.consecutive_decrements = 0 # ?
self.consecutive_increments = 0 # ?
# self.log.debug(f"Low CPU alert reset.")
self.sleep_interval -= self.sleep_decrement * (
max(1, self.consecutive_decrements) / self.consecutive_divisor
)
self.last_was_increment = False
if self.sleep_interval < self.sleep_min:
self.sleep_interval = self.sleep_min
# self.log.debug(f"Low CPU, but not decreasing below {self.sleep_min:.3f}s")
# self.log.debug(
# f"Low CPU: {current_cpu_usage}% < {self.target_cpu_usage}%, "
# f"=> sleep {self.sleep_interval:.3f}s"
# )
if self.sleep_interval > 0:
time.sleep(self.sleep_interval)
return self.sleep_interval
return 0.0
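
A minimal usage sketch for the throttle above, assuming it is importable as perf.throttle.DynamicThrottle within this repository; the crawl loop and fetch_one coroutine are hypothetical, not part of the module:

import asyncio

from perf.throttle import DynamicThrottle


async def fetch_one(url):
    await asyncio.sleep(0)  # stand-in for a real HTTP request


async def crawl(urls):
    throttle = DynamicThrottle(
        target_cpu_usage=50,  # start backing off once CPU exceeds 50%
        sleep_max=0.1,
        use_async=True,       # selects the asyncio-based wait()
    )
    for url in urls:
        await throttle.wait()  # sleeps only while CPU is above target
        await fetch_one(url)


# asyncio.run(crawl(["https://a.4cdn.org/g/threads.json"]))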

1
processing/ohlc.py Normal file
View File

@@ -0,0 +1 @@
# Resample 1Min into 5Min, 15Min, 30Min, 1H, 4H, 1D, 1W, 1M, 1Y

View File

@@ -14,7 +14,7 @@ from concurrent.futures import ProcessPoolExecutor
# For timestamp processing # For timestamp processing
from datetime import datetime from datetime import datetime
from math import ceil from os import getenv
import orjson import orjson
import regex import regex
@@ -34,7 +34,6 @@ from gensim.parsing.preprocessing import ( # stem_text,
strip_short, strip_short,
strip_tags, strip_tags,
) )
from numpy import array_split
from polyglot.detect.base import logger as polyglot_logger from polyglot.detect.base import logger as polyglot_logger
# For NLP # For NLP
@@ -48,9 +47,21 @@ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import db import db
import util import util
# For throttling
from perf.throttle import DynamicThrottle
# 4chan schema # 4chan schema
from schemas.ch4_s import ATTRMAP from schemas.ch4_s import ATTRMAP
trues = ("true", "1", "t", True)
KEYNAME = "queue"
MONOLITH_PROCESS_PERFSTATS = (
getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
)
TARGET_CPU_USAGE = float(os.getenv("MONOLITH_PROCESS_TARGET_CPU_USAGE", 50.0))
CUSTOM_FILTERS = [ CUSTOM_FILTERS = [
lambda x: x.lower(), lambda x: x.lower(),
strip_tags, # strip_tags, #
@@ -81,6 +92,19 @@ CPU_THREADS = int(os.getenv("MONOLITH_PROCESS_THREADS", os.cpu_count()))
p = ProcessPoolExecutor(CPU_THREADS) p = ProcessPoolExecutor(CPU_THREADS)
throttle = DynamicThrottle(
target_cpu_usage=TARGET_CPU_USAGE,
sleep_increment=0.02,
sleep_decrement=0.01,
sleep_max=0.5,
sleep_min=0,
psutil_interval=0.1,
consecutive_divisor=2,
log=log,
start_increment=True,
use_async=False,
)
def get_hash_key(): def get_hash_key():
hash_key = db.r.get("hashing_key") hash_key = db.r.get("hashing_key")
@@ -99,38 +123,44 @@ hash_key = get_hash_key()
@asyncio.coroutine @asyncio.coroutine
async def spawn_processing_threads(data): async def spawn_processing_threads(chunk, length):
len_data = len(data) log.debug(f"Spawning processing threads for chunk {chunk} of length {length}")
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
tasks = [] tasks = []
if len(data) < CPU_THREADS * 100: if length < CPU_THREADS * 100:
split_data = [data] cores = 1
chunk_size = length
else: else:
msg_per_core = int(len(data) / CPU_THREADS) cores = CPU_THREADS
split_data = array_split(data, ceil(len(data) / msg_per_core)) chunk_size = int(length / cores)
for index, split in enumerate(split_data):
log.debug(f"Delegating processing of {len(split)} messages to thread {index}") for index in range(cores):
task = loop.run_in_executor(p, process_data, split) log.debug(
f"[{chunk}/{index}] Delegating {chunk_size} messages to thread {index}"
)
task = loop.run_in_executor(p, process_data, chunk, index, chunk_size)
tasks.append(task) tasks.append(task)
results = [await task for task in tasks] results = [await task for task in tasks]
log.debug(
(
f"Results from processing of {len_data} messages in "
f"{len(split_data)} threads: {len(results)}"
)
)
# Join the results back from the split list # Join the results back from the split list
flat_list = [item for sublist in results for item in sublist] flat_list = [item for sublist in results for item in sublist]
await db.store_kafka_batch(flat_list) total_messages = len(flat_list)
log.info(
(
f"[{chunk}/{index}] Results from processing of {length} messages in "
f"{cores} threads: {len(flat_list)}"
)
)
await db.store_batch(flat_list)
return total_messages
# log.debug(f"Finished processing {len_data} messages") # log.debug(f"Finished processing {len_data} messages")
def process_data(data): def process_data(chunk, index, chunk_size):
log.debug(f"[{chunk}/{index}] Processing {chunk_size} messages")
to_store = [] to_store = []
sentiment_time = 0.0 sentiment_time = 0.0
@@ -139,15 +169,38 @@ def process_data(data):
date_time = 0.0 date_time = 0.0
nlp_time = 0.0 nlp_time = 0.0
normalise_time = 0.0 normalise_time = 0.0
hash_time = 0.0
normal2_time = 0.0 normal2_time = 0.0
soup_time = 0.0 soup_time = 0.0
sleep_time = 0.0
total_time = 0.0 total_time = 0.0
# Initialise sentiment analyser # Initialise sentiment analyser
analyzer = SentimentIntensityAnalyzer() analyzer = SentimentIntensityAnalyzer()
for msg in data:
for msg_index in range(chunk_size):
# Print percentage of msg_index relative to chunk_size
if msg_index % 10 == 0:
percentage_done = (msg_index / chunk_size) * 100
log.debug(
f"[{chunk}/{index}] {percentage_done:.2f}% done ({msg_index}/{chunk_size})"
)
msg = db.r.rpop(KEYNAME)
if not msg:
return to_store  # queue drained early; hand back what was already processed
msg = orjson.loads(msg)
if msg["src"] == "4ch":
board = msg["net"]
thread = msg["channel"]
redis_key = (
f"cache.{board}.{thread}.{msg['no']}.{msg['resto']}.{msg['now']}"
)
key_content = db.r.get(redis_key)
if key_content is not None:
continue
db.r.set(redis_key, "1")
total_start = time.process_time() total_start = time.process_time()
# normalise fields # normalise fields
start = time.process_time() start = time.process_time()
@@ -173,24 +226,6 @@ def process_data(data):
board = msg["net"] board = msg["net"]
thread = msg["channel"] thread = msg["channel"]
# Calculate hash for post
start = time.process_time()
post_normalised = orjson.dumps(msg, option=orjson.OPT_SORT_KEYS)
hash = siphash(hash_key, post_normalised)
hash = str(hash)
redis_key = f"cache.{board}.{thread}.{msg['no']}"
key_content = db.r.get(redis_key)
if key_content:
key_content = key_content.decode("ascii")
if key_content == hash:
# This deletes the message since the append at the end won't be hit
continue
else:
msg["type"] = "update"
db.r.set(redis_key, hash)
time_took = (time.process_time() - start) * 1000
hash_time += time_took
start = time.process_time() start = time.process_time()
for key2, value in list(msg.items()): for key2, value in list(msg.items()):
if key2 in ATTRMAP: if key2 in ATTRMAP:
@@ -208,9 +243,10 @@ def process_data(data):
old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M:%S") old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M:%S")
else: else:
old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M") old_ts = datetime.strptime(old_time, "%m/%d/%y(%a)%H:%M")
# new_ts = old_ts.isoformat() # iso_ts = old_ts.isoformat()
new_ts = int(old_ts.timestamp()) new_ts = int(old_ts.timestamp())
msg["ts"] = new_ts msg["ts"] = new_ts
# msg["iso"] = iso_ts
else: else:
raise Exception("No TS in msg") raise Exception("No TS in msg")
time_took = (time.process_time() - start) * 1000 time_took = (time.process_time() - start) * 1000
@@ -236,7 +272,7 @@ def process_data(data):
msg["lang_code"] = lang_code msg["lang_code"] = lang_code
msg["lang_name"] = lang_name msg["lang_name"] = lang_name
except cld2_error as e: except cld2_error as e:
log.error(f"Error detecting language: {e}") log.error(f"[{chunk}/{index}] Error detecting language: {e}")
# So below block doesn't fail # So below block doesn't fail
lang_code = None lang_code = None
time_took = (time.process_time() - start) * 1000 time_took = (time.process_time() - start) * 1000
@@ -255,7 +291,7 @@ def process_data(data):
# Tokens # Tokens
start = time.process_time() start = time.process_time()
tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS) tokens = preprocess_string(msg["msg"], CUSTOM_FILTERS)
msg["tokens"] = tokens msg["tokens"] = str(tokens)
# n = nlp(msg["msg"]) # n = nlp(msg["msg"])
# for tag in TAGS: # for tag in TAGS:
# tag_name = tag.lower() # tag_name = tag.lower()
@@ -267,17 +303,25 @@ def process_data(data):
# Add the mutated message to the return buffer # Add the mutated message to the return buffer
to_store.append(msg) to_store.append(msg)
total_time += (time.process_time() - total_start) * 1000 total_time += (time.process_time() - total_start) * 1000
log.debug("=====================================")
log.debug(f"Sentiment: {sentiment_time}") # Dynamic throttling to reduce CPU usage
log.debug(f"Regex: {regex_time}") if msg_index % 5 == 0:
log.debug(f"Polyglot: {polyglot_time}") sleep_time += throttle.wait()
log.debug(f"Date: {date_time}")
log.debug(f"NLP: {nlp_time}") if MONOLITH_PROCESS_PERFSTATS:
log.debug(f"Normalise: {normalise_time}") log.info("=====================================")
log.debug(f"Hash: {hash_time}") log.info(f"Chunk: {chunk}")
log.debug(f"Normal2: {normal2_time}") log.info(f"Index: {index}")
log.debug(f"Soup: {soup_time}") log.info(f"Sentiment: {sentiment_time}")
log.debug(f"Total: {total_time}") log.info(f"Regex: {regex_time}")
log.debug("=====================================") log.info(f"Polyglot: {polyglot_time}")
log.info(f"Date: {date_time}")
log.info(f"NLP: {nlp_time}")
log.info(f"Normalise: {normalise_time}")
log.info(f"Normal2: {normal2_time}")
log.info(f"Soup: {soup_time}")
log.info(f"Total: {total_time}")
log.info(f"Throttling: {sleep_time}")
log.info("=====================================")
return to_store return to_store
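
The reworked spawn_processing_threads above sizes work per core instead of pre-splitting a list: small chunks stay on one thread, larger ones are divided evenly and each worker pops its own share straight from the Redis list. A standalone sketch of just that sizing rule (cores, chunk_size and the 100-messages-per-thread heuristic follow the diff; the function itself is illustrative):

import os

CPU_THREADS = int(os.getenv("MONOLITH_PROCESS_THREADS", os.cpu_count()))


def plan_chunk(length, threads=CPU_THREADS):
    # Return (cores, per_core_chunk_size) for a queue chunk of `length` messages.
    if length < threads * 100:
        return 1, length
    return threads, length // threads


# Example: a 9000-message chunk on an 8-thread box -> 8 workers of 1125 messages each.
print(plan_chunk(9000, threads=8))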

View File

@@ -1,14 +1,13 @@
wheel wheel
pre-commit
beautifulsoup4 beautifulsoup4
redis redis
siphashc siphashc
aiohttp[speedups] aiohttp[speedups]
python-dotenv python-dotenv
#manticoresearch manticoresearch
numpy numpy
aioredis[hiredis] aioredis[hiredis]
aiokafka #aiokafka
vaderSentiment vaderSentiment
polyglot polyglot
pyicu pyicu
@@ -21,4 +20,10 @@ gensim
python-Levenshtein python-Levenshtein
orjson orjson
uvloop uvloop
numba elasticsearch[async]
msgpack
# flpc
psutil
pymexc
websockets
aiomysql

186
rts.py Normal file
View File

@@ -0,0 +1,186 @@
import asyncio
import logging
from os import getenv
import orjson
import websockets
import db
# Logger setup
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("RTS")
# Environment variables
MONOLITH_RTS_MEXC_API_ACCESS_KEY = getenv("MONOLITH_RTS_MEXC_API_ACCESS_KEY", None)
MONOLITH_RTS_MEXC_API_SECRET_KEY = getenv("MONOLITH_RTS_MEXC_API_SECRET_KEY", None)
# WebSocket endpoint
MEXC_WS_URL = "wss://wbs.mexc.com/ws"
{
"d": {
"e": "spot@public.kline.v3.api",
"k": {
"t": 1737901140, # TS
"o": "684.4", # Open
"c": "684.5", # Close
"h": "684.5", # High
"l": "684.4", # Low
"v": "0.173", # Volume of the base
"a": "118.41", # Volume of the quote (Quantity)
"T": 1737901200, # ?
"i": "Min1", # ?
},
},
"c": "spot@public.kline.v3.api@BNBUSDT@Min1", # Channel
"t": 1737901159239,
"s": "BNBUSDT", # Symbol
}
# Scan DB for last endtime (T)
# Request Kline data from last endtime (T) to now
# Check Server Time
# Response
# {
# "serverTime" : 1645539742000
# }
# GET /api/v3/time
# Weight(IP): 1
# Parameter:
# NONE
# Kline/Candlestick Data
# Response
# [
# [
# 1640804880000,
# "47482.36",
# "47482.36",
# "47416.57",
# "47436.1",
# "3.550717",
# 1640804940000,
# "168387.3"
# ]
# ]
# GET /api/v3/klines
# Weight(IP): 1
# Kline/candlestick bars for a symbol. Klines are uniquely identified by their open time.
# Parameters:
# Name Type Mandatory Description
# symbol string YES
# interval ENUM YES ENUM: Kline Interval
# startTime long NO
# endTime long NO
# limit integer NO Default 500; max 1000.
# Scrub function:
# For each record, ensure there are no time gaps
# When the 1m window goes over, the next t is always the last T.
# Check for gaps, and request all klines between those gaps to ensure a full DB, even with restarts.
# Idle jitter function - compare our time with server time.
# Compare ts with our time and print jitter. Add jitter warning to log and OHLC.
# High jitter may prevent us from getting the correct data for trading.
async def mex_handle(data):
message = orjson.loads(data)
# print(orjson.dumps(message, option=orjson.OPT_INDENT_2).decode("utf-8"))
if "code" in message:
if message["code"] == 0:
log.info("Control message received")
return
symbol = message["s"]
open = message["d"]["k"]["o"]
close = message["d"]["k"]["c"]
high = message["d"]["k"]["h"]
low = message["d"]["k"]["l"]
volume_base = message["d"]["k"]["v"] # ERROR IN API DOCS
volume_quote = message["d"]["k"]["a"] # > a bigDecimal volume
interval = message["d"]["k"]["i"]
start_time = message["d"]["k"]["t"] # > t long stratTime
end_time = message["d"]["k"]["T"] # > T long endTime
event_time = message["t"] # t long eventTime
index = f"mex_ohlc_{symbol.lower()}"
reformatted = {
"s": symbol,
"o": float(open),
"c": float(close),
"h": float(high),
"l": float(low),
"v": float(volume_base),
"a": float(volume_quote),
"i": interval,
"t": int(start_time),
"t2": int(end_time),
"ts": int(event_time),
}
await db.rts_store_message(index, reformatted)
print(index)
print(orjson.dumps(reformatted, option=orjson.OPT_INDENT_2).decode("utf-8"))
print()
# Kline WebSocket handler
async def mex_main():
await db.init_mysql_pool()
async with websockets.connect(MEXC_WS_URL) as websocket:
log.info("WebSocket connected")
# Define symbols and intervals
symbols = ["BTCUSDT"] # Add more symbols as needed
interval = "Min1" # Kline interval
# Prepare subscription requests for Kline streams
# Request: spot@public.kline.v3.api@<symbol>@<interval>
subscriptions = [
f"spot@public.kline.v3.api@{symbol}@{interval}" for symbol in symbols
]
# Send subscription requests
subscribe_request = {
"method": "SUBSCRIPTION",
"params": subscriptions,
# "id": 1,
}
await websocket.send(orjson.dumps(subscribe_request).decode("utf-8"))
log.info(f"Subscribed to: {subscriptions}")
# Listen for messages
while True:
try:
message = await websocket.recv()
await mex_handle(message)
except websockets.exceptions.ConnectionClosed as e:
log.error(f"WebSocket connection closed: {e}")
break
# Entry point
if __name__ == "__main__":
try:
asyncio.run(mex_main())
except KeyboardInterrupt:
log.info("RTS process terminated.")

View File

@@ -129,8 +129,19 @@ schema_main = {
"version_sentiment": "int", "version_sentiment": "int",
# 1, 2 # 1, 2
"version_tokens": "int", "version_tokens": "int",
# en, ru
"lang_code": "string indexed attribute",
"lang_name": "text",
"match_ts": "timestamp",
"batch_id": "bigint",
"rule_id": "bigint",
"index": "string indexed attribute",
"meta": "text",
# "iso": "string indexed attribute",
} }
schema_rule_storage = schema_main
schema_meta = { schema_meta = {
"id": "bigint", "id": "bigint",
# 393598265, #main, Rust Programmer's Club # 393598265, #main, Rust Programmer's Club
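
The extended main schema above adds language, rule-matching and provenance fields alongside the existing version counters. An illustrative document shape that would satisfy it; every value here is invented for the example:

example_doc = {
    "msg": "example message",
    "ts": 1669151555,
    "lang_code": "en",
    "lang_name": "English",
    "match_ts": 1669151560,
    "batch_id": 12,
    "rule_id": 3,
    "index": "main",
    "meta": "{}",
    "version_sentiment": 1,
    "version_tokens": 2,
}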

View File

@@ -10,6 +10,7 @@ from numpy import array_split
import db import db
import util import util
from perf.throttle import DynamicThrottle
# CONFIGURATION # # CONFIGURATION #
@@ -25,6 +26,12 @@ CRAWL_DELAY = int(getenv("MONOLITH_CH4_CRAWL_DELAY", 5))
# Semaphore value ? # Semaphore value ?
THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", 1000)) THREADS_SEMAPHORE = int(getenv("MONOLITH_CH4_THREADS_SEMAPHORE", 1000))
# Target CPU usage percentage
TARGET_CPU_USAGE = float(getenv("MONOLITH_CH4_TARGET_CPU_USAGE", 50.0))
# Boards to crawl
BOARDS = getenv("MONOLITH_CH4_BOARDS", "").split(",")
# CONFIGURATION END # # CONFIGURATION END #
@@ -37,6 +44,19 @@ class Chan4(object):
name = self.__class__.__name__ name = self.__class__.__name__
self.log = util.get_logger(name) self.log = util.get_logger(name)
self.throttle = DynamicThrottle(
target_cpu_usage=TARGET_CPU_USAGE,
sleep_increment=0.01,
sleep_decrement=0.01,
sleep_max=0.1,
sleep_min=0,
psutil_interval=0.1,
log=self.log,
start_increment=False,
use_async=True,
)
self.wait = self.throttle.wait
self.api_endpoint = "https://a.4cdn.org" self.api_endpoint = "https://a.4cdn.org"
# self.boards = ["out", "g", "a", "3", "pol"] # # self.boards = ["out", "g", "a", "3", "pol"] #
self.boards = [] self.boards = []
@@ -53,12 +73,14 @@ class Chan4(object):
self.log.debug(f"Created new hash key: {self.hash_key}") self.log.debug(f"Created new hash key: {self.hash_key}")
db.r.set("hashing_key", self.hash_key) db.r.set("hashing_key", self.hash_key)
else: else:
self.hash_key = self.hash_key.decode("ascii") self.hash_key = self.hash_key.decode("ascii")
self.log.debug(f"Decoded hash key: {self.hash_key}") self.log.debug(f"Decoded hash key: {self.hash_key}")
async def run(self): async def run(self):
if "ALL" in BOARDS:
await self.get_board_list() await self.get_board_list()
else:
self.boards = BOARDS
while True: while True:
await self.get_thread_lists(self.boards) await self.get_thread_lists(self.boards)
await asyncio.sleep(CRAWL_DELAY) await asyncio.sleep(CRAWL_DELAY)
@@ -71,29 +93,37 @@ class Chan4(object):
for board in response["boards"]: for board in response["boards"]:
self.boards.append(board["board"]) self.boards.append(board["board"])
self.log.debug(f"Got boards: {self.boards}") self.log.debug(f"Got boards: {self.boards}")
# await self.dynamic_throttle()
# TODO
async def get_thread_lists(self, boards): async def get_thread_lists(self, boards):
# self.log.debug(f"Getting thread list for {boards}") # self.log.debug(f"Getting thread list for {boards}")
board_urls = {board: f"{board}/catalog.json" for board in boards} board_urls = {board: f"{board}/threads.json" for board in boards}
responses = await self.api_call(board_urls) responses = await self.api_call(board_urls)
to_get = [] to_get = []
flat_map = [board for board, thread in responses] flat_map = [board for board, thread in responses]
self.log.debug(f"Got thread list for {flat_map}: {len(responses)}") self.log.debug(f"Got thread list for {len(responses)} boards: {flat_map}")
for mapped, response in responses: for board, response in responses:
if not response: if not response:
continue continue
for page in response: for page in response:
for threads in page["threads"]: for threads in page["threads"]:
no = threads["no"] no = threads["no"]
to_get.append((mapped, no)) to_get.append((board, no))
# await self.dynamic_throttle()
# TODO
if not to_get: if not to_get:
return return
self.log.debug(f"Got {len(to_get)} threads to fetch")
split_threads = array_split(to_get, ceil(len(to_get) / THREADS_CONCURRENT)) split_threads = array_split(to_get, ceil(len(to_get) / THREADS_CONCURRENT))
for threads in split_threads: self.log.debug(f"Split threads into {len(split_threads)} series")
await self.get_threads_content(threads) for index, thr in enumerate(split_threads):
self.log.debug(f"Series {index} - getting {len(thr)} threads")
await self.get_threads_content(thr)
# await self.dynamic_throttle()
# TODO
await asyncio.sleep(THREADS_DELAY) await asyncio.sleep(THREADS_DELAY)
# await self.get_threads_content(to_get)
def take_items(self, dict_list, n): def take_items(self, dict_list, n):
i = 0 i = 0
@@ -123,6 +153,8 @@ class Chan4(object):
continue continue
board, thread = mapped board, thread = mapped
all_posts[mapped] = response["posts"] all_posts[mapped] = response["posts"]
# await self.dynamic_throttle()
# TODO
if not all_posts: if not all_posts:
return return
@@ -132,14 +164,16 @@ class Chan4(object):
to_store = [] to_store = []
for key, post_list in posts.items(): for key, post_list in posts.items():
board, thread = key board, thread = key
for index, post in enumerate(post_list): for post in post_list:
posts[key][index]["type"] = "msg" post["type"] = "msg"
posts[key][index]["src"] = "4ch" post["src"] = "4ch"
posts[key][index]["net"] = board post["net"] = board
posts[key][index]["channel"] = thread post["channel"] = thread
to_store.append(posts[key][index]) to_store.append(post)
# await self.dynamic_throttle()
# TODO
if to_store: if to_store:
await db.queue_message_bulk(to_store) await db.queue_message_bulk(to_store)
@@ -154,6 +188,7 @@ class Chan4(object):
async def bound_fetch(self, sem, url, session, mapped): async def bound_fetch(self, sem, url, session, mapped):
# Getter function with semaphore. # Getter function with semaphore.
async with sem: async with sem:
await self.wait()
try: try:
return await self.fetch(url, session, mapped) return await self.fetch(url, session, mapped)
except: # noqa except: # noqa
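
The crawler above now reads its board list from a comma-separated environment variable and only falls back to boards.json when it contains ALL. A small sketch of that selection logic in isolation; the explicit fallback for an unset or empty variable is an assumption, the diff itself defaults to an empty string:

from os import getenv

raw = getenv("MONOLITH_CH4_BOARDS", "ALL")
# Assumption: treat unset/empty as "crawl everything".
boards = [b.strip() for b in raw.split(",") if b.strip()] or ["ALL"]

if "ALL" in boards:
    print("crawling every board returned by the boards.json endpoint")
else:
    print(f"crawling only: {boards}")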

View File

@@ -1,8 +1,6 @@
import asyncio import asyncio
from os import getenv from os import getenv
import orjson
import db import db
import util import util
from processing import process from processing import process
@@ -13,13 +11,22 @@ KEYNAME = "queue"
CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900")) CHUNK_SIZE = int(getenv("MONOLITH_INGEST_CHUNK_SIZE", "900"))
ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5")) ITER_DELAY = float(getenv("MONOLITH_INGEST_ITER_DELAY", "0.5"))
INGEST_INCREASE_BELOW = int(getenv("MONOLITH_INGEST_INCREASE_BELOW", "2500"))
INGEST_DECREASE_ABOVE = int(getenv("MONOLITH_INGEST_DECREASE_ABOVE", "10000"))
INGEST_INCREASE_BY = int(getenv("MONOLITH_INGEST_INCREASE_BY", "100"))
INGEST_DECREASE_BY = int(getenv("MONOLITH_INGEST_DECREASE_BY", "100"))
log = util.get_logger("ingest") log = util.get_logger("ingest")
INGEST_MAX = int(getenv("MONOLITH_INGEST_MAX", "1000000"))
INGEST_MIN = int(getenv("MONOLITH_INGEST_MIN", "100"))
class Ingest(object): class Ingest(object):
def __init__(self): def __init__(self):
name = self.__class__.__name__ name = self.__class__.__name__
self.log = util.get_logger(name) self.log = util.get_logger(name)
self.current_chunk = 0
self.log.info( self.log.info(
( (
"Starting ingest handler for chunk size of " "Starting ingest handler for chunk size of "
@@ -30,17 +37,45 @@ class Ingest(object):
async def run(self): async def run(self):
while True: while True:
await self.get_chunk() await self.get_chunk()
self.log.debug(f"Ingest chunk {self.current_chunk} complete")
self.current_chunk += 1
await asyncio.sleep(ITER_DELAY) await asyncio.sleep(ITER_DELAY)
async def get_chunk(self): async def get_chunk(self):
items = [] global CHUNK_SIZE
# for source in SOURCES: length = await db.ar.llen(KEYNAME)
# key = f"{KEYPREFIX}{source}" if length > CHUNK_SIZE:
chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE) length = CHUNK_SIZE
if not chunk: if not length:
return return
for item in chunk: ingested = await process.spawn_processing_threads(self.current_chunk, length)
item = orjson.loads(item)
items.append(item) if ingested < INGEST_INCREASE_BELOW:
if items: if CHUNK_SIZE + INGEST_INCREASE_BY < INGEST_MAX:
await process.spawn_processing_threads(items) self.log.debug(
(
f"Increasing chunk size to "
f"{CHUNK_SIZE + INGEST_INCREASE_BY} "
f"due to low ingestion ({ingested})"
)
)
CHUNK_SIZE += INGEST_INCREASE_BY
else:
log.debug(
f"Chunk size ({CHUNK_SIZE}) at maximum, not increasing above: {INGEST_MAX}"
)
elif ingested > INGEST_DECREASE_ABOVE:
if CHUNK_SIZE - INGEST_DECREASE_BY > INGEST_MIN:
self.log.debug(
(
f"Decreasing chunk size to "
f"{CHUNK_SIZE - INGEST_DECREASE_BY}"
f"due to high ingestion ({ingested})"
)
)
CHUNK_SIZE -= INGEST_DECREASE_BY
else:
log.debug(
f"Chunk size ({CHUNK_SIZE}) at minimum, not decreasing below: {INGEST_MIN}"
)
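
The adaptive sizing above nudges CHUNK_SIZE up when a pass ingests fewer messages than INGEST_INCREASE_BELOW and down when it exceeds INGEST_DECREASE_ABOVE, clamped between INGEST_MIN and INGEST_MAX. The same rule as a small pure function with the defaults from this diff; illustrative only, the real code mutates the module-level global:

def next_chunk_size(
    current,
    ingested,
    increase_below=2500,
    decrease_above=10000,
    step=100,
    minimum=100,
    maximum=1000000,
):
    # Return the chunk size to use on the next ingest pass.
    if ingested < increase_below and current + step < maximum:
        return current + step  # queue is quiet, read bigger chunks
    if ingested > decrease_above and current - step > minimum:
        return current - step  # falling behind, read smaller chunks
    return current


# Example: a quiet pass grows the chunk, a heavy pass shrinks it.
assert next_chunk_size(900, ingested=400) == 1000
assert next_chunk_size(1000, ingested=20000) == 900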

View File

@@ -43,7 +43,6 @@ class ColoredFormatter(logging.Formatter):
def get_logger(name): def get_logger(name):
# Define the logging format # Define the logging format
FORMAT = "%(asctime)s %(levelname)18s $BOLD%(name)13s$RESET - %(message)s" FORMAT = "%(asctime)s %(levelname)18s $BOLD%(name)13s$RESET - %(message)s"
COLOR_FORMAT = formatter_message(FORMAT, True) COLOR_FORMAT = formatter_message(FORMAT, True)