Compare commits


320 Commits

Author SHA1 Message Date
f774f4c2d2 Add some environment variables to control debug output 2022-10-21 07:20:30 +01:00
e32b330ef4 Switch to SSDB for message queueing 2022-10-21 11:53:29 +01:00
8c596ec516 Update gitignore 2022-10-21 11:53:28 +01:00
ab5e85c5c6 Begin switching away from Redis 2022-10-21 11:14:51 +01:00
7482064aee Clean up docker environment 2022-10-19 16:45:18 +01:00
dccbc6b158 Remove dependencies on infra stuff 2022-10-11 11:16:24 +01:00
8cc1a48a25 Separate out infra in production 2022-10-11 11:04:03 +01:00
83e8fb0e38 Remove event log file 2022-10-05 12:52:30 +01:00
64cf7d0d4a Set Superset directory relative to Portainer Git root 2022-10-04 21:43:16 +01:00
ae12e37e9b Set Superset path properly 2022-10-04 21:41:22 +01:00
5bb9bd3998 Use local storage in production 2022-10-04 21:33:08 +01:00
d96dc573c5 Update production compose 2022-10-04 21:32:14 +01:00
aea1c7faf6 Use one image for all the Druid services 2022-10-04 21:30:17 +01:00
2d6b3bb090 Set Superset volume relative to docker folder 2022-10-04 20:54:38 +01:00
83ffd6517c Switch quickstart setting to nano 2022-10-04 20:37:02 +01:00
8465e8fb77 Set Superset env file relative to docker directory 2022-10-04 20:30:14 +01:00
d7d9958e54 Add persistent Redis data store and copy over Druid config to production 2022-10-04 20:26:58 +01:00
464c831686 Add Apache Superset and fix Druid resource usage 2022-10-04 20:17:04 +01:00
5ad6cd0354 Add postgres config to Metabase 2022-10-02 14:29:40 +01:00
06e80a9759 Time stuff and switch to gensim for tokenisation 2022-10-01 14:46:45 +01:00
5c91f1af87 Remove commented debug code 2022-09-30 07:22:22 +01:00
02ff44a6f5 Use only one Redis key for the queue to make chunk size more precise for thread allocation 2022-09-30 07:22:22 +01:00
a5d29606e9 Remove ujson 2022-09-30 15:30:34 +01:00
6b549dee6a Reformat 2022-09-30 15:23:00 +01:00
2dd2360b4f Add config file to Turnilo 2022-09-27 08:30:28 +01:00
a2f88e29e6 Implement uvloop 2022-09-23 07:20:30 +01:00
f0df3e80fd Print Ingest settings on start 2022-09-23 08:32:29 +01:00
09fc63d0ad Make debug output cleaner 2022-09-22 17:39:29 +01:00
e9ae499ce8 Fix indexer options 2022-09-22 17:39:18 +01:00
b6f8dabccd Fix Java variable in indexer parameters 2022-09-22 08:41:59 +01:00
395dfb1e7b Decrease memory requirements further and switch Kafka image 2022-09-21 21:11:13 +01:00
ee79762c73 Set Kafka max heap size 2022-09-21 20:26:05 +01:00
e58b9960b2 Set max memory for Metabase 2022-09-21 14:39:11 +01:00
4a60dec964 Remove debugging code and fix regex substitution 2022-09-21 12:48:54 +01:00
9ee55a720b Change dev container names 2022-09-21 12:09:18 +01:00
799286ca76 Change prod container names 2022-09-21 12:08:29 +01:00
0e62a5b4b8 Remove prod compose comment 2022-09-21 12:04:54 +01:00
5ebae02bf2 Remove commented code for debugging 2022-09-21 10:02:05 +01:00
ced3a251b2 Normalise fields in processing and remove invalid characters 2022-09-21 10:01:12 +01:00
740f93208b Make production volumes point to external storage 2022-09-21 10:00:48 +01:00
2763e52e6b Don't muddle up the topics when sending Kafka batches 2022-09-20 23:03:02 +01:00
869af451e5 Document new PROCESS_THREADS setting in example file 2022-09-20 22:43:04 +01:00
31c58dd85b Make CPU threads configurable 2022-09-20 22:29:13 +01:00
40a0c2d22e Make performance settings configurable 2022-09-20 22:22:13 +01:00
9f4d4784af Set memory size to 2.5GB 2022-09-08 07:20:30 +01:00
72c22ed91e Update DirectMemorySize to be 1.5GB 2022-09-19 21:51:07 +01:00
ce62a84cec Make MaxDirectMemory 0.5*cores 2022-09-19 19:15:57 +01:00
41b5ca6afd Make max memory size 512m 2022-09-19 19:10:33 +01:00
7db3504251 Further decrease Druid memory requirements 2022-09-19 17:07:15 +01:00
1284700e61 Bump production Kafka healthcheck timeout 2022-09-19 11:18:52 +01:00
a9803fc79c Decrease production Druid max memory size 2022-09-19 10:51:34 +01:00
d4861811e5 Increase Kafka retries 2022-09-19 10:48:29 +01:00
3c2e8e8e67 Change Metabase port 2022-09-18 13:15:10 +01:00
f60c08918e Add docker environment file 2022-09-18 13:05:08 +01:00
0d6b3763f9 Update production compose 2022-09-18 13:04:08 +01:00
d4b8e11525 Reformat comment 2022-09-18 13:02:06 +01:00
38d00f2c21 Implement restricted sources 2022-09-18 13:01:19 +01:00
cb11ce9b12 Fix merge conflict 2022-09-16 17:45:24 +01:00
a89b5a8b6f Implement sentiment/NLP annotation and optimise processing 2022-09-16 17:09:49 +01:00
f432e9b29e Properly process Redis buffered messages and ingest into Kafka 2022-09-14 18:32:32 +01:00
c5f01c3084 Ingest into Kafka and queue messages better 2022-09-13 22:17:46 +01:00
47c5f89914 Implement Apache Druid/Kafka and Metabase 2022-09-13 22:17:32 +01:00
68fd5fa230 Switch to latest image for dev docker-compose 2022-09-13 09:20:43 +01:00
fd90c233c2 Begin implementing Apache Druid 2022-09-08 07:20:30 +01:00
0eb4a04b89 Use stable after all 2022-09-08 07:20:30 +01:00
e196172e04 Switch production image back to dev 2022-09-08 07:20:30 +01:00
41a8cea873 Lower memory requirements to prevent crashes 2022-09-08 07:20:30 +01:00
9cf4e945d1 Set dev image back to the default 2022-09-12 08:43:18 +01:00
04b5dec843 Treat text fields as string and try beta Kibana image 2022-09-12 08:27:13 +01:00
40492b1595 Add Mysql port to ports instead of expose 2022-09-10 13:20:06 +01:00
90fed411e6 Expose the Mysql port 2022-09-10 13:16:19 +01:00
0dde7d6f30 Use dev image of manticore 2022-09-10 12:03:45 +01:00
fed3cdbf5a Remove indexer block to attempt to prevent Manticore DB crash 2022-09-08 07:20:30 +01:00
c2bdb3fd15 Reformat 2022-09-07 07:20:30 +01:00
5d042c1259 Raise open files limit for Redis 2022-09-07 07:20:30 +01:00
92475ee9a9 Add 4chan update message type to main types 2022-09-07 07:20:30 +01:00
5c3b338017 Implement threshold writing to Redis and manticore ingesting from Redis 2022-09-07 07:20:30 +01:00
54ea5fa8e9 Add config directories to gitignore 2022-09-08 09:45:18 +01:00
e79de2b377 Add aioredis 2022-09-08 09:44:27 +01:00
79b1bee9e4 Implement ingesting to Redis from Threshold 2022-09-07 07:20:30 +01:00
e3b2e1f36d Config relative to Git dir 2022-09-05 07:20:30 +01:00
eb71dd76f8 Store persistent database elsewhere 2022-09-05 07:20:30 +01:00
cd5eb61455 Improve DB performance with caching 2022-09-05 07:20:30 +01:00
a198f2a487 Reformat legacy project 2022-09-05 07:20:30 +01:00
19ee7071f5 Merge branch 'threshold' 2022-09-06 12:50:25 +01:00
122fdca5db Moved files to subdirectory 2022-09-06 12:50:09 +01:00
7bb2264d91 Increase thread delay time 2022-09-05 07:20:30 +01:00
1858e06c4b Alter schemas and 4chan performance settings 2022-09-05 07:20:30 +01:00
ddcfa614ad Remove some debugging code 2022-09-05 07:20:30 +01:00
a1b193c6da Change Python to 3.10 2022-09-05 07:20:30 +01:00
7606d77c2b Update production env file path 2022-09-05 07:20:30 +01:00
aaf2d58d86 Remove development dotenv loading 2022-09-05 07:20:30 +01:00
d7e49777ef Add debug statement 2022-09-05 07:20:30 +01:00
d1c6bd1fb5 Reformat and set the net and channel for 4chan 2022-09-05 07:20:30 +01:00
b8d2ecc009 Make crawler more efficient and implement configurable parameters 2022-09-05 07:20:30 +01:00
f8fc5e1a1b Split thread list into chunks to save memory 2022-09-05 07:20:30 +01:00
6e00f70184 Reformat code 2022-09-04 21:40:04 +01:00
0f717b987d Reinstate Redis cache 2022-09-04 21:38:53 +01:00
60c43b4eb5 Run processing in thread 2022-09-04 21:29:00 +01:00
db23b31f30 Implement aiohttp 2022-09-04 19:44:25 +01:00
f7860bf08b Begin implementing aiohttp 2022-09-04 13:47:32 +01:00
734a2b7879 Implement running Discord and 4chan gathering simultaneously 2022-09-02 22:30:45 +01:00
2731713ede Fix error when no email can be found 2022-08-27 11:19:28 +01:00
2a2f24f570 Fix getting first relay when they are not sequential 2022-08-26 22:17:12 +01:00
c7941bfcda Log authentication messages 2022-08-16 23:01:42 +01:00
49b0b9db46 Implement deduplicating channels 2022-08-16 22:01:35 +01:00
07f1fff125 Switch to siphash 2022-08-18 07:20:30 +01:00
8816024d90 Re-add fake messages 2022-08-15 19:49:21 +01:00
b61316d805 Detect queries if nick and channel are the same 2022-08-15 19:24:42 +01:00
a65098c222 Add sinst fetch and fix message send logic 2022-08-15 19:15:12 +01:00
ed3c8497bc Switch debugging statements to trace in ChanKeep 2022-08-15 19:15:00 +01:00
0b69893e17 Fix query handling and don't send a fake message 2022-08-15 17:59:31 +01:00
e4c1d80250 Only run pingmsg after negative has completed 2022-08-18 07:20:30 +01:00
415a0b1135 Fix debug statements and amend function names 2022-08-18 07:20:30 +01:00
d026881086 Properly format string 2022-08-18 07:20:30 +01:00
ce32ab4722 Improve regPing debugging 2022-08-18 07:20:30 +01:00
2942929478 Improve regPing negative handling logic 2022-08-18 07:20:30 +01:00
53ee69540f Fix double messages and regPing logic 2022-08-18 07:20:30 +01:00
b25cb1699f Set the channel limit on connected relays, not active 2022-08-18 07:20:30 +01:00
7efde28d99 Look before you leap to confirming registrations 2022-08-18 07:20:30 +01:00
659162ebc6 Fix IRC config mutation 2022-08-18 07:20:30 +01:00
d0ea3bb221 Change authentication endpoint 2022-08-18 07:20:30 +01:00
e64aaf99d8 Reorder API endpoints to prevent clashing 2022-08-18 07:20:30 +01:00
f4225b622f Add more debugging information 2022-08-15 00:39:22 +01:00
6f44921647 Figure out the channel parsing logic 2022-08-15 00:36:36 +01:00
731c6a2fd1 Pass a list instead of listinfo 2022-08-15 00:29:08 +01:00
ffed420c11 Fix variable placement 2022-08-15 00:27:16 +01:00
66e046e15f Fix list parsing 2022-08-15 00:26:11 +01:00
8f44f34d0e Fix debugging code in keepChannels 2022-08-15 00:08:11 +01:00
1b68568fb7 Add debugging code in keepChannels 2022-08-15 00:07:29 +01:00
bdb2949d17 Subtract one from length of list for indices 2022-08-15 00:04:49 +01:00
560af8aeb0 Lower max_chans to length of LIST if it's shorter 2022-08-15 00:03:12 +01:00
153d3dd847 Reset negative pass status when requesting recheck 2022-08-14 23:58:35 +01:00
6cdadd23a0 Implement initial WHO loop delay 2022-08-14 20:58:41 +01:00
4fa5c25e94 Fix getting all unregistered relays 2022-08-14 20:58:30 +01:00
1b39b46121 Blacklist channels we are kicked from 2022-08-14 20:44:04 +01:00
c55a4058b1 Use JSON for sending messages 2022-08-14 16:45:40 +01:00
b62200d410 Implement API call to register 2022-08-14 16:26:09 +01:00
e30250603b Convert num to number in registration confirmation 2022-08-14 16:09:32 +01:00
02739abaf4 Allow current nick substitution in IRC commands 2022-08-14 15:53:18 +01:00
281eb75b26 Fix variable shadowing 2022-08-14 15:43:48 +01:00
559e1f4afd Print identification message 2022-08-14 13:51:13 +01:00
060ee4f0d5 Implement manual authentication mode 2022-08-14 13:13:05 +01:00
f7d390da32 Implement API for authentication management actions 2022-08-14 12:43:33 +01:00
0b20a05b19 More debugging for reg tests and getstr command 2022-08-14 11:41:29 +01:00
39059084ef Add allRelaysActive output to network info 2022-08-14 10:58:28 +01:00
feecf48b9b Add debug statements and only check if network is connected when parting channels 2022-08-14 09:25:54 +01:00
9b14979f29 Use JSON for joining channels and don't shadow auth variable when getting network info 2022-08-14 09:25:01 +01:00
a204be25c5 Make channel deletion endpoint accept JSON 2022-08-14 00:01:14 +01:00
a42c6be1b7 LBYL 2022-08-13 23:38:13 +01:00
a82355b660 Add more information to relay API return 2022-08-13 23:36:39 +01:00
2a3c9f80a3 Add even more debugging 2022-08-13 23:18:56 +01:00
3ca5a3452c Extra debugging for getting active relays 2022-08-13 23:17:26 +01:00
5f33ba7f1d Fix typo in module name 2022-08-13 23:14:51 +01:00
d9d3faf860 Extra debugging for get_first_relay 2022-08-13 23:14:17 +01:00
4c91b6ad2c Filter queries more carefully 2022-08-13 22:46:10 +01:00
abeba6bc06 Update CHANLIMIT on all instances when set via API 2022-08-13 22:36:52 +01:00
406b3d77f4 Add helper to get all active relays 2022-08-13 22:36:18 +01:00
047e9148aa Implement API endpoint to enable authentication 2022-08-13 22:25:29 +01:00
5db659b9af Filter AUTH channel (OFTC fix) 2022-08-13 22:15:50 +01:00
fced2b7d75 Use ChanKeep system for joining channels with joinSingle 2022-08-13 21:54:14 +01:00
16133fb7b4 Fully make use of ECA for multiple channels 2022-08-13 21:40:53 +01:00
5c95f35c61 Return chanlimit for each relay 2022-08-13 21:22:43 +01:00
ad7a5cfe49 Check token before attempting to confirm 2022-08-13 20:55:36 +01:00
92df4fb9a3 Implement API endpoint for provisioning relays 2022-08-13 20:51:31 +01:00
28c1a33615 Implement configurable chanlimit and add more fields about LIST output to Redis 2022-08-13 20:37:21 +01:00
9470f0d0d9 Implement updating registration via API 2022-08-13 20:36:51 +01:00
496a3d0374 Implement ChanKeep without requiring persistent chanlimits on all networks 2022-08-13 19:20:29 +01:00
75965497be Add some debug statements and statistics for chanlimits 2022-08-13 18:40:13 +01:00
1e9dd1b223 Print message if relay is unauthenticated/disconnected 2022-08-13 14:06:34 +01:00
df6b9e34a3 Return relay numbers with channel list 2022-08-13 13:47:42 +01:00
facf58ec2c Add connected status to IRC info return and check when getting active relays 2022-08-13 13:40:33 +01:00
21ed66bc00 Reformat code 2022-08-13 13:32:22 +01:00
5c63fb5048 Implement getting LIST information from API 2022-08-13 13:27:20 +01:00
c3fd8a97f7 Provision relay on creation 2022-08-13 00:18:06 +01:00
acc363d207 Add docstrings to chankeep 2022-08-12 23:53:02 +01:00
49214644ff Implement migrating networks 2022-08-12 23:32:00 +01:00
20f59362ff Subtract allocated channel slots from total 2022-08-12 22:31:12 +01:00
065fe94cbd Improve channel allocation and write basic tests for it 2022-08-12 22:27:49 +01:00
6306231098 Make channel join notification a TRACE 2022-08-12 20:19:39 +01:00
5c2ef740e6 Fix email command 2022-08-12 20:19:33 +01:00
7e51178a10 Add endpoint to get the bot's nickname 2022-08-09 07:20:30 +01:00
a2b6ebd912 Properly implement querying with API 2022-08-09 07:20:30 +01:00
ec943203d0 Get our hostname from WHO when we create fake events 2022-08-09 07:20:30 +01:00
8dc176aa54 Fire a fake event when we send a message 2022-08-09 07:20:30 +01:00
8ba4831d9c Implement best effort allocation 2022-08-11 21:44:19 +01:00
4c040bbf78 Simplify variable names and reformat 2022-08-11 20:51:41 +01:00
5a4ae2153e Use ceil instead of round for relay number rounding 2022-08-11 20:46:44 +01:00
8c3a75b3c8 Expand ECA secondary allocation algorithm 2022-08-11 20:43:34 +01:00
dc13515aa8 Adding more debug statements in ECA system 2022-08-11 20:36:24 +01:00
d38f7ba1ba Print information about received LIST 2022-08-11 20:32:49 +01:00
7c9903bca2 Return correct data type for provisioning relays 2022-08-11 20:29:01 +01:00
22e853a3f7 Simplify is_first_relay 2022-08-11 20:26:19 +01:00
b5326e92a1 Add even more debugging 2022-08-11 20:21:39 +01:00
604bee1b78 Add more LIST handling debugging 2022-08-11 20:18:49 +01:00
87ee96dd26 Don't add 1 to current relays when iterating 2022-08-11 20:13:30 +01:00
cc0e3b872b Add extra debug call for allRelaysActive 2022-08-11 20:12:38 +01:00
16d268ca90 Reformat helpers 2022-08-11 20:09:14 +01:00
6193502f2e Enable debug mode with env vars 2022-08-11 20:09:01 +01:00
b16289cded Update IRC template 2022-08-11 19:49:58 +01:00
502b45cda5 Allow gaps in relay numbering 2022-08-11 19:22:09 +01:00
4c8b584ef4 Implement deleting networks 2022-08-02 09:01:34 +01:00
b42c82eac2 More error handling when joining channels with ChanKeep 2022-08-02 09:01:24 +01:00
4c9ac3ec42 Implement adding networks 2022-08-01 23:02:20 +01:00
db4b6cc6f9 Implement requesting channel list for network 2022-08-01 21:38:46 +01:00
dae62ea544 Remove debugging code 2022-08-01 21:31:48 +01:00
e8870e95e7 Implement automatic provisioning 2022-08-01 19:34:35 +01:00
0dedb545f0 Implement updating aliases 2022-08-01 19:05:12 +01:00
6909fb68f7 Implement API endpoint to add next relay 2022-07-29 22:39:08 +01:00
54b5561a75 Implement deleting relays and fix adding 2022-07-29 22:11:43 +01:00
d51e87b09f Reformat code 2022-07-29 17:28:19 +01:00
6359918639 Fix joining channels with inactive relays 2022-07-29 17:28:09 +01:00
ba1f8407d1 Implement creating relays via the API 2022-07-29 17:27:40 +01:00
78f3f4520d Fix Redis config path 2022-07-29 22:22:22 +01:00
deb89e9202 Use proper port for SSL listener 2022-07-29 22:22:22 +01:00
f88551f926 Use Git dir to make redis config absolute path 2022-07-29 09:06:13 +01:00
dc6dcd79db Use paths relative to root in production compose 2022-07-29 09:04:18 +01:00
1d8bb73645 Switch paths 2022-07-29 09:00:08 +01:00
9de0b0919d Use relative paths 2022-07-29 08:59:02 +01:00
bf79c013d5 Fix redis.conf location in prod compose 2022-07-29 08:48:30 +01:00
e1fc59f636 Don't pass template directory 2022-07-29 08:35:56 +01:00
e662d36542 Fix path issue 2022-07-29 08:32:39 +01:00
cd38aab318 Pass through configuration directories to compose 2022-07-29 08:31:01 +01:00
6e99605701 Fix environment variable path on production compose 2022-07-29 08:11:37 +01:00
248273648d Properly configure production compose file 2022-07-29 08:02:10 +01:00
479e5072d2 Create separate production configuration 2022-07-29 08:01:48 +01:00
a9f499ec67 Remove print statements 2022-07-28 21:30:23 +01:00
ef61145671 Add trailing slash to example directory 2022-07-28 21:29:08 +01:00
3818308b75 Add Portainer Git directory to env file 2022-07-28 21:27:26 +01:00
2f74d79bc4 Seamlessly handle nonexistent configurations 2022-07-28 21:11:01 +01:00
3d67578179 Add stack.env file 2022-07-28 19:57:26 +01:00
9715b28f47 Move env file to example 2022-07-28 19:50:48 +01:00
a258ec8ad1 Properly pass environment variables to the process 2022-07-28 19:50:07 +01:00
f66f998f54 Make some addresses and hosts configurable with environment variables 2022-07-28 19:38:37 +01:00
e3700e309d Lower compose version 2022-07-28 19:25:15 +01:00
4e195b2954 Add docker definitions 2022-07-28 19:21:08 +01:00
8409a39e57 Implement relay, channel and alias management 2022-07-27 22:03:42 +01:00
b30a3a535d Implement editing networks via the API 2022-07-27 08:59:17 +01:00
b9c1470410 Implement network and channels view 2022-07-26 22:16:35 +01:00
5aebf63c2e Implement API endpoint for network listing 2022-07-25 18:05:53 +01:00
b149886128 Don't send to Logstash if it's disabled 2022-07-21 13:40:40 +01:00
4b33559e65 Implement getting number of channels and users 2022-07-21 13:40:18 +01:00
f589c7fc16 Implement more API functions 2022-07-21 13:40:17 +01:00
47a3f84c1c Update config 2022-07-21 13:40:15 +01:00
f942e94ee5 Implement API 2022-07-21 13:40:13 +01:00
f0acbdbfa3 Begin work on API endpoint 2022-07-21 13:40:11 +01:00
e5a14b2c91 Reformat again 2022-07-21 13:40:09 +01:00
a5fd7d60fd Remove some legacy code 2022-07-21 13:40:07 +01:00
f4c5323de1 Reformat project 2022-07-21 13:40:05 +01:00
6c7d0d5c45 Reformat and fix circular import 2022-07-21 13:40:03 +01:00
3229d9b806 Revert "Reformat project"
This reverts commit 64e3e1160aa76d191740342ab3edc68807f890fb.
2022-07-21 13:40:01 +01:00
760e43b59a Reformat project 2022-07-21 13:39:59 +01:00
9d4d31fdc2 Don't attempt secondary registration if it is disabled 2022-07-21 13:39:57 +01:00
e4a6e0d3c2 Don't attempt to register if it is disabled 2022-07-21 13:39:56 +01:00
7ffdc63eeb Rename time to ts 2022-07-21 13:39:54 +01:00
757b22c4a1 Extra error handling around emails 2022-07-21 13:39:52 +01:00
1532cf482c Make Redis DBs configurable 2022-07-21 13:39:50 +01:00
4b2a1f2735 Add Redis DB numbers to configuration 2022-07-21 13:39:48 +01:00
5c7d71dc99 Fix provisioning with emails 2022-07-21 13:39:46 +01:00
745c7caa12 Fix some issues with the default config 2022-07-21 13:39:44 +01:00
e5685286ae Improve email command 2022-07-21 13:39:43 +01:00
ff1ee63900 Reformat code with pre-commit 2022-07-21 13:39:41 +01:00
Mark Veidemanis 0777a55264 Start implementing email command 2021-08-25 07:47:54 +00:00
Mark Veidemanis 152bc08970 Add Logstash file 2021-08-24 20:08:18 +00:00
Mark Veidemanis edc5f85ba6 Implement modifying emails for aliases 2021-06-06 10:31:13 +00:00
Mark Veidemanis c389094365 Finish Logstash implementation 2021-06-06 10:16:04 +00:00
Mark Veidemanis 5d63d7a1e9 Update requirements without versions 2021-06-06 10:13:43 +00:00
0959d978b3 Merge branch 'master' into datarestructure 2020-11-02 20:18:36 +00:00
9c95fa8eaf Implement relay-independent join 2020-11-02 20:14:02 +00:00
14daa9dfef Don't discard server messages 2020-11-02 20:13:36 +00:00
45fa21fea3 Use substitutions in registration tests 2020-11-01 22:19:03 +00:00
0473c57291 Additional error handling for command parsing 2020-11-01 22:18:48 +00:00
735fee9286 Fix bug with reg command 2020-11-01 20:43:51 +00:00
d405a4cd10 Add example file for blacklist 2020-11-01 19:55:32 +00:00
399075afd1 Implement channel blacklisting 2020-11-01 19:54:24 +00:00
a0bea0b18a Fix bug with using muser attribute when absent 2020-11-01 19:03:56 +00:00
5d09e1ade7 Fix syntax error in reg command 2020-11-01 18:50:17 +00:00
19e04dbf36 Implement setting modes in ZNC 2020-11-01 03:39:32 +00:00
abdfc48b95 Prepare command loader for reloading commands 2020-11-01 03:38:47 +00:00
f7e1f2d221 Implement registration at net-level 2020-11-01 03:37:29 +00:00
a78e05c0c3 Clarify message output on confirm command 2020-11-01 03:36:23 +00:00
e22349802b Log error when ZNC says a channel can't be joined 2020-10-31 23:58:51 +00:00
b652b11335 Fix registration cancellation bug in regproc 2020-10-31 23:58:03 +00:00
49fd03304d Fix various bugs and off by one with provisioning 2020-10-31 23:55:11 +00:00
b0eaa7fd47 Move WHO and NAMES logging to trace 2020-10-31 16:52:00 +00:00
9e17223258 Don't deduplicate global messages (NICK/QUIT) 2020-10-31 16:51:24 +00:00
d60d89dbf6 Improve authentication detection
Add a negative check in the event we are authenticated and registered,
but not confirmed, as this fools other checks.
2020-10-31 16:49:37 +00:00
eaeb4b72c2 Use zero-padded numbers to maximise usable ports 2020-10-31 00:13:59 +00:00
388cd1e4b9 Error checking in testing for registration message 2020-10-31 00:13:09 +00:00
b986d6ac45 Deauth bot when disconnected and lowercase user 2020-10-31 00:12:06 +00:00
c06e922749 Clarify error message to be more helpful 2020-10-31 00:11:28 +00:00
8deac2ab17 Implement another level of logging for tracing 2020-10-31 00:10:33 +00:00
4d25505625 Note that arguments to list are optional 2020-10-31 00:06:35 +00:00
69fbe180f1 Implement authentication checking on connection 2020-10-28 22:50:12 +00:00
812db95995 Add checks in dedup for time-less messages 2020-10-28 22:46:22 +00:00
b16b5d690b Fix decoding issue with some Redis keys 2020-10-28 22:30:49 +00:00
6acb106761 Provision users with lowercase names 2020-10-28 22:30:04 +00:00
7d9a45ee91 Add the time field to some notifications 2020-10-28 22:26:41 +00:00
913009ab71 Fix circular import in ChanKeep/provisioning modules 2020-10-28 18:38:27 +00:00
82c5c2d163 Start implementing prefixes 2020-07-09 19:43:47 +01:00
3acf182171 Fixes to auth detection and message parsing
* don't check authentication if the network doesn't need to
  register
* don't pass through muser for ZNC type messages
* avoid duplicate message for queries containing highlights
* make a copy of the cast for metadata analysis to avoid poisoning it
* set up callback for when the instance is authenticated, so we can
  request a LIST immediately if so desired
* separate out seeding functions to populate CHANLIMIT to ease future
  work involving other options, such as PREFIX
2020-06-07 17:26:53 +01:00
2a9869d0f9 Remove condition-based monitoring system 2020-06-07 15:31:43 +01:00
1640955e5c Fix various bugs in the event system
Squash many bugs in the event notification system and simplify the
code.
2020-06-02 21:34:15 +01:00
290e0b5f87 Fix syntax error in redis query 2020-05-31 21:54:43 +01:00
097f100ec5 Implement authentication detection
* pending command to see which instances have never authenticated
* authcheck command to see which instances are not currently
  authenticated
2020-05-31 21:52:56 +01:00
586a337ea4 Add help for pending command 2020-05-31 16:40:51 +01:00
5ee53ace4c Add additional error handling in user queries 2020-05-31 13:44:34 +01:00
81b0450904 Function to select and merge IRC network defs 2020-05-31 13:23:09 +01:00
5c6b626396 Check registration status before joining channels
Do not join channels if any relay for a network is unregistered.
2020-05-31 13:09:58 +01:00
4f9ca6088b Allow sending LIST to all networks at once 2020-05-31 13:08:00 +01:00
efb9666b6a Add confirm command
Confirm command to check which relays need manual
confirmation.
2020-05-31 12:32:12 +01:00
aec683ccce Remove leftover irc.json file 2020-05-30 21:42:26 +01:00
a3cdb35e05 Implement registration and confirmation of nicks 2020-05-30 21:40:10 +01:00
d99c3c394f Restructure provisioning into fewer functions 2020-05-30 21:37:22 +01:00
1ac1061348 Add irc.json to gitignore 2020-05-30 21:35:50 +01:00
690bf93676 Fix variable scope in LIST error handling 2020-04-21 23:32:17 +01:00
f4e5d248d5 Separate provisioning into user and auth info 2019-12-28 17:51:03 +00:00
97a25334aa Add IRC network definitions 2019-12-28 17:50:38 +00:00
06903d872e Add more comments and remove obsolete code 2019-12-07 16:35:29 +00:00
e3e522ad1e Add requirements 2019-11-17 19:09:17 +00:00
14 changed files with 347 additions and 498 deletions

.gitignore (vendored, 1 change)

@@ -159,3 +159,4 @@ docker/data
 *.pem
 legacy/conf/live/
 legacy/conf/cert/
+stack.env

Makefile (new file, 20 lines)

@@ -0,0 +1,20 @@
+run:
+	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env up -d
+
+build:
+	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env build
+
+stop:
+	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env down
+
+log:
+	docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env logs -f
+
+run-infra:
+	docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env up -d
+
+stop-infra:
+	docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env down
+
+log-infra:
+	docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env logs -f

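Each target is a thin docker-compose wrapper: "make run" brings the production stack up detached, "make log" tails it, and the -infra variants drive the separate infrastructure stack in docker/docker-compose.infra.yml. All of them assume a stack.env file at the repository root, which the new .gitignore entry above keeps out of version control.
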
db.py (18 changes)

@@ -1,23 +1,28 @@
 import random
+from os import getenv
 
 import aioredis
 import orjson
+import redis
 
 # Kafka
 from aiokafka import AIOKafkaProducer
-from redis import StrictRedis
 
 import util
 
+trues = ("true", "1", "t", True)
+MONOLITH_KAFKA_ENABLED = getenv("MONOLITH_KAFKA_ENABLED", "false").lower() in trues
+
 # KAFKA_TOPIC = "msg"
 log = util.get_logger("db")
 
 # Redis (legacy)
-r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
+r = redis.from_url("redis://ssdb:1289", db=0)
 
 # AIORedis
-ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
+ar = aioredis.from_url("redis://ssdb:1289", db=0)
 
 TYPES_MAIN = [
     "msg",
@@ -44,6 +49,9 @@ KEYNAME = "queue"
 
 async def store_kafka_batch(data):
+    if not MONOLITH_KAFKA_ENABLED:
+        log.info(f"Not storing Kafka batch of length {len(data)}, Kafka is disabled.")
+        return
     # log.debug(f"Storing Kafka batch of {len(data)} messages")
     producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
     await producer.start()
@@ -114,7 +122,7 @@ async def queue_message(msg):
     Queue a message on the Redis buffer.
     """
     message = orjson.dumps(msg)
-    await ar.sadd(KEYNAME, message)
+    await ar.lpush(KEYNAME, message)
 
 
 async def queue_message_bulk(data):
@@ -123,4 +131,4 @@ async def queue_message_bulk(data):
     """
     for msg in data:
         message = orjson.dumps(msg)
-        await ar.sadd(KEYNAME, message)
+        await ar.lpush(KEYNAME, message)

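Two things change here: Kafka writes become optional behind MONOLITH_KAFKA_ENABLED, and the queue buffer moves from a Redis set (sadd) to a list (lpush). A list preserves insertion order and has an exact length, which is what the earlier "Use only one Redis key for the queue to make chunk size more precise" commit relies on. A minimal sketch of the resulting producer contract, reusing the names above (peek_chunk and the CHUNK_SIZE value are illustrative, not from the repository):

import asyncio

import aioredis
import orjson

# Same endpoint as db.py above: SSDB speaks the Redis wire protocol.
ar = aioredis.from_url("redis://ssdb:1289", db=0)
KEYNAME = "queue"
CHUNK_SIZE = 1000  # illustrative value

async def queue_message(msg):
    # LPUSH prepends, so the oldest message is always at the tail (index -1).
    await ar.lpush(KEYNAME, orjson.dumps(msg))

async def peek_chunk():
    # Read up to CHUNK_SIZE of the oldest messages without removing them.
    length = await ar.llen(KEYNAME)
    start = max(length - CHUNK_SIZE, 0)
    return [orjson.loads(m) for m in await ar.lrange(KEYNAME, start, -1)]

asyncio.run(queue_message({"type": "msg", "msg": "hello"}))
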
(file path not shown; appears to be the development docker-compose file)

@@ -6,7 +6,7 @@ x-superset-depends-on: &superset-depends-on
   - redis_superset
 x-superset-volumes: &superset-volumes
   # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
-  - ./docker/superset:/app/docker
+  - ${PORTAINER_GIT_DIR}/docker/superset:/app/docker
   - superset_home:/app/superset_home
 
 services:
@@ -21,7 +21,7 @@ services:
     volumes_from:
       - tmp
     depends_on:
-      broker:
+      druid:
         condition: service_started
       kafka:
         condition: service_healthy
@@ -100,7 +100,7 @@ services:
     container_name: superset_cache
     restart: unless-stopped
     volumes:
-      - redis:/data
+      - redis_superset:/data
 
   db:
     env_file: docker/.env-non-dev
     image: postgres:10
@@ -195,89 +195,117 @@ services:
       timeout: 30s
       retries: 45
 
-  coordinator:
-    image: apache/druid:0.23.0
-    container_name: coordinator
-    volumes:
-      - druid_shared:/opt/shared
-      - coordinator_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-    ports:
-      - "8081:8081"
-    command:
-      - coordinator
-    env_file:
-      - environment
-
-  broker:
-    image: apache/druid:0.23.0
-    container_name: broker
-    volumes:
-      - broker_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8082:8082"
-    command:
-      - broker
-    env_file:
-      - environment
-
-  historical:
-    image: apache/druid:0.23.0
-    container_name: historical
-    volumes:
-      - druid_shared:/opt/shared
-      - historical_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8083:8083"
-    command:
-      - historical
-    env_file:
-      - environment
-
-  middlemanager:
-    image: apache/druid:0.23.0
-    container_name: middlemanager
-    volumes:
-      - druid_shared:/opt/shared
-      - middle_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8091:8091"
-      - "8100-8105:8100-8105"
-    command:
-      - middleManager
-    env_file:
-      - environment
-
-  router:
-    image: apache/druid:0.23.0
-    container_name: router
-    volumes:
-      - router_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8888:8888"
-    command:
-      - router
-    env_file:
-      - environment
+  druid:
+    image: pathogen/druid:0.23.0
+    build: ./docker/druid/
+    container_name: druid
+    volumes:
+      - druid_shared:/opt/shared
+      - druid_var:/opt/druid/var
+    depends_on:
+      - zookeeper
+      - postgres
+    ports:
+      - "8081:8081"
+      - "8082:8082"
+      - "8083:8083"
+      - "8888:8888"
+    env_file:
+      - environment
+#  coordinator:
+#    #image: apache/druid:0.23.0
+#    image: pathogen/druid:0.23.0
+#    build: ./docker/druid/
+#    container_name: coordinator
+#    volumes:
+#      - druid_shared:/opt/shared
+#      - coordinator_var:/opt/druid/var
+#    depends_on:
+#      - zookeeper
+#      - postgres
+#    ports:
+#      - "8081:8081"
+#    command:
+#      - coordinator
+#    env_file:
+#      - environment
+#  broker:
+#    #image: apache/druid:0.23.0
+#    image: pathogen/druid:0.23.0
+#    build: ./docker/druid/
+#    container_name: broker
+#    volumes:
+#      - broker_var:/opt/druid/var
+#    depends_on:
+#      - zookeeper
+#      - postgres
+#      - coordinator
+#    ports:
+#      - "8082:8082"
+#    command:
+#      - broker
+#    env_file:
+#      - environment
+#  historical:
+#    #image: apache/druid:0.23.0
+#    image: pathogen/druid:0.23.0
+#    build: ./docker/druid/
+#    container_name: historical
+#    volumes:
+#      - druid_shared:/opt/shared
+#      - historical_var:/opt/druid/var
+#    depends_on:
+#      - zookeeper
+#      - postgres
+#      - coordinator
+#    ports:
+#      - "8083:8083"
+#    command:
+#      - historical
+#    env_file:
+#      - environment
+#  middlemanager:
+#    #image: apache/druid:0.23.0
+#    image: pathogen/druid:0.23.0
+#    build: ./docker/druid/
+#    container_name: middlemanager
+#    volumes:
+#      - druid_shared:/opt/shared
+#      - middle_var:/opt/druid/var
+#    depends_on:
+#      - zookeeper
+#      - postgres
+#      - coordinator
+#    ports:
+#      - "8091:8091"
+#      - "8100-8105:8100-8105"
+#    command:
+#      - middleManager
+#    env_file:
+#      - environment
+#  router:
+#    #image: apache/druid:0.23.0
+#    image: pathogen/druid:0.23.0
+#    build: ./docker/druid/
+#    container_name: router
+#    volumes:
+#      - router_var:/opt/druid/var
+#    depends_on:
+#      - zookeeper
+#      - postgres
+#      - coordinator
+#    ports:
+#      - "8888:8888"
+#    command:
+#      - router
+#    env_file:
+#      - environment
 #  db:
 #    #image: pathogen/manticore:kibana
 #    image: manticoresearch/manticore:dev
@@ -343,9 +371,9 @@ volumes:
     external: false
   redis_data: {}
   metadata_data: {}
-  middle_var: {}
-  historical_var: {}
-  broker_var: {}
-  coordinator_var: {}
-  router_var: {}
+#  middle_var: {}
+#  historical_var: {}
+#  broker_var: {}
+#  coordinator_var: {}
+  druid_var: {}
   druid_shared: {}

(file path not shown; appears to be docker/.env-non-dev, the Superset environment file)

@@ -41,6 +41,6 @@ REDIS_PORT=6379
 FLASK_ENV=production
 SUPERSET_ENV=production
-SUPERSET_LOAD_EXAMPLES=yes
+SUPERSET_LOAD_EXAMPLES=no
 CYPRESS_CONFIG=false
 SUPERSET_PORT=8088

(file path not shown; appears to be docker/docker-compose.infra.yml, new file, 142 lines)

@@ -0,0 +1,142 @@
+version: "2.2"
+
+x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
+x-superset-depends-on: &superset-depends-on
+  - db
+  - redis_superset
+x-superset-volumes: &superset-volumes
+  # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
+  - ${PORTAINER_GIT_DIR}/docker/superset:/app/docker
+  - superset_home:/app/superset_home
+
+services:
+  redis_superset:
+    image: redis:latest
+    container_name: superset_cache
+    restart: unless-stopped
+    volumes:
+      - redis:/data
+
+  db:
+    env_file: .env-non-dev
+    image: postgres:10
+    container_name: superset_db
+    restart: unless-stopped
+    volumes:
+      - db_home:/var/lib/postgresql/data
+
+  superset:
+    env_file: .env-non-dev
+    image: *superset-image
+    container_name: superset_app
+    command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
+    user: "root"
+    restart: unless-stopped
+    ports:
+      - 8088:8088
+    depends_on: *superset-depends-on
+    volumes: *superset-volumes
+
+  superset-init:
+    image: *superset-image
+    container_name: superset_init
+    command: ["/app/docker/docker-init.sh"]
+    env_file: .env-non-dev
+    depends_on: *superset-depends-on
+    user: "root"
+    volumes: *superset-volumes
+
+  superset-worker:
+    image: *superset-image
+    container_name: superset_worker
+    command: ["/app/docker/docker-bootstrap.sh", "worker"]
+    env_file: .env-non-dev
+    restart: unless-stopped
+    depends_on: *superset-depends-on
+    user: "root"
+    volumes: *superset-volumes
+
+  superset-worker-beat:
+    image: *superset-image
+    container_name: superset_worker_beat
+    command: ["/app/docker/docker-bootstrap.sh", "beat"]
+    env_file: .env-non-dev
+    restart: unless-stopped
+    depends_on: *superset-depends-on
+    user: "root"
+    volumes: *superset-volumes
+
+  postgres:
+    container_name: postgres
+    image: postgres:latest
+    volumes:
+      - /block/store/metadata_data:/var/lib/postgresql/data
+    environment:
+      - POSTGRES_PASSWORD=FoolishPassword
+      - POSTGRES_USER=druid
+      - POSTGRES_DB=druid
+
+  # Need 3.5 or later for container nodes
+  zookeeper:
+    container_name: zookeeper
+    image: zookeeper:3.5
+    ports:
+      - "2181:2181"
+    environment:
+      - ZOO_MY_ID=1
+
+  kafka:
+    image: wurstmeister/kafka:latest
+    container_name: kafka
+    depends_on:
+      - zookeeper
+    ports:
+      - 9092:9092
+      - 29092:29092
+    environment:
+      KAFKA_BROKER_ID: 1
+      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
+      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
+      KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
+      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
+      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
+      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
+      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
+      KAFKA_MESSAGE_MAX_BYTES: 2000000
+      #KAFKA_HEAP_OPTS: -Xmx2g
+    healthcheck:
+      test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
+      start_period: 15s
+      interval: 30s
+      timeout: 30s
+      retries: 45
+
+  druid:
+    image: pathogen/druid:0.23.0
+    build: ./druid/
+    container_name: druid
+    volumes:
+      - /block/store/druid_shared:/opt/shared
+      - /block/store/druid_var:/opt/druid/var
+    depends_on:
+      - zookeeper
+      - postgres
+    ports:
+      - "8081:8081"
+      - "8082:8082"
+      - "8083:8083"
+      - "8888:8888"
+    env_file:
+      - environment
+
+networks:
+  default:
+    external:
+      name: pathogen
+
+volumes:
+  superset_home:
+    external: false
+  db_home:
+    external: false
+  redis:
+    external: false

(file path not shown; appears to be docker/docker-compose.prod.yml, the production compose file)

@@ -1,44 +1,14 @@
 version: "2.2"
 
-# volumes:
-#   metadata_data: {}
-#   middle_var: {}
-#   historical_var: {}
-#   broker_var: {}
-#   coordinator_var: {}
-#   router_var: {}
-#   druid_shared: {}
-
-x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
-x-superset-depends-on: &superset-depends-on
-  - db
-  - redis_superset
-x-superset-volumes: &superset-volumes
-  # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
-  - ./docker/superset:/app/docker
-  - superset_home:/app/superset_home
-
 services:
   app:
     image: pathogen/monolith:latest
     container_name: monolith
-    build: ./docker
+    build: ${PORTAINER_GIT_DIR}/docker
     volumes:
       - ${PORTAINER_GIT_DIR}:/code
     env_file:
       - ../stack.env
-    volumes_from:
-      - tmp
-    depends_on:
-      broker:
-        condition: service_started
-      kafka:
-        condition: service_healthy
-      tmp:
-        condition: service_started
-      redis:
-        condition: service_healthy
-#      - db
 
   threshold:
     image: pathogen/threshold:latest
@@ -47,7 +17,7 @@ services:
     volumes:
       - ${PORTAINER_GIT_DIR}:/code
      - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
-      #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
+      - ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
       - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
     ports:
       - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
@@ -55,283 +25,19 @@ services:
       - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
     env_file:
       - ../stack.env
-    volumes_from:
-      - tmp
-    depends_on:
-      tmp:
-        condition: service_started
-      redis:
-        condition: service_healthy
-
-#  db:
-#    #image: pathogen/manticore:kibana
-#    image: manticoresearch/manticore:latest
-#    #build:
-#    #  context: ./docker/manticore
-#    #  args:
-#    #    DEV: 1
-#    restart: always
-
-#  turnilo:
-#    container_name: turnilo
-#    image: uchhatre/turnilo:latest
-#    ports:
-#      - 9093:9090
-#    environment:
-#      - DRUID_BROKER_URL=http://broker:8082
-#    depends_on:
-#      - broker
-
-#  metabase:
-#    container_name: metabase
-#    image: metabase/metabase:latest
-#    ports:
-#      - 3096:3000
-#    environment:
-#      JAVA_OPTS: -Xmx1g
-#      MB_DB_TYPE: postgres
-#      MB_DB_DBNAME: metabase
-#      MB_DB_PORT: 5432
-#      MB_DB_USER: druid
-#      MB_DB_PASS: FoolishPassword
-#      MB_DB_HOST: postgres
-#    depends_on:
-#      - broker
-
-  redis_superset:
-    image: redis:latest
-    container_name: superset_cache
-    restart: unless-stopped
-    volumes:
-      - redis:/data
-
-  db:
-    env_file: .env-non-dev
-    image: postgres:10
-    container_name: superset_db
-    restart: unless-stopped
-    volumes:
-      - db_home:/var/lib/postgresql/data
-
-  superset:
-    env_file: .env-non-dev
-    image: *superset-image
-    container_name: superset_app
-    command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
-    user: "root"
-    restart: unless-stopped
-    ports:
-      - 8088:8088
-    depends_on: *superset-depends-on
-    volumes: *superset-volumes
-
-  superset-init:
-    image: *superset-image
-    container_name: superset_init
-    command: ["/app/docker/docker-init.sh"]
-    env_file: .env-non-dev
-    depends_on: *superset-depends-on
-    user: "root"
-    volumes: *superset-volumes
-
-  superset-worker:
-    image: *superset-image
-    container_name: superset_worker
-    command: ["/app/docker/docker-bootstrap.sh", "worker"]
-    env_file: .env-non-dev
-    restart: unless-stopped
-    depends_on: *superset-depends-on
-    user: "root"
-    volumes: *superset-volumes
-
-  superset-worker-beat:
-    image: *superset-image
-    container_name: superset_worker_beat
-    command: ["/app/docker/docker-bootstrap.sh", "beat"]
-    env_file: .env-non-dev
-    restart: unless-stopped
-    depends_on: *superset-depends-on
-    user: "root"
-    volumes: *superset-volumes
-
-  postgres:
-    container_name: postgres
-    image: postgres:latest
-    volumes:
-      - /block/store/metadata_data:/var/lib/postgresql/data
-    environment:
-      - POSTGRES_PASSWORD=FoolishPassword
-      - POSTGRES_USER=druid
-      - POSTGRES_DB=druid
-
-  # Need 3.5 or later for container nodes
-  zookeeper:
-    container_name: zookeeper
-    image: zookeeper:3.5
-    ports:
-      - "2181:2181"
-    environment:
-      - ZOO_MY_ID=1
-
-  kafka:
-    image: wurstmeister/kafka:latest
-    container_name: kafka
-    depends_on:
-      - zookeeper
-    ports:
-      - 9092:9092
-      - 29092:29092
-    environment:
-      KAFKA_BROKER_ID: 1
-      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
-      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
-      KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
-      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
-      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
-      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
-      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
-      KAFKA_MESSAGE_MAX_BYTES: 2000000
-      #KAFKA_HEAP_OPTS: -Xmx2g
-    healthcheck:
-      test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
-      start_period: 15s
-      interval: 30s
-      timeout: 30s
-      retries: 45
-
-  coordinator:
-    image: apache/druid:0.23.0
-    container_name: coordinator
-    volumes:
-      - /block/store/druid_shared:/opt/shared
-      - /block/store/coordinator_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-    ports:
-      - "8081:8081"
-    command:
-      - coordinator
-    env_file:
-      - environment
-
-  broker:
-    image: apache/druid:0.23.0
-    container_name: broker
-    volumes:
-      - /block/store/broker_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8082:8082"
-    command:
-      - broker
-    env_file:
-      - environment
-
-  historical:
-    image: apache/druid:0.23.0
-    container_name: historical
-    volumes:
-      - /block/store/druid_shared:/opt/shared
-      - /block/store/historical_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8083:8083"
-    command:
-      - historical
-    env_file:
-      - environment
-
-  middlemanager:
-    image: apache/druid:0.23.0
-    container_name: middlemanager
-    volumes:
-      - /block/store/druid_shared:/opt/shared
-      - /block/store/middle_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8091:8091"
-      - "8100-8105:8100-8105"
-    command:
-      - middleManager
-    env_file:
-      - environment
-
-  router:
-    image: apache/druid:0.23.0
-    container_name: router
-    volumes:
-      - /block/store/router_var:/opt/druid/var
-    depends_on:
-      - zookeeper
-      - postgres
-      - coordinator
-    ports:
-      - "8888:8888"
-    command:
-      - router
-    env_file:
-      - environment
-
-#  db:
-#    #image: pathogen/manticore:kibana
-#    image: manticoresearch/manticore:dev
-#    #build:
-#    #  context: ./docker/manticore
-#    #  args:
-#    #    DEV: 1
-#    restart: always
-#    ports:
-#      - 9308
-#      - 9312
-#      - 9306
-#    ulimits:
-#      nproc: 65535
-#      nofile:
-#        soft: 65535
-#        hard: 65535
-#      memlock:
-#        soft: -1
-#        hard: -1
-#    environment:
-#      - MCL=1
-#    volumes:
-#      - ./docker/data:/var/lib/manticore
-#      - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
-
-  tmp:
-    image: busybox
-    command: chmod -R 777 /var/run/redis
-    volumes:
-      - /var/run/redis
-
-  redis:
-    image: redis
-    command: redis-server /etc/redis.conf
-    ulimits:
-      nproc: 65535
-      nofile:
-        soft: 65535
-        hard: 65535
-    volumes:
-      - ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
-      - redis_data:/data
-    volumes_from:
-      - tmp
-    healthcheck:
-      test: "redis-cli -s /var/run/redis/redis.sock ping"
-      interval: 2s
-      timeout: 2s
-      retries: 15
+    # for development
+    extra_hosts:
+      - "host.docker.internal:host-gateway"
+
+  ssdb:
+    image: tsl0922/ssdb
+    container_name: ssdb_monolith
+    volumes:
+      - ssdb_data:/ssdb/var
+    ports:
+      - "1289:1289"
+    environment:
+      - SSDB_PORT=1289
 
 networks:
   default:
@@ -339,10 +45,4 @@ networks:
       name: pathogen
 
 volumes:
-  redis_data: {}
-  superset_home:
-    external: false
-  db_home:
-    external: false
-  redis:
-    external: false
+  ssdb_data: {}

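The upshot of this diff: everything heavyweight (Superset, Postgres, Zookeeper, Kafka, Druid) moves to the infrastructure compose file above, and the Redis service plus the busybox tmp container that opened up its socket directory both disappear. The queue lands on SSDB, a disk-backed store that speaks the Redis wire protocol, exposed here on port 1289, which is why the redis/aioredis clients in db.py only needed a new connection URL.
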
docker/druid/Dockerfile (new file, 22 lines)

@@ -0,0 +1,22 @@
+ARG DRUID_VER=0.23.0
+
+FROM apache/druid:${DRUID_VER} AS druid
+
+FROM ubuntu:bionic
+
+RUN apt-get update && \
+    apt-get install --yes openjdk-8-jre-headless perl-modules && \
+    apt-get clean
+
+RUN addgroup --system -gid 1000 druid \
+    && adduser --system --uid 1000 --disabled-password --home /opt/druid --shell /bin/bash --group druid
+
+COPY --from=druid --chown=druid:druid /opt/druid /opt/druid
+
+WORKDIR /opt/druid
+USER druid
+
+EXPOSE 8888/tcp
+EXPOSE 8081/tcp
+
+CMD /opt/druid/bin/start-nano-quickstart

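A two-stage build: /opt/druid is copied out of the official image onto a plain Ubuntu base carrying only a headless JRE 8 and perl-modules, and the container starts Druid's single-node nano-quickstart profile. This is what lets one druid container replace the five separate coordinator/broker/historical/middlemanager/router services in the compose files above.
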
docker/turnilo.yaml (new file, 3 lines)

@@ -0,0 +1,3 @@
+clusters:
+  - name: druid
+guardDataCubes: true

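This registers the single Druid cluster with Turnilo; guardDataCubes makes Turnilo expose only explicitly declared data cubes rather than every Druid datasource it discovers (per Turnilo's configuration options).
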
(file path not shown; appears to be the Druid environment file, deleted, 87 lines)

@@ -1,87 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-
-# Java tuning
-#DRUID_XMX=1g
-#DRUID_XMS=1g
-#DRUID_MAXNEWSIZE=250m
-#DRUID_NEWSIZE=250m
-#DRUID_MAXDIRECTMEMORYSIZE=1g
-
-#druid_emitter_logging_logLevel=debug
-#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
-#druid_zk_service_host=zookeeper
-#druid_metadata_storage_host=
-#druid_metadata_storage_type=postgresql
-#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
-#druid_metadata_storage_connector_user=druid
-#druid_metadata_storage_connector_password=FoolishPassword
-#druid_coordinator_balancer_strategy=cachingCost
-#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
-#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
-#druid_processing_buffer_sizeBytes=268435456 # 256MiB
-#druid_storage_type=local
-#druid_storage_storageDirectory=/opt/shared/segments
-#druid_indexer_logs_type=file
-#druid_indexer_logs_directory=/opt/shared/indexing-logs
-#druid_processing_numThreads=1
-#druid_processing_numMergeBuffers=1
-#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
-
-# Java tuning
-#DRUID_XMX=1g
-#DRUID_XMS=1g
-#DRUID_MAXNEWSIZE=250m
-#DRUID_NEWSIZE=250m
-#DRUID_MAXDIRECTMEMORYSIZE=6172m
-
-DRUID_SINGLE_NODE_CONF=nano-quickstart
-
-druid_emitter_logging_logLevel=debug
-druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
-druid_zk_service_host=zookeeper
-druid_metadata_storage_host=
-druid_metadata_storage_type=postgresql
-druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
-druid_metadata_storage_connector_user=druid
-druid_metadata_storage_connector_password=FoolishPassword
-druid_coordinator_balancer_strategy=cachingCost
-druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
-druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
-druid_storage_type=local
-druid_storage_storageDirectory=/opt/shared/segments
-druid_indexer_logs_type=file
-druid_indexer_logs_directory=/opt/shared/indexing-logs
-druid_processing_numThreads=2
-druid_processing_numMergeBuffers=2
-DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>

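The deleted file bundled the Apache license header, the commented-out stock tuning, and the active single-node settings: the nano-quickstart profile, the Kafka indexing service and PostgreSQL metadata storage extensions, and local segment/log storage under /opt/shared. Given the "Clean up docker environment" commit, it presumably now lives with the Druid build context referenced by the compose files.
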
(file changed, but no diff content rendered for this entry)

(file path not shown; legacy Threshold code)

@@ -67,7 +67,7 @@ def parsemeta(numName, c):
 def queue_message(c):
     message = json.dumps(c)
-    main.g.sadd("queue", message)
+    main.g.lpush("queue", message)
 
 
 def event(

(file path not shown; appears to be the message processing module)

@@ -15,6 +15,7 @@ from concurrent.futures import ProcessPoolExecutor
 # For timestamp processing
 from datetime import datetime
 from math import ceil
+from os import getenv
 
 import orjson
 import regex
@@ -51,6 +52,12 @@ import util
 # 4chan schema
 from schemas.ch4_s import ATTRMAP
 
+trues = ("true", "1", "t", True)
+MONOLITH_PROCESS_PERFSTATS = (
+    getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
+)
+
 CUSTOM_FILTERS = [
     lambda x: x.lower(),
     strip_tags,  #
@@ -267,17 +274,19 @@ def process_data(data):
         # Add the mutated message to the return buffer
         to_store.append(msg)
     total_time += (time.process_time() - total_start) * 1000
-    log.debug("=====================================")
-    log.debug(f"Sentiment: {sentiment_time}")
-    log.debug(f"Regex: {regex_time}")
-    log.debug(f"Polyglot: {polyglot_time}")
-    log.debug(f"Date: {date_time}")
-    log.debug(f"NLP: {nlp_time}")
-    log.debug(f"Normalise: {normalise_time}")
-    log.debug(f"Hash: {hash_time}")
-    log.debug(f"Normal2: {normal2_time}")
-    log.debug(f"Soup: {soup_time}")
-    log.debug(f"Total: {total_time}")
-    log.debug("=====================================")
+    if MONOLITH_PROCESS_PERFSTATS:
+        log.debug("=====================================")
+        log.debug(f"Sentiment: {sentiment_time}")
+        log.debug(f"Regex: {regex_time}")
+        log.debug(f"Polyglot: {polyglot_time}")
+        log.debug(f"Date: {date_time}")
+        log.debug(f"NLP: {nlp_time}")
+        log.debug(f"Normalise: {normalise_time}")
+        log.debug(f"Hash: {hash_time}")
+        log.debug(f"Normal2: {normal2_time}")
+        log.debug(f"Soup: {soup_time}")
+        log.debug(f"Total: {total_time}")
+        log.debug("=====================================")
 
     return to_store

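This gates the per-stage timing dump (sentiment, regex, polyglot, date, NLP, normalise, hash, soup) behind MONOLITH_PROCESS_PERFSTATS, matching the "Add some environment variables to control debug output" commit at the top of the list. The timings are still accumulated either way; by default they are simply no longer logged.
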
(file path not shown; appears to be the ingest module)

@@ -36,7 +36,10 @@ class Ingest(object):
         items = []
         # for source in SOURCES:
         #     key = f"{KEYPREFIX}{source}"
-        chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE)
+        length = await db.ar.llen(KEYNAME)
+        start_num = length - CHUNK_SIZE
+        chunk = await db.ar.lrange(KEYNAME, start_num, -1)
+        # chunk = await db.ar.rpop(KEYNAME, CHUNK_SIZE)
         if not chunk:
             return
         for item in chunk:
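
Worth noting: unlike the old SPOP, LRANGE only reads; it does not remove what it returns, so the tail has to be trimmed separately once a batch is safely ingested. A hypothetical atomic variant in the same style (pop_chunk is not in the repository, and it assumes the backing store honours MULTI/EXEC transactions as Redis does; SSDB's support may differ):

    async def pop_chunk(self):
        # Read the oldest CHUNK_SIZE items (the tail, since LPUSH prepends)
        # and trim them off in one transaction, so nothing is consumed twice.
        # Negative indices also handle lists shorter than CHUNK_SIZE.
        async with db.ar.pipeline(transaction=True) as pipe:
            pipe.lrange(KEYNAME, -CHUNK_SIZE, -1)    # oldest CHUNK_SIZE items
            pipe.ltrim(KEYNAME, 0, -CHUNK_SIZE - 1)  # keep only the newer rest
            chunk, _ = await pipe.execute()
        return chunk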