Compare commits

..

308 Commits

Author SHA1 Message Date
808ed18b74 Switch quickstart setting to nano 2022-10-04 20:37:02 +01:00
34e589aa9c Set Superset env file relative to docker directory 2022-10-04 20:30:14 +01:00
cc6340acab Add persistent Redis data store and copy over Druid config to production 2022-10-04 20:26:58 +01:00
7b73229d5a Add Apache Superset and fix Druid resource usage 2022-10-04 20:17:04 +01:00
35ba2cc947 Add postgres config to Metabase 2022-10-02 14:29:40 +01:00
817bfd8835 Time stuff and switch to gensim for tokenisation 2022-10-01 14:46:45 +01:00
40cf0c6430 Remove commented debug code 2022-09-30 07:22:22 +01:00
63081f68b7 Use only one Redis key for the queue to make chunk size more precise for thread allocation 2022-09-30 07:22:22 +01:00
5992498493 Remove ujson 2022-09-30 15:30:34 +01:00
328db4a6da Reformat 2022-09-30 15:23:00 +01:00
c5c834da82 Add config file to Turnilo 2022-09-27 08:30:28 +01:00
a8dbabd85e Implement uvloop 2022-09-23 07:20:30 +01:00
56b5c85fac Print Ingest settings on start 2022-09-23 08:32:29 +01:00
fc7450c33a Make debug output cleaner 2022-09-22 17:39:29 +01:00
0e9a016e2a Fix indexer options 2022-09-22 17:39:18 +01:00
763501d1ee Fix Java variable in indexer parameters 2022-09-22 08:41:59 +01:00
40a215e6ec Decrease memory requirements further and switch Kafka image 2022-09-21 21:11:13 +01:00
7abf9a00cb Set Kafka max heap size 2022-09-21 20:26:05 +01:00
bd3f1ecd53 Set max memory for Metabase 2022-09-21 14:39:11 +01:00
64ebcedd76 Remove debugging code and fix regex substitution 2022-09-21 12:48:54 +01:00
3d293daad3 Change dev container names 2022-09-21 12:09:18 +01:00
00890860c0 Change prod container names 2022-09-21 12:08:29 +01:00
b0efaeef90 Remove prod compose comment 2022-09-21 12:04:54 +01:00
d6d19625f3 Remove commented code for debugging 2022-09-21 10:02:05 +01:00
cf4aa45663 Normalise fields in processing and remove invalid characters 2022-09-21 10:01:12 +01:00
48e4c07959 Make production volumes point to external storage 2022-09-21 10:00:48 +01:00
027c43b60a Don't muddle up the topics when sending Kafka batches 2022-09-20 23:03:02 +01:00
e0803d4934 Document new PROCESS_THREADS setting in example file 2022-09-20 22:43:04 +01:00
6de17063a2 Make CPU threads configurable 2022-09-20 22:29:13 +01:00
2c5133a546 Make performance settings configurable 2022-09-20 22:22:13 +01:00
24929a5fbb Set memory size to 2.5GB 2022-09-08 07:20:30 +01:00
f336d96268 Update DirectMemorySize to be 1.5GB 2022-09-19 21:51:07 +01:00
315e477916 Make MaxDirectMemory 0.5*cores 2022-09-19 19:15:57 +01:00
006677819d Make max memory size 512m 2022-09-19 19:10:33 +01:00
93a0be98ce Further decrease Druid memory requirements 2022-09-19 17:07:15 +01:00
14322f5090 Bump production Kafka healthcheck timeout 2022-09-19 11:18:52 +01:00
d94da5ac5c Decrease production Druid max memory size 2022-09-19 10:51:34 +01:00
a1382ee46d Increase Kafka retries 2022-09-19 10:48:29 +01:00
5e6b962ea8 Change Metabase port 2022-09-18 13:15:10 +01:00
e8dd847b36 Add docker environment file 2022-09-18 13:05:08 +01:00
d68bcfaebd Update production compose 2022-09-18 13:04:08 +01:00
ebfa06e8d6 Reformat comment 2022-09-18 13:02:06 +01:00
3ed382ec13 Implement restricted sources 2022-09-18 13:01:19 +01:00
dab5e81715 Fix merge conflict 2022-09-16 17:45:24 +01:00
143f2a0bf0 Implement sentiment/NLP annotation and optimise processing 2022-09-16 17:09:49 +01:00
4ea77ac543 Properly process Redis buffered messages and ingest into Kafka 2022-09-14 18:32:32 +01:00
fec0d379a6 Ingest into Kafka and queue messages better 2022-09-13 22:17:46 +01:00
3c2adfc16e Implement Apache Druid/Kafka and Metabase 2022-09-13 22:17:32 +01:00
4c6fe87b88 Switch to latest image for dev docker-compose 2022-09-13 09:20:43 +01:00
79a430be04 Begin implementing Apache Druid 2022-09-08 07:20:30 +01:00
baea6aebeb Use stable after all 2022-09-08 07:20:30 +01:00
eaecc5cdbe Switch production image back to dev 2022-09-08 07:20:30 +01:00
764e36ef14 Lower memory requirements to prevent crashes 2022-09-08 07:20:30 +01:00
50a873dbba Set dev image back to the default 2022-09-12 08:43:18 +01:00
21182629b4 Treat text fields as string and try beta Kibana image 2022-09-12 08:27:13 +01:00
dfd71b6c64 Add Mysql port to ports instead of expose 2022-09-10 13:20:06 +01:00
1b0817b047 Expose the Mysql port 2022-09-10 13:16:19 +01:00
0ba4929294 Use dev image of manticore 2022-09-10 12:03:45 +01:00
caded433b7 Remove indexer block to attempt to prevent Manticore DB crash 2022-09-08 07:20:30 +01:00
bf802d7fdf Reformat 2022-09-07 07:20:30 +01:00
89328a827a Raise open files limit for Redis 2022-09-07 07:20:30 +01:00
32249a1d99 Add 4chan update message type to main types 2022-09-07 07:20:30 +01:00
cdd12cd082 Implement threshold writing to Redis and manticore ingesting from Redis 2022-09-07 07:20:30 +01:00
137299fe9e Add config directories to gitignore 2022-09-08 09:45:18 +01:00
2aedcf77a0 Add aioredis 2022-09-08 09:44:27 +01:00
49784dfbe5 Implement ingesting to Redis from Threshold 2022-09-07 07:20:30 +01:00
a6b5348224 Config relative to Git dir 2022-09-05 07:20:30 +01:00
d0fe2baafe Store persistent database elsewhere 2022-09-05 07:20:30 +01:00
e092327932 Improve DB performance with caching 2022-09-05 07:20:30 +01:00
8b9ad05089 Reformat legacy project 2022-09-05 07:20:30 +01:00
6b082adeb2 Merge branch 'threshold' 2022-09-06 12:50:25 +01:00
bd9f9378cf Moved files to subdirectory 2022-09-06 12:50:09 +01:00
62fe03a6cb Increase thread delay time 2022-09-05 07:20:30 +01:00
297bbbe035 Alter schemas and 4chan performance settings 2022-09-05 07:20:30 +01:00
ed7c439b56 Remove some debugging code 2022-09-05 07:20:30 +01:00
ecb8079b5b Change Python to 3.10 2022-09-05 07:20:30 +01:00
6811ce4af5 Update production env file path 2022-09-05 07:20:30 +01:00
e34d281774 Remove development dotenv loading 2022-09-05 07:20:30 +01:00
91e18c60e6 Add debug statement 2022-09-05 07:20:30 +01:00
9c9d49dcd2 Reformat and set the net and channel for 4chan 2022-09-05 07:20:30 +01:00
dcd648e1d2 Make crawler more efficient and implement configurable parameters 2022-09-05 07:20:30 +01:00
318a8ddbd5 Split thread list into chunks to save memory 2022-09-05 07:20:30 +01:00
20e22ae7ca Reformat code 2022-09-04 21:40:04 +01:00
8feccbbf00 Reinstate Redis cache 2022-09-04 21:38:53 +01:00
db46fea550 Run processing in thread 2022-09-04 21:29:00 +01:00
22cef33342 Implement aiohttp 2022-09-04 19:44:25 +01:00
663a26778d Begin implementing aiohttp 2022-09-04 13:47:32 +01:00
36de004ee5 Implement running Discord and 4chan gathering simultaneously 2022-09-02 22:30:45 +01:00
2c3d83fe9a Fix error when no email can be found 2022-08-27 11:19:28 +01:00
d7adffb47f Fix getting first relay when they are not sequential 2022-08-26 22:17:12 +01:00
4f4820818a Log authentication messages 2022-08-16 23:01:42 +01:00
5cc38da00e Implement deduplicating channels 2022-08-16 22:01:35 +01:00
a4dae2a583 Switch to siphash 2022-08-18 07:20:30 +01:00
5f1667869f Re-add fake messages 2022-08-15 19:49:21 +01:00
09a5cd14ad Detect queries if nick and channel are the same 2022-08-15 19:24:42 +01:00
96de70aaf2 Add sinst fetch and fix message send logic 2022-08-15 19:15:12 +01:00
f8c1e952bb Switch debugging statements to trace in ChanKeep 2022-08-15 19:15:00 +01:00
36628e157d Fix query handling and don't send a fake message 2022-08-15 17:59:31 +01:00
aeee745ac9 Only run pingmsg after negative has completed 2022-08-18 07:20:30 +01:00
d795af164f Fix debug statements and amend function names 2022-08-18 07:20:30 +01:00
4acadd3508 Properly format string 2022-08-18 07:20:30 +01:00
5c4904ba56 Improve regPing debugging 2022-08-18 07:20:30 +01:00
4e88b93856 Improve regPing negative handling logic 2022-08-18 07:20:30 +01:00
af1dba5741 Fix double messages and regPing logic 2022-08-18 07:20:30 +01:00
553e2eb2b7 Set the channel limit on connected relays, not active 2022-08-18 07:20:30 +01:00
3dfc6d736a Look before you leap to confirming registrations 2022-08-18 07:20:30 +01:00
7ef76d1424 Fix IRC config mutation 2022-08-18 07:20:30 +01:00
d78600a2f1 Change authentication endpoint 2022-08-18 07:20:30 +01:00
f004bd47af Reorder API endpoints to prevent clashing 2022-08-18 07:20:30 +01:00
fafcff1427 Add more debugging information 2022-08-15 00:39:22 +01:00
e56bd61362 Figure out the channel parsing logic 2022-08-15 00:36:36 +01:00
2b7bd486f1 Pass a list instead of listinfo 2022-08-15 00:29:08 +01:00
a9592a85d0 Fix variable placement 2022-08-15 00:27:16 +01:00
e77c046965 Fix list parsing 2022-08-15 00:26:11 +01:00
7a8cee1431 Fix debugging code in keepChannels 2022-08-15 00:08:11 +01:00
e6527b4f9f Add debugging code in keepChannels 2022-08-15 00:07:29 +01:00
8979a03bbd Subtract one from length of list for indices 2022-08-15 00:04:49 +01:00
f7b84913f2 Lower max_chans to length of LIST if it's shorter 2022-08-15 00:03:12 +01:00
d46c98a211 Reset negative pass status when requesting recheck 2022-08-14 23:58:35 +01:00
d68f0589cb Implement initial WHO loop delay 2022-08-14 20:58:41 +01:00
d9ec68708b Fix getting all unregistered relays 2022-08-14 20:58:30 +01:00
1b77c50552 Blacklist channels we are kicked from 2022-08-14 20:44:04 +01:00
1ce5a8228c Use JSON for sending messages 2022-08-14 16:45:40 +01:00
f6f515b308 Implement API call to register 2022-08-14 16:26:09 +01:00
9864b4e2b5 Convert num to number in registration confirmation 2022-08-14 16:09:32 +01:00
2fdd0cf6b8 Allow current nick substitution in IRC commands 2022-08-14 15:53:18 +01:00
8c809ad444 Fix variable shadowing 2022-08-14 15:43:48 +01:00
2022ab985b Print identification message 2022-08-14 13:51:13 +01:00
b5e78bc4de Implement manual authentication mode 2022-08-14 13:13:05 +01:00
eba2c387f0 Implement API for authentication management actions 2022-08-14 12:43:33 +01:00
5123941c79 More debugging for reg tests and getstr command 2022-08-14 11:41:29 +01:00
6cc07c9171 Add allRelaysActive output to network info 2022-08-14 10:58:28 +01:00
ed1f3cdca7 Add debug statements and only check if network is connected when parting channels 2022-08-14 09:25:54 +01:00
128e005611 Use JSON for joining channels and don't shadow auth variable when getting network info 2022-08-14 09:25:01 +01:00
713e03b66e Make channel deletion endpoint accept JSON 2022-08-14 00:01:14 +01:00
a0761ff1ae LBYL 2022-08-13 23:38:13 +01:00
15523bed96 Add more information to relay API return 2022-08-13 23:36:39 +01:00
653d9ea4f9 Add even more debugging 2022-08-13 23:18:56 +01:00
f1229a76e1 Extra debugging for getting active relays 2022-08-13 23:17:26 +01:00
d4bcbf99e5 Fix typo in module name 2022-08-13 23:14:51 +01:00
e517d04095 Extra debugging for get_first_relay 2022-08-13 23:14:17 +01:00
65697ce8f0 Filter queries more carefully 2022-08-13 22:46:10 +01:00
ab9b0a1c9f Update CHANLIMIT on all instances when set via API 2022-08-13 22:36:52 +01:00
60f7a84383 Add helper to get all active relays 2022-08-13 22:36:18 +01:00
956d328fd3 Implement API endpoint to enable authentication 2022-08-13 22:25:29 +01:00
dcd7fcc3c0 Filter AUTH channel (OFTC fix) 2022-08-13 22:15:50 +01:00
7415ca5556 Use ChanKeep system for joining channels with joinSingle 2022-08-13 21:54:14 +01:00
9780a2dfc8 Fully make use of ECA for multiple channels 2022-08-13 21:40:53 +01:00
c7fa508a38 Return chanlimit for each relay 2022-08-13 21:22:43 +01:00
b83062c34f Check token before attempting to confirm 2022-08-13 20:55:36 +01:00
2e57e0930a Implement API endpoint for provisioning relays 2022-08-13 20:51:31 +01:00
43c5625b3b Implement configurable chanlimit and add more fields about LIST output to Redis 2022-08-13 20:37:21 +01:00
291968fbc7 Implement updating registration via API 2022-08-13 20:36:51 +01:00
dd67e9cc8b Implement ChanKeep without requiring persistent chanlimits on all networks 2022-08-13 19:20:29 +01:00
c145e5cf18 Add some debug statements and statistics for chanlimits 2022-08-13 18:40:13 +01:00
5db0373731 Print message if relay is unauthenticated/disconnected 2022-08-13 14:06:34 +01:00
6c11bbe912 Return relay numbers with channel list 2022-08-13 13:47:42 +01:00
4d543f31ec Add connected status to IRC info return and check when getting active relays 2022-08-13 13:40:33 +01:00
6c92e8e7d9 Reformat code 2022-08-13 13:32:22 +01:00
836e621063 Implement getting LIST information from API 2022-08-13 13:27:20 +01:00
852d62a9c9 Provision relay on creation 2022-08-13 00:18:06 +01:00
ddc9af0ddf Add docstrings to chankeep 2022-08-12 23:53:02 +01:00
edfb3f15eb Implement migrating networks 2022-08-12 23:32:00 +01:00
14967f662c Subtract allocated channel slots from total 2022-08-12 22:31:12 +01:00
0b370fc155 Improve channel allocation and write basic tests for it 2022-08-12 22:27:49 +01:00
9804f30060 Make channel join notification a TRACE 2022-08-12 20:19:39 +01:00
f7d6cec896 Fix email command 2022-08-12 20:19:33 +01:00
b871fea039 Add endpoint to get the bot's nickname 2022-08-09 07:20:30 +01:00
e69ce5090a Properly implement querying with API 2022-08-09 07:20:30 +01:00
813c9baf30 Get our hostname from WHO when we create fake events 2022-08-09 07:20:30 +01:00
220ce976f2 Fire a fake event when we send a message 2022-08-09 07:20:30 +01:00
719f014265 Implement best effort allocation 2022-08-11 21:44:19 +01:00
1ef600a9df Simplify variable names and reformat 2022-08-11 20:51:41 +01:00
b72a0672a5 Use ceil instead of round for relay number rounding 2022-08-11 20:46:44 +01:00
bb3b96e7f7 Expand ECA secondary allocation algorithm 2022-08-11 20:43:34 +01:00
c4db8ec99d Adding more debug statements in ECA system 2022-08-11 20:36:24 +01:00
73b0518a8f Print information about received LIST 2022-08-11 20:32:49 +01:00
571a527f43 Return correct data type for provisioning relays 2022-08-11 20:29:01 +01:00
4c3bab6d96 Simplify is_first_relay 2022-08-11 20:26:19 +01:00
14eb05722c Add even more debugging 2022-08-11 20:21:39 +01:00
11c226833d Add more LIST handling debugging 2022-08-11 20:18:49 +01:00
ea81fc80e3 Don't add 1 to current relays when iterating 2022-08-11 20:13:30 +01:00
8cd22888b7 Add extra debug call for allRelaysActive 2022-08-11 20:12:38 +01:00
ba4b8c7501 Reformat helpers 2022-08-11 20:09:14 +01:00
0666c4a153 Enable debug mode with env vars 2022-08-11 20:09:01 +01:00
2a5e6766be Update IRC template 2022-08-11 19:49:58 +01:00
c983a8e3b6 Allow gaps in relay numbering 2022-08-11 19:22:09 +01:00
a3fe92bea9 Implement deleting networks 2022-08-02 09:01:34 +01:00
9b03485b69 More error handling when joining channels with ChanKeep 2022-08-02 09:01:24 +01:00
98dcb99f90 Implement adding networks 2022-08-01 23:02:20 +01:00
aa68bfd9be Implement requesting channel list for network 2022-08-01 21:38:46 +01:00
f3f717e693 Remove debugging code 2022-08-01 21:31:48 +01:00
864f0904f5 Implement automatic provisioning 2022-08-01 19:34:35 +01:00
b72d3d67a1 Implement updating aliases 2022-08-01 19:05:12 +01:00
96d189290b Implement API endpoint to add next relay 2022-07-29 22:39:08 +01:00
c950bcbd43 Implement deleeting relays and fix adding 2022-07-29 22:11:43 +01:00
4472352785 Reformat code 2022-07-29 17:28:19 +01:00
75f79cf072 Fix joining channels with inactive relays 2022-07-29 17:28:09 +01:00
1ca6d79868 Implement creating relays via the API 2022-07-29 17:27:40 +01:00
33466b90ba Fix Redis config path 2022-07-29 22:22:22 +01:00
659d5b391b Use proper port for SSL listener 2022-07-29 22:22:22 +01:00
6e1dfecc95 Disable RelayAPI by default in stack file 2022-07-29 22:22:22 +01:00
3354a94024 Add stack example to test production 2022-07-29 09:09:08 +01:00
a5b25b2048 Use Git dir to make redis config absolute path 2022-07-29 09:06:13 +01:00
1f51bf2972 Use paths relative to root in production compose 2022-07-29 09:04:18 +01:00
6e41c8dfc0 Switch paths 2022-07-29 09:00:08 +01:00
ce0b26577f Use relative paths 2022-07-29 08:59:02 +01:00
335e602072 Fix redis.conf location in prod compose 2022-07-29 08:48:30 +01:00
1fcc9d6643 Don't pass template directory 2022-07-29 08:35:56 +01:00
1ab9824e95 Fix path issue 2022-07-29 08:32:39 +01:00
47312b04d4 Pass through configuration directories to compose 2022-07-29 08:31:01 +01:00
743c1d6be8 Fix environment variable path on production compose 2022-07-29 08:11:37 +01:00
1b60ec62f6 Properly configure production compose file 2022-07-29 08:02:10 +01:00
94303b1108 Create separate production configuration 2022-07-29 08:01:48 +01:00
219fc8ac35 Remove print statements 2022-07-28 21:30:23 +01:00
c5604c0ca8 Add trailing slash to example directory 2022-07-28 21:29:08 +01:00
f9482cac63 Add Portainer Git directory to env file 2022-07-28 21:27:26 +01:00
a61ba7b9e1 Seamlessly handle nonexistent configurations 2022-07-28 21:11:01 +01:00
b3dce50ce4 Add stack.env file 2022-07-28 19:57:26 +01:00
7eee2ec929 Move env file to example 2022-07-28 19:50:48 +01:00
2ad61e6afa Properly pass environment variables to the process 2022-07-28 19:50:07 +01:00
a598bbab4b Make some addresses and hosts configurable with environment variables 2022-07-28 19:38:37 +01:00
422d3d4cdc Lower compose version 2022-07-28 19:25:15 +01:00
2b4e037b51 Add docker definitions 2022-07-28 19:21:08 +01:00
15583bdaab Implement relay, channel and alias management 2022-07-27 22:03:42 +01:00
8050484b6f Implement editing networks via the API 2022-07-27 08:59:17 +01:00
4f141b976a Implement network and channels view 2022-07-26 22:16:35 +01:00
c302cd25da Implement API endpoint for network listing 2022-07-25 18:05:53 +01:00
24a2f79e8e Don't send to Logstash if it's disabled 2022-07-21 13:40:40 +01:00
8c9ec3ab9c Implement getting number of channels and users 2022-07-21 13:40:18 +01:00
a8d0a7d886 Implement more API functions 2022-07-21 13:40:17 +01:00
e3e150c805 Update config 2022-07-21 13:40:15 +01:00
071d6f4579 Implement API 2022-07-21 13:40:13 +01:00
4a8605626a Begin work on API endpoint 2022-07-21 13:40:11 +01:00
80c016761f Reformat again 2022-07-21 13:40:09 +01:00
7a0e2be66c Remove some legacy code 2022-07-21 13:40:07 +01:00
2fecd98978 Reformat project 2022-07-21 13:40:05 +01:00
4ecb37b179 Reformat and fix circular import 2022-07-21 13:40:03 +01:00
27cafa1def Revert "Reformat project"
This reverts commit 64e3e1160aa76d191740342ab3edc68807f890fb.
2022-07-21 13:40:01 +01:00
da678617d8 Reformat project 2022-07-21 13:39:59 +01:00
4669096fcb Don't attempt secondary registration if it is disabled 2022-07-21 13:39:57 +01:00
404fdb000f Don't attempt to register if it is disabled 2022-07-21 13:39:56 +01:00
2177766d90 Rename time to ts 2022-07-21 13:39:54 +01:00
4734a271a1 Extra error handling around emails 2022-07-21 13:39:52 +01:00
ef3151f34c Make Redis DBs configurable 2022-07-21 13:39:50 +01:00
8442c799be Add Redis DB numbers to configuration 2022-07-21 13:39:48 +01:00
e0f86ec853 Fix provisioning with emails 2022-07-21 13:39:46 +01:00
f88e6dec5a Fix some issues with the default config 2022-07-21 13:39:44 +01:00
4ff111a216 Improve email command 2022-07-21 13:39:43 +01:00
7c855e09c0 Reformat code with pre-commit 2022-07-21 13:39:41 +01:00
Mark Veidemanis
61f6715b20 Start implementing email command 2021-08-25 07:47:54 +00:00
Mark Veidemanis
0854c6d60d Add Logstash file 2021-08-24 20:08:18 +00:00
Mark Veidemanis
5179c43972 Implement modifying emails for aliases 2021-06-06 10:31:13 +00:00
Mark Veidemanis
7439d97c71 Finish Logstash implementation 2021-06-06 10:16:04 +00:00
Mark Veidemanis
391f917b38 Update requirements without versions 2021-06-06 10:13:43 +00:00
2686e4ab04 Merge branch 'master' into datarestructure 2020-11-02 20:18:36 +00:00
08b5dc06f0 Implement relay-independent join 2020-11-02 20:14:02 +00:00
5deb0649fb Don't discard server messages 2020-11-02 20:13:36 +00:00
9959231d50 Use substitutions in registration tests 2020-11-01 22:19:03 +00:00
73e596dac3 Additional error handling for command parsing 2020-11-01 22:18:48 +00:00
be405160e4 Fix bug with reg command 2020-11-01 20:43:51 +00:00
7489512a82 Add example file for blacklist 2020-11-01 19:55:32 +00:00
1f178a20ed Implement channel blacklisting 2020-11-01 19:54:24 +00:00
cb21ad8fca Fix bug with using muser attribute when absent 2020-11-01 19:03:56 +00:00
c10274ccd6 Fix syntax error in reg command 2020-11-01 18:50:17 +00:00
9fd6688892 Implement setting modes in ZNC 2020-11-01 03:39:32 +00:00
f54a448d54 Prepare command loader for reloading commands 2020-11-01 03:38:47 +00:00
fe52561b71 Implement registration at net-level 2020-11-01 03:37:29 +00:00
09405f374e Clarify message output on confirm command 2020-11-01 03:36:23 +00:00
16ab37cc0c Log error when ZNC says a channel can't be joined 2020-10-31 23:58:51 +00:00
fc3a349cb3 Fix registration cancellation bug in regproc 2020-10-31 23:58:03 +00:00
fe86d30155 Fix various bugs and off by one with provisioning 2020-10-31 23:55:11 +00:00
7485bbefd1 Move WHO and NAMES logging to trace 2020-10-31 16:52:00 +00:00
82a98c9539 Don't deduplicate global messages (NICK/QUIT) 2020-10-31 16:51:24 +00:00
45f02c323b Improve authentication detection
Add a negative check in the event we are authenticated and registered,
but not confirmed, as this fools other checks.
2020-10-31 16:49:37 +00:00
bdb3d059e3 Use zero-padded numbers to maximise usuable ports 2020-10-31 00:13:59 +00:00
e403852778 Error checking in testing for registration message 2020-10-31 00:13:09 +00:00
f3dd102096 Deauth bot when disconnected and lowercase user 2020-10-31 00:12:06 +00:00
1fec14d759 Clarify error message to be more helpful 2020-10-31 00:11:28 +00:00
b67eee42c1 Implement another level of logging for tracing 2020-10-31 00:10:33 +00:00
9e6dd5e03d Note that arguments to list are optional 2020-10-31 00:06:35 +00:00
77e8ef4c16 Implement authentication checking on connection 2020-10-28 22:50:12 +00:00
c879caa9d7 Add checks in dedup for time-less messages 2020-10-28 22:46:22 +00:00
db7e5677d3 Fix decoding issue with some Redis keys 2020-10-28 22:30:49 +00:00
f848b5afd6 Provision users with lowercase names 2020-10-28 22:30:04 +00:00
3bc65f8456 Add the time field to some notifications 2020-10-28 22:26:41 +00:00
95ee63e399 Fix circular import in ChanKeep/provisioning modules 2020-10-28 18:38:27 +00:00
a1e045793c Start implementing prefixes 2020-07-09 19:43:47 +01:00
f50a40d207 Fixes to auth detection and message parsing
* don't check authentication if the network doesn't need to
  register
* don't pass through muser for ZNC type messages
* avoid duplicate message for queries containing highlights
* make a copy of the cast for metadata analysis to avoid poisoning it
* set up callback for when the instance is authenticated, so we can
  request a LIST immediately if so desired
* separate out seeding functions to populate CHANLIMIT to ease future
  work involving other options, such as PREFIX
2020-06-07 17:26:53 +01:00
4c08225a50 Remove condition-based monitoring system 2020-06-07 15:31:43 +01:00
11f15ac960 Fix various bugs in the event system
Squash many bugs in the event notification system and simplify the
code.
2020-06-02 21:34:15 +01:00
8103c16253 Fix syntax error in redis query 2020-05-31 21:54:43 +01:00
45070b06e2 Implement authentication detection
* pending command to see which instances have never authenticated
* authcheck command to see which instances are not currently
  authenticated
2020-05-31 21:52:56 +01:00
12db2f349e Add help for pending command 2020-05-31 16:40:51 +01:00
40e1f38508 Add additional error handling in user queries 2020-05-31 13:44:34 +01:00
63c97db12e Function to select and merge IRC network defs 2020-05-31 13:23:09 +01:00
91885170f1 Check registration status before joining channels
Do not join channels if any relay for a network is unregistered.
2020-05-31 13:09:58 +01:00
7c23766763 Allow sending LIST to all networks at once 2020-05-31 13:08:00 +01:00
9e62ac62bc Add confirm command
Confirm command to check which relays need manual
confirmation.
2020-05-31 12:32:12 +01:00
014de9f958 Remove leftover irc.json file 2020-05-30 21:42:26 +01:00
f90f2fdef7 Implement registration and confirmation of nicks 2020-05-30 21:40:10 +01:00
e0549cdd30 Restructure provisioning into fewer functions 2020-05-30 21:37:22 +01:00
a78229a288 Add irc.json to gitignore 2020-05-30 21:35:50 +01:00
918d410927 Fix variable scope in LIST error handling 2020-04-21 23:32:17 +01:00
bc4d5cba8e Separate provisioning into user and auth info 2019-12-28 17:51:03 +00:00
376d1bd911 Add IRC network definitions 2019-12-28 17:50:38 +00:00
778690ae3a Add more comments and remove obsolete code 2019-12-07 16:35:29 +00:00
da3ba4ea8c Add requirements 2019-11-17 19:09:17 +00:00
14 changed files with 497 additions and 346 deletions

3
.gitignore vendored
View File

@@ -158,5 +158,4 @@ cython_debug/
docker/data docker/data
*.pem *.pem
legacy/conf/live/ legacy/conf/live/
legacy/conf/cert/ legacy/conf/cert/
stack.env

View File

@@ -1,20 +0,0 @@
run:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env up -d
build:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env build
stop:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env down
log:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env logs -f
run-infra:
docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env up -d
stop-infra:
docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env down
log-infra:
docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env logs -f

18
db.py
View File

@@ -1,28 +1,23 @@
import random import random
from os import getenv
import aioredis import aioredis
import orjson import orjson
import redis
# Kafka # Kafka
from aiokafka import AIOKafkaProducer from aiokafka import AIOKafkaProducer
from redis import StrictRedis
import util import util
trues = ("true", "1", "t", True)
MONOLITH_KAFKA_ENABLED = getenv("MONOLITH_KAFKA_ENABLED", "false").lower() in trues
# KAFKA_TOPIC = "msg" # KAFKA_TOPIC = "msg"
log = util.get_logger("db") log = util.get_logger("db")
# Redis (legacy) # Redis (legacy)
r = redis.from_url("redis://ssdb:1289", db=0) r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
# AIORedis # AIORedis
ar = aioredis.from_url("redis://ssdb:1289", db=0) ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
TYPES_MAIN = [ TYPES_MAIN = [
"msg", "msg",
@@ -49,9 +44,6 @@ KEYNAME = "queue"
async def store_kafka_batch(data): async def store_kafka_batch(data):
if not MONOLITH_KAFKA_ENABLED:
log.info(f"Not storing Kafka batch of length {len(data)}, Kafka is disabled.")
return
# log.debug(f"Storing Kafka batch of {len(data)} messages") # log.debug(f"Storing Kafka batch of {len(data)} messages")
producer = AIOKafkaProducer(bootstrap_servers="kafka:9092") producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
await producer.start() await producer.start()
@@ -122,7 +114,7 @@ async def queue_message(msg):
Queue a message on the Redis buffer. Queue a message on the Redis buffer.
""" """
message = orjson.dumps(msg) message = orjson.dumps(msg)
await ar.lpush(KEYNAME, message) await ar.sadd(KEYNAME, message)
async def queue_message_bulk(data): async def queue_message_bulk(data):
@@ -131,4 +123,4 @@ async def queue_message_bulk(data):
""" """
for msg in data: for msg in data:
message = orjson.dumps(msg) message = orjson.dumps(msg)
await ar.lpush(KEYNAME, message) await ar.sadd(KEYNAME, message)

View File

@@ -6,7 +6,7 @@ x-superset-depends-on: &superset-depends-on
- redis_superset - redis_superset
x-superset-volumes: &superset-volumes x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ${PORTAINER_GIT_DIR}/docker/superset:/app/docker - ./docker/superset:/app/docker
- superset_home:/app/superset_home - superset_home:/app/superset_home
services: services:
@@ -21,7 +21,7 @@ services:
volumes_from: volumes_from:
- tmp - tmp
depends_on: depends_on:
druid: broker:
condition: service_started condition: service_started
kafka: kafka:
condition: service_healthy condition: service_healthy
@@ -100,7 +100,7 @@ services:
container_name: superset_cache container_name: superset_cache
restart: unless-stopped restart: unless-stopped
volumes: volumes:
- redis_superset:/data - redis:/data
db: db:
env_file: docker/.env-non-dev env_file: docker/.env-non-dev
image: postgres:10 image: postgres:10
@@ -195,116 +195,88 @@ services:
timeout: 30s timeout: 30s
retries: 45 retries: 45
druid: coordinator:
image: pathogen/druid:0.23.0 image: apache/druid:0.23.0
build: ./docker/druid/ container_name: coordinator
container_name: druid
volumes: volumes:
- druid_shared:/opt/shared - druid_shared:/opt/shared
- druid_var:/opt/druid/var - coordinator_var:/opt/druid/var
depends_on: depends_on:
- zookeeper - zookeeper
- postgres - postgres
ports: ports:
- "8081:8081" - "8081:8081"
- "8082:8082" command:
- "8083:8083" - coordinator
- "8888:8888"
env_file: env_file:
- environment - environment
# coordinator: broker:
# #image: apache/druid:0.23.0 image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0 container_name: broker
# build: ./docker/druid/ volumes:
# container_name: coordinator - broker_var:/opt/druid/var
# volumes: depends_on:
# - druid_shared:/opt/shared - zookeeper
# - coordinator_var:/opt/druid/var - postgres
# depends_on: - coordinator
# - zookeeper ports:
# - postgres - "8082:8082"
# ports: command:
# - "8081:8081" - broker
# command: env_file:
# - coordinator - environment
# env_file:
# - environment
# broker: historical:
# #image: apache/druid:0.23.0 image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0 container_name: historical
# build: ./docker/druid/ volumes:
# container_name: broker - druid_shared:/opt/shared
# volumes: - historical_var:/opt/druid/var
# - broker_var:/opt/druid/var depends_on:
# depends_on: - zookeeper
# - zookeeper - postgres
# - postgres - coordinator
# - coordinator ports:
# ports: - "8083:8083"
# - "8082:8082" command:
# command: - historical
# - broker env_file:
# env_file: - environment
# - environment
# historical: middlemanager:
# #image: apache/druid:0.23.0 image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0 container_name: middlemanager
# build: ./docker/druid/ volumes:
# container_name: historical - druid_shared:/opt/shared
# volumes: - middle_var:/opt/druid/var
# - druid_shared:/opt/shared depends_on:
# - historical_var:/opt/druid/var - zookeeper
# depends_on: - postgres
# - zookeeper - coordinator
# - postgres ports:
# - coordinator - "8091:8091"
# ports: - "8100-8105:8100-8105"
# - "8083:8083" command:
# command: - middleManager
# - historical env_file:
# env_file: - environment
# - environment
# middlemanager: router:
# #image: apache/druid:0.23.0 image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0 container_name: router
# build: ./docker/druid/ volumes:
# container_name: middlemanager - router_var:/opt/druid/var
# volumes: depends_on:
# - druid_shared:/opt/shared - zookeeper
# - middle_var:/opt/druid/var - postgres
# depends_on: - coordinator
# - zookeeper ports:
# - postgres - "8888:8888"
# - coordinator command:
# ports: - router
# - "8091:8091" env_file:
# - "8100-8105:8100-8105" - environment
# command:
# - middleManager
# env_file:
# - environment
# router:
# #image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0
# build: ./docker/druid/
# container_name: router
# volumes:
# - router_var:/opt/druid/var
# depends_on:
# - zookeeper
# - postgres
# - coordinator
# ports:
# - "8888:8888"
# command:
# - router
# env_file:
# - environment
# db: # db:
# #image: pathogen/manticore:kibana # #image: pathogen/manticore:kibana
@@ -371,9 +343,9 @@ volumes:
external: false external: false
redis_data: {} redis_data: {}
metadata_data: {} metadata_data: {}
# middle_var: {} middle_var: {}
# historical_var: {} historical_var: {}
# broker_var: {} broker_var: {}
# coordinator_var: {} coordinator_var: {}
druid_var: {} router_var: {}
druid_shared: {} druid_shared: {}

View File

@@ -41,6 +41,6 @@ REDIS_PORT=6379
FLASK_ENV=production FLASK_ENV=production
SUPERSET_ENV=production SUPERSET_ENV=production
SUPERSET_LOAD_EXAMPLES=no SUPERSET_LOAD_EXAMPLES=yes
CYPRESS_CONFIG=false CYPRESS_CONFIG=false
SUPERSET_PORT=8088 SUPERSET_PORT=8088

View File

@@ -1,142 +0,0 @@
# Compose stack: Apache Superset (app, init job, Celery worker and beat),
# its Redis cache and Postgres database, plus the Druid pipeline
# (Postgres metadata store, Zookeeper, Kafka and a single Druid container).
version: "2.2"

# Shared YAML anchors reused by every Superset service below.
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
  - db
  - redis_superset
x-superset-volumes: &superset-volumes
  # /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
  - ${PORTAINER_GIT_DIR}/docker/superset:/app/docker
  - superset_home:/app/superset_home

services:
  # --- Superset backing services -------------------------------------------
  redis_superset:
    image: redis:latest
    container_name: superset_cache
    restart: unless-stopped
    volumes:
      - redis:/data

  db:
    env_file: .env-non-dev
    image: postgres:10
    container_name: superset_db
    restart: unless-stopped
    volumes:
      - db_home:/var/lib/postgresql/data

  # --- Superset application containers -------------------------------------
  superset:
    env_file: .env-non-dev
    image: *superset-image
    container_name: superset_app
    command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
    user: "root"
    restart: unless-stopped
    ports:
      - 8088:8088
    depends_on: *superset-depends-on
    volumes: *superset-volumes

  # No restart policy is set for this service, unlike its siblings.
  superset-init:
    image: *superset-image
    container_name: superset_init
    command: ["/app/docker/docker-init.sh"]
    env_file: .env-non-dev
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  superset-worker:
    image: *superset-image
    container_name: superset_worker
    command: ["/app/docker/docker-bootstrap.sh", "worker"]
    env_file: .env-non-dev
    restart: unless-stopped
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  superset-worker-beat:
    image: *superset-image
    container_name: superset_worker_beat
    command: ["/app/docker/docker-bootstrap.sh", "beat"]
    env_file: .env-non-dev
    restart: unless-stopped
    depends_on: *superset-depends-on
    user: "root"
    volumes: *superset-volumes

  # --- Druid metadata store -------------------------------------------------
  postgres:
    container_name: postgres
    image: postgres:latest
    volumes:
      - /block/store/metadata_data:/var/lib/postgresql/data
    environment:
      # NOTE(review): credentials are hard-coded here; consider moving them
      # to an env_file or secret before using this outside development.
      - POSTGRES_PASSWORD=FoolishPassword
      - POSTGRES_USER=druid
      - POSTGRES_DB=druid

  # Need 3.5 or later for container nodes
  zookeeper:
    container_name: zookeeper
    image: zookeeper:3.5
    ports:
      - "2181:2181"
    environment:
      - ZOO_MY_ID=1

  kafka:
    image: wurstmeister/kafka:latest
    container_name: kafka
    depends_on:
      - zookeeper
    ports:
      - 9092:9092
      - 29092:29092
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
      KAFKA_MESSAGE_MAX_BYTES: 2000000
      #KAFKA_HEAP_OPTS: -Xmx2g
    # Healthy once the broker can list topics over its internal listener.
    healthcheck:
      test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
      start_period: 15s
      interval: 30s
      timeout: 30s
      retries: 45

  # Single Druid container built from ./druid/; publishes ports 8081-8083
  # and 8888.
  druid:
    image: pathogen/druid:0.23.0
    build: ./druid/
    container_name: druid
    volumes:
      - /block/store/druid_shared:/opt/shared
      - /block/store/druid_var:/opt/druid/var
    depends_on:
      - zookeeper
      - postgres
    ports:
      - "8081:8081"
      - "8082:8082"
      - "8083:8083"
      - "8888:8888"
    env_file:
      - environment

# Attach every service to the pre-existing external "pathogen" network.
networks:
  default:
    external:
      name: pathogen

# Named volumes managed by Compose (not created externally).
volumes:
  superset_home:
    external: false
  db_home:
    external: false
  redis:
    external: false

View File

@@ -1,14 +1,44 @@
version: "2.2" version: "2.2"
# volumes:
# metadata_data: {}
# middle_var: {}
# historical_var: {}
# broker_var: {}
# coordinator_var: {}
# router_var: {}
# druid_shared: {}
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services: services:
app: app:
image: pathogen/monolith:latest image: pathogen/monolith:latest
container_name: monolith container_name: monolith
build: ${PORTAINER_GIT_DIR}/docker build: ./docker
volumes: volumes:
- ${PORTAINER_GIT_DIR}:/code - ${PORTAINER_GIT_DIR}:/code
env_file: env_file:
- ../stack.env - ../stack.env
volumes_from:
- tmp
depends_on:
broker:
condition: service_started
kafka:
condition: service_healthy
tmp:
condition: service_started
redis:
condition: service_healthy
# - db
threshold: threshold:
image: pathogen/threshold:latest image: pathogen/threshold:latest
@@ -17,7 +47,7 @@ services:
volumes: volumes:
- ${PORTAINER_GIT_DIR}:/code - ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live - ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates #- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert - ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
ports: ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}" - "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
@@ -25,19 +55,283 @@ services:
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}" - "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
env_file: env_file:
- ../stack.env - ../stack.env
# for development volumes_from:
extra_hosts: - tmp
- "host.docker.internal:host-gateway" depends_on:
tmp:
condition: service_started
redis:
condition: service_healthy
ssdb: # db:
image: tsl0922/ssdb #image: pathogen/manticore:kibana
container_name: ssdb_monolith # image: manticoresearch/manticore:latest
#build:
# context: ./docker/manticore
# args:
# DEV: 1
# restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# depends_on:
# - broker
# metabase:
# container_name: metabase
# image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes: volumes:
- ssdb_data:/ssdb/var - redis:/data
db:
env_file: .env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: .env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports: ports:
- "1289:1289" - 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: .env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- /block/store/metadata_data:/var/lib/postgresql/data
environment: environment:
- SSDB_PORT=1289 - POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
command:
- coordinator
env_file:
- environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- /block/store/broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- /block/store/router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
# image: manticoresearch/manticore:dev
# #build:
# # context: ./docker/manticore
# # args:
# # DEV: 1
# restart: always
# ports:
# - 9308
# - 9312
# - 9306
# ulimits:
# nproc: 65535
# nofile:
# soft: 65535
# hard: 65535
# memlock:
# soft: -1
# hard: -1
# environment:
# - MCL=1
# volumes:
# - ./docker/data:/var/lib/manticore
# - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- redis_data:/data
volumes_from:
- tmp
healthcheck:
test: "redis-cli -s /var/run/redis/redis.sock ping"
interval: 2s
timeout: 2s
retries: 15
networks: networks:
default: default:
@@ -45,4 +339,10 @@ networks:
name: pathogen name: pathogen
volumes: volumes:
ssdb_data: {} redis_data: {}
superset_home:
external: false
db_home:
external: false
redis:
external: false

View File

@@ -1,22 +0,0 @@
# Repackages the upstream Apache Druid distribution onto an Ubuntu base image.
# DRUID_VER selects the upstream image tag; override with --build-arg.
ARG DRUID_VER=0.23.0
# Stage "druid": the upstream image, used only as the source of /opt/druid.
FROM apache/druid:${DRUID_VER} AS druid
# Final stage: Ubuntu bionic with a headless Java 8 runtime.
FROM ubuntu:bionic
# perl-modules is presumably required by Druid's launcher scripts - TODO confirm.
RUN apt-get update && \
apt-get install --yes openjdk-8-jre-headless perl-modules && \
apt-get clean
# Create a system user/group "druid" (uid/gid 1000) whose home is /opt/druid.
RUN addgroup --system -gid 1000 druid \
&& adduser --system --uid 1000 --disabled-password --home /opt/druid --shell /bin/bash --group druid
# Copy the Druid installation out of the upstream stage, owned by the druid user.
COPY --from=druid --chown=druid:druid /opt/druid /opt/druid
WORKDIR /opt/druid
# Drop root: everything from here on runs as the druid user.
USER druid
# Document the two ports this image exposes (8888 and 8081).
EXPOSE 8888/tcp
EXPOSE 8081/tcp
# Shell-form CMD: launch the nano quickstart start script by default.
CMD /opt/druid/bin/start-nano-quickstart

View File

@@ -1,3 +0,0 @@
clusters:
- name: druid
guardDataCubes: true

View File

@@ -0,0 +1,87 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=2
druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>

0
event_log.txt Normal file
View File

View File

@@ -67,7 +67,7 @@ def parsemeta(numName, c):
def queue_message(c): def queue_message(c):
message = json.dumps(c) message = json.dumps(c)
main.g.lpush("queue", message) main.g.sadd("queue", message)
def event( def event(

View File

@@ -15,7 +15,6 @@ from concurrent.futures import ProcessPoolExecutor
# For timestamp processing # For timestamp processing
from datetime import datetime from datetime import datetime
from math import ceil from math import ceil
from os import getenv
import orjson import orjson
import regex import regex
@@ -52,12 +51,6 @@ import util
# 4chan schema # 4chan schema
from schemas.ch4_s import ATTRMAP from schemas.ch4_s import ATTRMAP
trues = ("true", "1", "t", True)
MONOLITH_PROCESS_PERFSTATS = (
getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
)
CUSTOM_FILTERS = [ CUSTOM_FILTERS = [
lambda x: x.lower(), lambda x: x.lower(),
strip_tags, # strip_tags, #
@@ -274,19 +267,17 @@ def process_data(data):
# Add the mutated message to the return buffer # Add the mutated message to the return buffer
to_store.append(msg) to_store.append(msg)
total_time += (time.process_time() - total_start) * 1000 total_time += (time.process_time() - total_start) * 1000
log.debug("=====================================")
if MONOLITH_PROCESS_PERFSTATS: log.debug(f"Sentiment: {sentiment_time}")
log.debug("=====================================") log.debug(f"Regex: {regex_time}")
log.debug(f"Sentiment: {sentiment_time}") log.debug(f"Polyglot: {polyglot_time}")
log.debug(f"Regex: {regex_time}") log.debug(f"Date: {date_time}")
log.debug(f"Polyglot: {polyglot_time}") log.debug(f"NLP: {nlp_time}")
log.debug(f"Date: {date_time}") log.debug(f"Normalise: {normalise_time}")
log.debug(f"NLP: {nlp_time}") log.debug(f"Hash: {hash_time}")
log.debug(f"Normalise: {normalise_time}") log.debug(f"Normal2: {normal2_time}")
log.debug(f"Hash: {hash_time}") log.debug(f"Soup: {soup_time}")
log.debug(f"Normal2: {normal2_time}") log.debug(f"Total: {total_time}")
log.debug(f"Soup: {soup_time}") log.debug("=====================================")
log.debug(f"Total: {total_time}")
log.debug("=====================================")
return to_store return to_store

View File

@@ -36,10 +36,7 @@ class Ingest(object):
items = [] items = []
# for source in SOURCES: # for source in SOURCES:
# key = f"{KEYPREFIX}{source}" # key = f"{KEYPREFIX}{source}"
length = await db.ar.llen(KEYNAME) chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE)
start_num = length - CHUNK_SIZE
chunk = await db.ar.lrange(KEYNAME, start_num, -1)
# chunk = await db.ar.rpop(KEYNAME, CHUNK_SIZE)
if not chunk: if not chunk:
return return
for item in chunk: for item in chunk: