Compare commits

..

308 Commits

Author SHA1 Message Date
808ed18b74 Switch quickstart setting to nano 2022-10-04 20:37:02 +01:00
34e589aa9c Set Superset env file relative to docker directory 2022-10-04 20:30:14 +01:00
cc6340acab Add persistent Redis data store and copy over Druid config to production 2022-10-04 20:26:58 +01:00
7b73229d5a Add Apache Superset and fix Druid resource usage 2022-10-04 20:17:04 +01:00
35ba2cc947 Add postgres config to Metabase 2022-10-02 14:29:40 +01:00
817bfd8835 Time stuff and switch to gensim for tokenisation 2022-10-01 14:46:45 +01:00
40cf0c6430 Remove commented debug code 2022-09-30 07:22:22 +01:00
63081f68b7 Use only one Redis key for the queue to make chunk size more precise for thread allocation 2022-09-30 07:22:22 +01:00
5992498493 Remove ujson 2022-09-30 15:30:34 +01:00
328db4a6da Reformat 2022-09-30 15:23:00 +01:00
c5c834da82 Add config file to Turnilo 2022-09-27 08:30:28 +01:00
a8dbabd85e Implement uvloop 2022-09-23 07:20:30 +01:00
56b5c85fac Print Ingest settings on start 2022-09-23 08:32:29 +01:00
fc7450c33a Make debug output cleaner 2022-09-22 17:39:29 +01:00
0e9a016e2a Fix indexer options 2022-09-22 17:39:18 +01:00
763501d1ee Fix Java variable in indexer parameters 2022-09-22 08:41:59 +01:00
40a215e6ec Decrease memory requirements further and switch Kafka image 2022-09-21 21:11:13 +01:00
7abf9a00cb Set Kafka max heap size 2022-09-21 20:26:05 +01:00
bd3f1ecd53 Set max memory for Metabase 2022-09-21 14:39:11 +01:00
64ebcedd76 Remove debugging code and fix regex substitution 2022-09-21 12:48:54 +01:00
3d293daad3 Change dev container names 2022-09-21 12:09:18 +01:00
00890860c0 Change prod container names 2022-09-21 12:08:29 +01:00
b0efaeef90 Remove prod compose comment 2022-09-21 12:04:54 +01:00
d6d19625f3 Remove commented code for debugging 2022-09-21 10:02:05 +01:00
cf4aa45663 Normalise fields in processing and remove invalid characters 2022-09-21 10:01:12 +01:00
48e4c07959 Make production volumes point to external storage 2022-09-21 10:00:48 +01:00
027c43b60a Don't muddle up the topics when sending Kafka batches 2022-09-20 23:03:02 +01:00
e0803d4934 Document new PROCESS_THREADS setting in example file 2022-09-20 22:43:04 +01:00
6de17063a2 Make CPU threads configurable 2022-09-20 22:29:13 +01:00
2c5133a546 Make performance settings configurable 2022-09-20 22:22:13 +01:00
24929a5fbb Set memory size to 2.5GB 2022-09-08 07:20:30 +01:00
f336d96268 Update DirectMemorySize to be 1.5GB 2022-09-19 21:51:07 +01:00
315e477916 Make MaxDirectMemory 0.5*cores 2022-09-19 19:15:57 +01:00
006677819d Make max memory size 512m 2022-09-19 19:10:33 +01:00
93a0be98ce Further decrease Druid memory requirements 2022-09-19 17:07:15 +01:00
14322f5090 Bump production Kafka healthcheck timeout 2022-09-19 11:18:52 +01:00
d94da5ac5c Decrease production Druid max memory size 2022-09-19 10:51:34 +01:00
a1382ee46d Increase Kafka retries 2022-09-19 10:48:29 +01:00
5e6b962ea8 Change Metabase port 2022-09-18 13:15:10 +01:00
e8dd847b36 Add docker environment file 2022-09-18 13:05:08 +01:00
d68bcfaebd Update production compose 2022-09-18 13:04:08 +01:00
ebfa06e8d6 Reformat comment 2022-09-18 13:02:06 +01:00
3ed382ec13 Implement restricted sources 2022-09-18 13:01:19 +01:00
dab5e81715 Fix merge conflict 2022-09-16 17:45:24 +01:00
143f2a0bf0 Implement sentiment/NLP annotation and optimise processing 2022-09-16 17:09:49 +01:00
4ea77ac543 Properly process Redis buffered messages and ingest into Kafka 2022-09-14 18:32:32 +01:00
fec0d379a6 Ingest into Kafka and queue messages better 2022-09-13 22:17:46 +01:00
3c2adfc16e Implement Apache Druid/Kafka and Metabase 2022-09-13 22:17:32 +01:00
4c6fe87b88 Switch to latest image for dev docker-compose 2022-09-13 09:20:43 +01:00
79a430be04 Begin implementing Apache Druid 2022-09-08 07:20:30 +01:00
baea6aebeb Use stable after all 2022-09-08 07:20:30 +01:00
eaecc5cdbe Switch production image back to dev 2022-09-08 07:20:30 +01:00
764e36ef14 Lower memory requirements to prevent crashes 2022-09-08 07:20:30 +01:00
50a873dbba Set dev image back to the default 2022-09-12 08:43:18 +01:00
21182629b4 Treat text fields as string and try beta Kibana image 2022-09-12 08:27:13 +01:00
dfd71b6c64 Add Mysql port to ports instead of expose 2022-09-10 13:20:06 +01:00
1b0817b047 Expose the Mysql port 2022-09-10 13:16:19 +01:00
0ba4929294 Use dev image of manticore 2022-09-10 12:03:45 +01:00
caded433b7 Remove indexer block to attempt to prevent Manticore DB crash 2022-09-08 07:20:30 +01:00
bf802d7fdf Reformat 2022-09-07 07:20:30 +01:00
89328a827a Raise open files limit for Redis 2022-09-07 07:20:30 +01:00
32249a1d99 Add 4chan update message type to main types 2022-09-07 07:20:30 +01:00
cdd12cd082 Implement threshold writing to Redis and manticore ingesting from Redis 2022-09-07 07:20:30 +01:00
137299fe9e Add config directories to gitignore 2022-09-08 09:45:18 +01:00
2aedcf77a0 Add aioredis 2022-09-08 09:44:27 +01:00
49784dfbe5 Implement ingesting to Redis from Threshold 2022-09-07 07:20:30 +01:00
a6b5348224 Config relative to Git dir 2022-09-05 07:20:30 +01:00
d0fe2baafe Store persistent database elsewhere 2022-09-05 07:20:30 +01:00
e092327932 Improve DB performance with caching 2022-09-05 07:20:30 +01:00
8b9ad05089 Reformat legacy project 2022-09-05 07:20:30 +01:00
6b082adeb2 Merge branch 'threshold' 2022-09-06 12:50:25 +01:00
bd9f9378cf Moved files to subdirectory 2022-09-06 12:50:09 +01:00
62fe03a6cb Increase thread delay time 2022-09-05 07:20:30 +01:00
297bbbe035 Alter schemas and 4chan performance settings 2022-09-05 07:20:30 +01:00
ed7c439b56 Remove some debugging code 2022-09-05 07:20:30 +01:00
ecb8079b5b Change Python to 3.10 2022-09-05 07:20:30 +01:00
6811ce4af5 Update production env file path 2022-09-05 07:20:30 +01:00
e34d281774 Remove development dotenv loading 2022-09-05 07:20:30 +01:00
91e18c60e6 Add debug statement 2022-09-05 07:20:30 +01:00
9c9d49dcd2 Reformat and set the net and channel for 4chan 2022-09-05 07:20:30 +01:00
dcd648e1d2 Make crawler more efficient and implement configurable parameters 2022-09-05 07:20:30 +01:00
318a8ddbd5 Split thread list into chunks to save memory 2022-09-05 07:20:30 +01:00
20e22ae7ca Reformat code 2022-09-04 21:40:04 +01:00
8feccbbf00 Reinstate Redis cache 2022-09-04 21:38:53 +01:00
db46fea550 Run processing in thread 2022-09-04 21:29:00 +01:00
22cef33342 Implement aiohttp 2022-09-04 19:44:25 +01:00
663a26778d Begin implementing aiohttp 2022-09-04 13:47:32 +01:00
36de004ee5 Implement running Discord and 4chan gathering simultaneously 2022-09-02 22:30:45 +01:00
2c3d83fe9a Fix error when no email can be found 2022-08-27 11:19:28 +01:00
d7adffb47f Fix getting first relay when they are not sequential 2022-08-26 22:17:12 +01:00
4f4820818a Log authentication messages 2022-08-16 23:01:42 +01:00
5cc38da00e Implement deduplicating channels 2022-08-16 22:01:35 +01:00
a4dae2a583 Switch to siphash 2022-08-18 07:20:30 +01:00
5f1667869f Re-add fake messages 2022-08-15 19:49:21 +01:00
09a5cd14ad Detect queries if nick and channel are the same 2022-08-15 19:24:42 +01:00
96de70aaf2 Add sinst fetch and fix message send logic 2022-08-15 19:15:12 +01:00
f8c1e952bb Switch debugging statements to trace in ChanKeep 2022-08-15 19:15:00 +01:00
36628e157d Fix query handling and don't send a fake message 2022-08-15 17:59:31 +01:00
aeee745ac9 Only run pingmsg after negative has completed 2022-08-18 07:20:30 +01:00
d795af164f Fix debug statements and amend function names 2022-08-18 07:20:30 +01:00
4acadd3508 Properly format string 2022-08-18 07:20:30 +01:00
5c4904ba56 Improve regPing debugging 2022-08-18 07:20:30 +01:00
4e88b93856 Improve regPing negative handling logic 2022-08-18 07:20:30 +01:00
af1dba5741 Fix double messages and regPing logic 2022-08-18 07:20:30 +01:00
553e2eb2b7 Set the channel limit on connected relays, not active 2022-08-18 07:20:30 +01:00
3dfc6d736a Look before you leap to confirming registrations 2022-08-18 07:20:30 +01:00
7ef76d1424 Fix IRC config mutation 2022-08-18 07:20:30 +01:00
d78600a2f1 Change authentication endpoint 2022-08-18 07:20:30 +01:00
f004bd47af Reorder API endpoints to prevent clashing 2022-08-18 07:20:30 +01:00
fafcff1427 Add more debugging information 2022-08-15 00:39:22 +01:00
e56bd61362 Figure out the channel parsing logic 2022-08-15 00:36:36 +01:00
2b7bd486f1 Pass a list instead of listinfo 2022-08-15 00:29:08 +01:00
a9592a85d0 Fix variable placement 2022-08-15 00:27:16 +01:00
e77c046965 Fix list parsing 2022-08-15 00:26:11 +01:00
7a8cee1431 Fix debugging code in keepChannels 2022-08-15 00:08:11 +01:00
e6527b4f9f Add debugging code in keepChannels 2022-08-15 00:07:29 +01:00
8979a03bbd Subtract one from length of list for indices 2022-08-15 00:04:49 +01:00
f7b84913f2 Lower max_chans to length of LIST if it's shorter 2022-08-15 00:03:12 +01:00
d46c98a211 Reset negative pass status when requesting recheck 2022-08-14 23:58:35 +01:00
d68f0589cb Implement initial WHO loop delay 2022-08-14 20:58:41 +01:00
d9ec68708b Fix getting all unregistered relays 2022-08-14 20:58:30 +01:00
1b77c50552 Blacklist channels we are kicked from 2022-08-14 20:44:04 +01:00
1ce5a8228c Use JSON for sending messages 2022-08-14 16:45:40 +01:00
f6f515b308 Implement API call to register 2022-08-14 16:26:09 +01:00
9864b4e2b5 Convert num to number in registration confirmation 2022-08-14 16:09:32 +01:00
2fdd0cf6b8 Allow current nick substitution in IRC commands 2022-08-14 15:53:18 +01:00
8c809ad444 Fix variable shadowing 2022-08-14 15:43:48 +01:00
2022ab985b Print identification message 2022-08-14 13:51:13 +01:00
b5e78bc4de Implement manual authentication mode 2022-08-14 13:13:05 +01:00
eba2c387f0 Implement API for authentication management actions 2022-08-14 12:43:33 +01:00
5123941c79 More debugging for reg tests and getstr command 2022-08-14 11:41:29 +01:00
6cc07c9171 Add allRelaysActive output to network info 2022-08-14 10:58:28 +01:00
ed1f3cdca7 Add debug statements and only check if network is connected when parting channels 2022-08-14 09:25:54 +01:00
128e005611 Use JSON for joining channels and don't shadow auth variable when getting network info 2022-08-14 09:25:01 +01:00
713e03b66e Make channel deletion endpoint accept JSON 2022-08-14 00:01:14 +01:00
a0761ff1ae LBYL 2022-08-13 23:38:13 +01:00
15523bed96 Add more information to relay API return 2022-08-13 23:36:39 +01:00
653d9ea4f9 Add even more debugging 2022-08-13 23:18:56 +01:00
f1229a76e1 Extra debugging for getting active relays 2022-08-13 23:17:26 +01:00
d4bcbf99e5 Fix typo in module name 2022-08-13 23:14:51 +01:00
e517d04095 Extra debugging for get_first_relay 2022-08-13 23:14:17 +01:00
65697ce8f0 Filter queries more carefully 2022-08-13 22:46:10 +01:00
ab9b0a1c9f Update CHANLIMIT on all instances when set via API 2022-08-13 22:36:52 +01:00
60f7a84383 Add helper to get all active relays 2022-08-13 22:36:18 +01:00
956d328fd3 Implement API endpoint to enable authentication 2022-08-13 22:25:29 +01:00
dcd7fcc3c0 Filter AUTH channel (OFTC fix) 2022-08-13 22:15:50 +01:00
7415ca5556 Use ChanKeep system for joining channels with joinSingle 2022-08-13 21:54:14 +01:00
9780a2dfc8 Fully make use of ECA for multiple channels 2022-08-13 21:40:53 +01:00
c7fa508a38 Return chanlimit for each relay 2022-08-13 21:22:43 +01:00
b83062c34f Check token before attempting to confirm 2022-08-13 20:55:36 +01:00
2e57e0930a Implement API endpoint for provisioning relays 2022-08-13 20:51:31 +01:00
43c5625b3b Implement configurable chanlimit and add more fields about LIST output to Redis 2022-08-13 20:37:21 +01:00
291968fbc7 Implement updating registration via API 2022-08-13 20:36:51 +01:00
dd67e9cc8b Implement ChanKeep without requiring persistent chanlimits on all networks 2022-08-13 19:20:29 +01:00
c145e5cf18 Add some debug statements and statistics for chanlimits 2022-08-13 18:40:13 +01:00
5db0373731 Print message if relay is unauthenticated/disconnected 2022-08-13 14:06:34 +01:00
6c11bbe912 Return relay numbers with channel list 2022-08-13 13:47:42 +01:00
4d543f31ec Add connected status to IRC info return and check when getting active relays 2022-08-13 13:40:33 +01:00
6c92e8e7d9 Reformat code 2022-08-13 13:32:22 +01:00
836e621063 Implement getting LIST information from API 2022-08-13 13:27:20 +01:00
852d62a9c9 Provision relay on creation 2022-08-13 00:18:06 +01:00
ddc9af0ddf Add docstrings to chankeep 2022-08-12 23:53:02 +01:00
edfb3f15eb Implement migrating networks 2022-08-12 23:32:00 +01:00
14967f662c Subtract allocated channel slots from total 2022-08-12 22:31:12 +01:00
0b370fc155 Improve channel allocation and write basic tests for it 2022-08-12 22:27:49 +01:00
9804f30060 Make channel join notification a TRACE 2022-08-12 20:19:39 +01:00
f7d6cec896 Fix email command 2022-08-12 20:19:33 +01:00
b871fea039 Add endpoint to get the bot's nickname 2022-08-09 07:20:30 +01:00
e69ce5090a Properly implement querying with API 2022-08-09 07:20:30 +01:00
813c9baf30 Get our hostname from WHO when we create fake events 2022-08-09 07:20:30 +01:00
220ce976f2 Fire a fake event when we send a message 2022-08-09 07:20:30 +01:00
719f014265 Implement best effort allocation 2022-08-11 21:44:19 +01:00
1ef600a9df Simplify variable names and reformat 2022-08-11 20:51:41 +01:00
b72a0672a5 Use ceil instead of round for relay number rounding 2022-08-11 20:46:44 +01:00
bb3b96e7f7 Expand ECA secondary allocation algorithm 2022-08-11 20:43:34 +01:00
c4db8ec99d Adding more debug statements in ECA system 2022-08-11 20:36:24 +01:00
73b0518a8f Print information about received LIST 2022-08-11 20:32:49 +01:00
571a527f43 Return correct data type for provisioning relays 2022-08-11 20:29:01 +01:00
4c3bab6d96 Simplify is_first_relay 2022-08-11 20:26:19 +01:00
14eb05722c Add even more debugging 2022-08-11 20:21:39 +01:00
11c226833d Add more LIST handling debugging 2022-08-11 20:18:49 +01:00
ea81fc80e3 Don't add 1 to current relays when iterating 2022-08-11 20:13:30 +01:00
8cd22888b7 Add extra debug call for allRelaysActive 2022-08-11 20:12:38 +01:00
ba4b8c7501 Reformat helpers 2022-08-11 20:09:14 +01:00
0666c4a153 Enable debug mode with env vars 2022-08-11 20:09:01 +01:00
2a5e6766be Update IRC template 2022-08-11 19:49:58 +01:00
c983a8e3b6 Allow gaps in relay numbering 2022-08-11 19:22:09 +01:00
a3fe92bea9 Implement deleting networks 2022-08-02 09:01:34 +01:00
9b03485b69 More error handling when joining channels with ChanKeep 2022-08-02 09:01:24 +01:00
98dcb99f90 Implement adding networks 2022-08-01 23:02:20 +01:00
aa68bfd9be Implement requesting channel list for network 2022-08-01 21:38:46 +01:00
f3f717e693 Remove debugging code 2022-08-01 21:31:48 +01:00
864f0904f5 Implement automatic provisioning 2022-08-01 19:34:35 +01:00
b72d3d67a1 Implement updating aliases 2022-08-01 19:05:12 +01:00
96d189290b Implement API endpoint to add next relay 2022-07-29 22:39:08 +01:00
c950bcbd43 Implement deleting relays and fix adding 2022-07-29 22:11:43 +01:00
4472352785 Reformat code 2022-07-29 17:28:19 +01:00
75f79cf072 Fix joining channels with inactive relays 2022-07-29 17:28:09 +01:00
1ca6d79868 Implement creating relays via the API 2022-07-29 17:27:40 +01:00
33466b90ba Fix Redis config path 2022-07-29 22:22:22 +01:00
659d5b391b Use proper port for SSL listener 2022-07-29 22:22:22 +01:00
6e1dfecc95 Disable RelayAPI by default in stack file 2022-07-29 22:22:22 +01:00
3354a94024 Add stack example to test production 2022-07-29 09:09:08 +01:00
a5b25b2048 Use Git dir to make redis config absolute path 2022-07-29 09:06:13 +01:00
1f51bf2972 Use paths relative to root in production compose 2022-07-29 09:04:18 +01:00
6e41c8dfc0 Switch paths 2022-07-29 09:00:08 +01:00
ce0b26577f Use relative paths 2022-07-29 08:59:02 +01:00
335e602072 Fix redis.conf location in prod compose 2022-07-29 08:48:30 +01:00
1fcc9d6643 Don't pass template directory 2022-07-29 08:35:56 +01:00
1ab9824e95 Fix path issue 2022-07-29 08:32:39 +01:00
47312b04d4 Pass through configuration directories to compose 2022-07-29 08:31:01 +01:00
743c1d6be8 Fix environment variable path on production compose 2022-07-29 08:11:37 +01:00
1b60ec62f6 Properly configure production compose file 2022-07-29 08:02:10 +01:00
94303b1108 Create separate production configuration 2022-07-29 08:01:48 +01:00
219fc8ac35 Remove print statements 2022-07-28 21:30:23 +01:00
c5604c0ca8 Add trailing slash to example directory 2022-07-28 21:29:08 +01:00
f9482cac63 Add Portainer Git directory to env file 2022-07-28 21:27:26 +01:00
a61ba7b9e1 Seamlessly handle nonexistent configurations 2022-07-28 21:11:01 +01:00
b3dce50ce4 Add stack.env file 2022-07-28 19:57:26 +01:00
7eee2ec929 Move env file to example 2022-07-28 19:50:48 +01:00
2ad61e6afa Properly pass environment variables to the process 2022-07-28 19:50:07 +01:00
a598bbab4b Make some addresses and hosts configurable with environment variables 2022-07-28 19:38:37 +01:00
422d3d4cdc Lower compose version 2022-07-28 19:25:15 +01:00
2b4e037b51 Add docker definitions 2022-07-28 19:21:08 +01:00
15583bdaab Implement relay, channel and alias management 2022-07-27 22:03:42 +01:00
8050484b6f Implement editing networks via the API 2022-07-27 08:59:17 +01:00
4f141b976a Implement network and channels view 2022-07-26 22:16:35 +01:00
c302cd25da Implement API endpoint for network listing 2022-07-25 18:05:53 +01:00
24a2f79e8e Don't send to Logstash if it's disabled 2022-07-21 13:40:40 +01:00
8c9ec3ab9c Implement getting number of channels and users 2022-07-21 13:40:18 +01:00
a8d0a7d886 Implement more API functions 2022-07-21 13:40:17 +01:00
e3e150c805 Update config 2022-07-21 13:40:15 +01:00
071d6f4579 Implement API 2022-07-21 13:40:13 +01:00
4a8605626a Begin work on API endpoint 2022-07-21 13:40:11 +01:00
80c016761f Reformat again 2022-07-21 13:40:09 +01:00
7a0e2be66c Remove some legacy code 2022-07-21 13:40:07 +01:00
2fecd98978 Reformat project 2022-07-21 13:40:05 +01:00
4ecb37b179 Reformat and fix circular import 2022-07-21 13:40:03 +01:00
27cafa1def Revert "Reformat project"
This reverts commit 64e3e1160aa76d191740342ab3edc68807f890fb.
2022-07-21 13:40:01 +01:00
da678617d8 Reformat project 2022-07-21 13:39:59 +01:00
4669096fcb Don't attempt secondary registration if it is disabled 2022-07-21 13:39:57 +01:00
404fdb000f Don't attempt to register if it is disabled 2022-07-21 13:39:56 +01:00
2177766d90 Rename time to ts 2022-07-21 13:39:54 +01:00
4734a271a1 Extra error handling around emails 2022-07-21 13:39:52 +01:00
ef3151f34c Make Redis DBs configurable 2022-07-21 13:39:50 +01:00
8442c799be Add Redis DB numbers to configuration 2022-07-21 13:39:48 +01:00
e0f86ec853 Fix provisioning with emails 2022-07-21 13:39:46 +01:00
f88e6dec5a Fix some issues with the default config 2022-07-21 13:39:44 +01:00
4ff111a216 Improve email command 2022-07-21 13:39:43 +01:00
7c855e09c0 Reformat code with pre-commit 2022-07-21 13:39:41 +01:00
Mark Veidemanis
61f6715b20 Start implementing email command 2021-08-25 07:47:54 +00:00
Mark Veidemanis
0854c6d60d Add Logstash file 2021-08-24 20:08:18 +00:00
Mark Veidemanis
5179c43972 Implement modifying emails for aliases 2021-06-06 10:31:13 +00:00
Mark Veidemanis
7439d97c71 Finish Logstash implementation 2021-06-06 10:16:04 +00:00
Mark Veidemanis
391f917b38 Update requirements without versions 2021-06-06 10:13:43 +00:00
2686e4ab04 Merge branch 'master' into datarestructure 2020-11-02 20:18:36 +00:00
08b5dc06f0 Implement relay-independent join 2020-11-02 20:14:02 +00:00
5deb0649fb Don't discard server messages 2020-11-02 20:13:36 +00:00
9959231d50 Use substitutions in registration tests 2020-11-01 22:19:03 +00:00
73e596dac3 Additional error handling for command parsing 2020-11-01 22:18:48 +00:00
be405160e4 Fix bug with reg command 2020-11-01 20:43:51 +00:00
7489512a82 Add example file for blacklist 2020-11-01 19:55:32 +00:00
1f178a20ed Implement channel blacklisting 2020-11-01 19:54:24 +00:00
cb21ad8fca Fix bug with using muser attribute when absent 2020-11-01 19:03:56 +00:00
c10274ccd6 Fix syntax error in reg command 2020-11-01 18:50:17 +00:00
9fd6688892 Implement setting modes in ZNC 2020-11-01 03:39:32 +00:00
f54a448d54 Prepare command loader for reloading commands 2020-11-01 03:38:47 +00:00
fe52561b71 Implement registration at net-level 2020-11-01 03:37:29 +00:00
09405f374e Clarify message output on confirm command 2020-11-01 03:36:23 +00:00
16ab37cc0c Log error when ZNC says a channel can't be joined 2020-10-31 23:58:51 +00:00
fc3a349cb3 Fix registration cancellation bug in regproc 2020-10-31 23:58:03 +00:00
fe86d30155 Fix various bugs and off by one with provisioning 2020-10-31 23:55:11 +00:00
7485bbefd1 Move WHO and NAMES logging to trace 2020-10-31 16:52:00 +00:00
82a98c9539 Don't deduplicate global messages (NICK/QUIT) 2020-10-31 16:51:24 +00:00
45f02c323b Improve authentication detection
Add a negative check in the event we are authenticated and registered,
but not confirmed, as this fools other checks.
2020-10-31 16:49:37 +00:00
bdb3d059e3 Use zero-padded numbers to maximise usable ports 2020-10-31 00:13:59 +00:00
e403852778 Error checking in testing for registration message 2020-10-31 00:13:09 +00:00
f3dd102096 Deauth bot when disconnected and lowercase user 2020-10-31 00:12:06 +00:00
1fec14d759 Clarify error message to be more helpful 2020-10-31 00:11:28 +00:00
b67eee42c1 Implement another level of logging for tracing 2020-10-31 00:10:33 +00:00
9e6dd5e03d Note that arguments to list are optional 2020-10-31 00:06:35 +00:00
77e8ef4c16 Implement authentication checking on connection 2020-10-28 22:50:12 +00:00
c879caa9d7 Add checks in dedup for time-less messages 2020-10-28 22:46:22 +00:00
db7e5677d3 Fix decoding issue with some Redis keys 2020-10-28 22:30:49 +00:00
f848b5afd6 Provision users with lowercase names 2020-10-28 22:30:04 +00:00
3bc65f8456 Add the time field to some notifications 2020-10-28 22:26:41 +00:00
95ee63e399 Fix circular import in ChanKeep/provisioning modules 2020-10-28 18:38:27 +00:00
a1e045793c Start implementing prefixes 2020-07-09 19:43:47 +01:00
f50a40d207 Fixes to auth detection and message parsing
* don't check authentication if the network doesn't need to
  register
* don't pass through muser for ZNC type messages
* avoid duplicate message for queries containing highlights
* make a copy of the cast for metadata analysis to avoid poisoning it
* set up callback for when the instance is authenticated, so we can
  request a LIST immediately if so desired
* separate out seeding functions to populate CHANLIMIT to ease future
  work involving other options, such as PREFIX
2020-06-07 17:26:53 +01:00
4c08225a50 Remove condition-based monitoring system 2020-06-07 15:31:43 +01:00
11f15ac960 Fix various bugs in the event system
Squash many bugs in the event notification system and simplify the
code.
2020-06-02 21:34:15 +01:00
8103c16253 Fix syntax error in redis query 2020-05-31 21:54:43 +01:00
45070b06e2 Implement authentication detection
* pending command to see which instances have never authenticated
* authcheck command to see which instances are not currently
  authenticated
2020-05-31 21:52:56 +01:00
12db2f349e Add help for pending command 2020-05-31 16:40:51 +01:00
40e1f38508 Add additional error handling in user queries 2020-05-31 13:44:34 +01:00
63c97db12e Function to select and merge IRC network defs 2020-05-31 13:23:09 +01:00
91885170f1 Check registration status before joining channels
Do not join channels if any relay for a network is unregistered.
2020-05-31 13:09:58 +01:00
7c23766763 Allow sending LIST to all networks at once 2020-05-31 13:08:00 +01:00
9e62ac62bc Add confirm command
Confirm command to check which relays need manual
confirmation.
2020-05-31 12:32:12 +01:00
014de9f958 Remove leftover irc.json file 2020-05-30 21:42:26 +01:00
f90f2fdef7 Implement registration and confirmation of nicks 2020-05-30 21:40:10 +01:00
e0549cdd30 Restructure provisioning into fewer functions 2020-05-30 21:37:22 +01:00
a78229a288 Add irc.json to gitignore 2020-05-30 21:35:50 +01:00
918d410927 Fix variable scope in LIST error handling 2020-04-21 23:32:17 +01:00
bc4d5cba8e Separate provisioning into user and auth info 2019-12-28 17:51:03 +00:00
376d1bd911 Add IRC network definitions 2019-12-28 17:50:38 +00:00
778690ae3a Add more comments and remove obsolete code 2019-12-07 16:35:29 +00:00
da3ba4ea8c Add requirements 2019-11-17 19:09:17 +00:00
14 changed files with 497 additions and 346 deletions

3
.gitignore vendored
View File

@@ -158,5 +158,4 @@ cython_debug/
docker/data
*.pem
legacy/conf/live/
legacy/conf/cert/
stack.env
legacy/conf/cert/

View File

@@ -1,20 +0,0 @@
run:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env up -d
build:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env build
stop:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env down
log:
docker-compose -f docker/docker-compose.prod.yml --env-file=stack.env logs -f
run-infra:
docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env up -d
stop-infra:
docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env down
log-infra:
docker-compose -f docker/docker-compose.infra.yml --env-file=stack.env logs -f

18
db.py
View File

@@ -1,28 +1,23 @@
import random
from os import getenv
import aioredis
import orjson
import redis
# Kafka
from aiokafka import AIOKafkaProducer
from redis import StrictRedis
import util
trues = ("true", "1", "t", True)
MONOLITH_KAFKA_ENABLED = getenv("MONOLITH_KAFKA_ENABLED", "false").lower() in trues
# KAFKA_TOPIC = "msg"
log = util.get_logger("db")
# Redis (legacy)
r = redis.from_url("redis://ssdb:1289", db=0)
r = StrictRedis(unix_socket_path="/var/run/redis/redis.sock", db=0)
# AIORedis
ar = aioredis.from_url("redis://ssdb:1289", db=0)
ar = aioredis.from_url("unix:///var/run/redis/redis.sock", db=0)
TYPES_MAIN = [
"msg",
@@ -49,9 +44,6 @@ KEYNAME = "queue"
async def store_kafka_batch(data):
if not MONOLITH_KAFKA_ENABLED:
log.info(f"Not storing Kafka batch of length {len(data)}, Kafka is disabled.")
return
# log.debug(f"Storing Kafka batch of {len(data)} messages")
producer = AIOKafkaProducer(bootstrap_servers="kafka:9092")
await producer.start()
@@ -122,7 +114,7 @@ async def queue_message(msg):
Queue a message on the Redis buffer.
"""
message = orjson.dumps(msg)
await ar.lpush(KEYNAME, message)
await ar.sadd(KEYNAME, message)
async def queue_message_bulk(data):
@@ -131,4 +123,4 @@ async def queue_message_bulk(data):
"""
for msg in data:
message = orjson.dumps(msg)
await ar.lpush(KEYNAME, message)
await ar.sadd(KEYNAME, message)

View File

@@ -6,7 +6,7 @@ x-superset-depends-on: &superset-depends-on
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ${PORTAINER_GIT_DIR}/docker/superset:/app/docker
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services:
@@ -21,7 +21,7 @@ services:
volumes_from:
- tmp
depends_on:
druid:
broker:
condition: service_started
kafka:
condition: service_healthy
@@ -100,7 +100,7 @@ services:
container_name: superset_cache
restart: unless-stopped
volumes:
- redis_superset:/data
- redis:/data
db:
env_file: docker/.env-non-dev
image: postgres:10
@@ -195,116 +195,88 @@ services:
timeout: 30s
retries: 45
druid:
image: pathogen/druid:0.23.0
build: ./docker/druid/
container_name: druid
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- druid_shared:/opt/shared
- druid_var:/opt/druid/var
- coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
- "8082:8082"
- "8083:8083"
- "8888:8888"
command:
- coordinator
env_file:
- environment
# coordinator:
# #image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0
# build: ./docker/druid/
# container_name: coordinator
# volumes:
# - druid_shared:/opt/shared
# - coordinator_var:/opt/druid/var
# depends_on:
# - zookeeper
# - postgres
# ports:
# - "8081:8081"
# command:
# - coordinator
# env_file:
# - environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
# broker:
# #image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0
# build: ./docker/druid/
# container_name: broker
# volumes:
# - broker_var:/opt/druid/var
# depends_on:
# - zookeeper
# - postgres
# - coordinator
# ports:
# - "8082:8082"
# command:
# - broker
# env_file:
# - environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- druid_shared:/opt/shared
- historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
# historical:
# #image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0
# build: ./docker/druid/
# container_name: historical
# volumes:
# - druid_shared:/opt/shared
# - historical_var:/opt/druid/var
# depends_on:
# - zookeeper
# - postgres
# - coordinator
# ports:
# - "8083:8083"
# command:
# - historical
# env_file:
# - environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- druid_shared:/opt/shared
- middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
# middlemanager:
# #image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0
# build: ./docker/druid/
# container_name: middlemanager
# volumes:
# - druid_shared:/opt/shared
# - middle_var:/opt/druid/var
# depends_on:
# - zookeeper
# - postgres
# - coordinator
# ports:
# - "8091:8091"
# - "8100-8105:8100-8105"
# command:
# - middleManager
# env_file:
# - environment
# router:
# #image: apache/druid:0.23.0
# image: pathogen/druid:0.23.0
# build: ./docker/druid/
# container_name: router
# volumes:
# - router_var:/opt/druid/var
# depends_on:
# - zookeeper
# - postgres
# - coordinator
# ports:
# - "8888:8888"
# command:
# - router
# env_file:
# - environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
@@ -371,9 +343,9 @@ volumes:
external: false
redis_data: {}
metadata_data: {}
# middle_var: {}
# historical_var: {}
# broker_var: {}
# coordinator_var: {}
druid_var: {}
middle_var: {}
historical_var: {}
broker_var: {}
coordinator_var: {}
router_var: {}
druid_shared: {}

View File

@@ -41,6 +41,6 @@ REDIS_PORT=6379
FLASK_ENV=production
SUPERSET_ENV=production
SUPERSET_LOAD_EXAMPLES=no
SUPERSET_LOAD_EXAMPLES=yes
CYPRESS_CONFIG=false
SUPERSET_PORT=8088

View File

@@ -1,142 +0,0 @@
version: "2.2"
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ${PORTAINER_GIT_DIR}/docker/superset:/app/docker
- superset_home:/app/superset_home
services:
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- redis:/data
db:
env_file: .env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: .env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports:
- 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: .env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- /block/store/metadata_data:/var/lib/postgresql/data
environment:
- POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
druid:
image: pathogen/druid:0.23.0
build: ./druid/
container_name: druid
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/druid_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
- "8082:8082"
- "8083:8083"
- "8888:8888"
env_file:
- environment
networks:
default:
external:
name: pathogen
volumes:
superset_home:
external: false
db_home:
external: false
redis:
external: false

View File

@@ -1,14 +1,44 @@
version: "2.2"
# volumes:
# metadata_data: {}
# middle_var: {}
# historical_var: {}
# broker_var: {}
# coordinator_var: {}
# router_var: {}
# druid_shared: {}
x-superset-image: &superset-image apache/superset:${TAG:-latest-dev}
x-superset-depends-on: &superset-depends-on
- db
- redis_superset
x-superset-volumes: &superset-volumes
# /app/pythonpath_docker will be appended to the PYTHONPATH in the final container
- ./docker/superset:/app/docker
- superset_home:/app/superset_home
services:
app:
image: pathogen/monolith:latest
container_name: monolith
build: ${PORTAINER_GIT_DIR}/docker
build: ./docker
volumes:
- ${PORTAINER_GIT_DIR}:/code
env_file:
- ../stack.env
volumes_from:
- tmp
depends_on:
broker:
condition: service_started
kafka:
condition: service_healthy
tmp:
condition: service_started
redis:
condition: service_healthy
# - db
threshold:
image: pathogen/threshold:latest
@@ -17,7 +47,7 @@ services:
volumes:
- ${PORTAINER_GIT_DIR}:/code
- ${THRESHOLD_CONFIG_DIR}:/code/legacy/conf/live
- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
#- ${THRESHOLD_TEMPLATE_DIR}:/code/conf/templates
- ${THRESHOLD_CERT_DIR}:/code/legacy/conf/cert
ports:
- "${THRESHOLD_LISTENER_PORT}:${THRESHOLD_LISTENER_PORT}"
@@ -25,19 +55,283 @@ services:
- "${THRESHOLD_API_PORT}:${THRESHOLD_API_PORT}"
env_file:
- ../stack.env
# for development
extra_hosts:
- "host.docker.internal:host-gateway"
volumes_from:
- tmp
depends_on:
tmp:
condition: service_started
redis:
condition: service_healthy
ssdb:
image: tsl0922/ssdb
container_name: ssdb_monolith
# db:
#image: pathogen/manticore:kibana
# image: manticoresearch/manticore:latest
#build:
# context: ./docker/manticore
# args:
# DEV: 1
# restart: always
# turnilo:
# container_name: turnilo
# image: uchhatre/turnilo:latest
# ports:
# - 9093:9090
# environment:
# - DRUID_BROKER_URL=http://broker:8082
# depends_on:
# - broker
# metabase:
# container_name: metabase
# image: metabase/metabase:latest
# ports:
# - 3096:3000
# environment:
# JAVA_OPTS: -Xmx1g
# MB_DB_TYPE: postgres
# MB_DB_DBNAME: metabase
# MB_DB_PORT: 5432
# MB_DB_USER: druid
# MB_DB_PASS: FoolishPassword
# MB_DB_HOST: postgres
# depends_on:
# - broker
redis_superset:
image: redis:latest
container_name: superset_cache
restart: unless-stopped
volumes:
- ssdb_data:/ssdb/var
- redis:/data
db:
env_file: .env-non-dev
image: postgres:10
container_name: superset_db
restart: unless-stopped
volumes:
- db_home:/var/lib/postgresql/data
superset:
env_file: .env-non-dev
image: *superset-image
container_name: superset_app
command: ["/app/docker/docker-bootstrap.sh", "app-gunicorn"]
user: "root"
restart: unless-stopped
ports:
- "1289:1289"
- 8088:8088
depends_on: *superset-depends-on
volumes: *superset-volumes
superset-init:
image: *superset-image
container_name: superset_init
command: ["/app/docker/docker-init.sh"]
env_file: .env-non-dev
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker:
image: *superset-image
container_name: superset_worker
command: ["/app/docker/docker-bootstrap.sh", "worker"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
superset-worker-beat:
image: *superset-image
container_name: superset_worker_beat
command: ["/app/docker/docker-bootstrap.sh", "beat"]
env_file: .env-non-dev
restart: unless-stopped
depends_on: *superset-depends-on
user: "root"
volumes: *superset-volumes
postgres:
container_name: postgres
image: postgres:latest
volumes:
- /block/store/metadata_data:/var/lib/postgresql/data
environment:
- SSDB_PORT=1289
- POSTGRES_PASSWORD=FoolishPassword
- POSTGRES_USER=druid
- POSTGRES_DB=druid
# Need 3.5 or later for container nodes
zookeeper:
container_name: zookeeper
image: zookeeper:3.5
ports:
- "2181:2181"
environment:
- ZOO_MY_ID=1
kafka:
image: wurstmeister/kafka:latest
container_name: kafka
depends_on:
- zookeeper
ports:
- 9092:9092
- 29092:29092
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENERS: PLAINTEXT://kafka:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
KAFKA_MESSAGE_MAX_BYTES: 2000000
#KAFKA_HEAP_OPTS: -Xmx2g
healthcheck:
test: ["CMD", "kafka-topics.sh", "--list", "--bootstrap-server", "kafka:9092"]
start_period: 15s
interval: 30s
timeout: 30s
retries: 45
coordinator:
image: apache/druid:0.23.0
container_name: coordinator
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/coordinator_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
ports:
- "8081:8081"
command:
- coordinator
env_file:
- environment
broker:
image: apache/druid:0.23.0
container_name: broker
volumes:
- /block/store/broker_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8082:8082"
command:
- broker
env_file:
- environment
historical:
image: apache/druid:0.23.0
container_name: historical
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/historical_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8083:8083"
command:
- historical
env_file:
- environment
middlemanager:
image: apache/druid:0.23.0
container_name: middlemanager
volumes:
- /block/store/druid_shared:/opt/shared
- /block/store/middle_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8091:8091"
- "8100-8105:8100-8105"
command:
- middleManager
env_file:
- environment
router:
image: apache/druid:0.23.0
container_name: router
volumes:
- /block/store/router_var:/opt/druid/var
depends_on:
- zookeeper
- postgres
- coordinator
ports:
- "8888:8888"
command:
- router
env_file:
- environment
# db:
# #image: pathogen/manticore:kibana
# image: manticoresearch/manticore:dev
# #build:
# # context: ./docker/manticore
# # args:
# # DEV: 1
# restart: always
# ports:
# - 9308
# - 9312
# - 9306
# ulimits:
# nproc: 65535
# nofile:
# soft: 65535
# hard: 65535
# memlock:
# soft: -1
# hard: -1
# environment:
# - MCL=1
# volumes:
# - ./docker/data:/var/lib/manticore
# - ./docker/manticore.conf:/etc/manticoresearch/manticore.conf
tmp:
image: busybox
command: chmod -R 777 /var/run/redis
volumes:
- /var/run/redis
redis:
image: redis
command: redis-server /etc/redis.conf
ulimits:
nproc: 65535
nofile:
soft: 65535
hard: 65535
volumes:
- ${PORTAINER_GIT_DIR}/docker/redis.conf:/etc/redis.conf
- redis_data:/data
volumes_from:
- tmp
healthcheck:
test: "redis-cli -s /var/run/redis/redis.sock ping"
interval: 2s
timeout: 2s
retries: 15
networks:
default:
@@ -45,4 +339,10 @@ networks:
name: pathogen
volumes:
ssdb_data: {}
redis_data: {}
superset_home:
external: false
db_home:
external: false
redis:
external: false

View File

@@ -1,22 +0,0 @@
ARG DRUID_VER=0.23.0
FROM apache/druid:${DRUID_VER} AS druid
FROM ubuntu:bionic
RUN apt-get update && \
apt-get install --yes openjdk-8-jre-headless perl-modules && \
apt-get clean
RUN addgroup --system -gid 1000 druid \
&& adduser --system --uid 1000 --disabled-password --home /opt/druid --shell /bin/bash --group druid
COPY --from=druid --chown=druid:druid /opt/druid /opt/druid
WORKDIR /opt/druid
USER druid
EXPOSE 8888/tcp
EXPOSE 8081/tcp
CMD /opt/druid/bin/start-nano-quickstart

View File

@@ -1,3 +0,0 @@
clusters:
- name: druid
guardDataCubes: true

View File

@@ -0,0 +1,87 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=1g
#druid_emitter_logging_logLevel=debug
#druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
#druid_zk_service_host=zookeeper
#druid_metadata_storage_host=
#druid_metadata_storage_type=postgresql
#druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
#druid_metadata_storage_connector_user=druid
#druid_metadata_storage_connector_password=FoolishPassword
#druid_coordinator_balancer_strategy=cachingCost
#druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
#druid_indexer_fork_property_druid_processing_buffer_sizeBytes=128MiB
#druid_processing_buffer_sizeBytes=268435456 # 256MiB
#druid_storage_type=local
#druid_storage_storageDirectory=/opt/shared/segments
#druid_indexer_logs_type=file
#druid_indexer_logs_directory=/opt/shared/indexing-logs
#druid_processing_numThreads=1
#druid_processing_numMergeBuffers=1
#DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>
# Java tuning
#DRUID_XMX=1g
#DRUID_XMS=1g
#DRUID_MAXNEWSIZE=250m
#DRUID_NEWSIZE=250m
#DRUID_MAXDIRECTMEMORYSIZE=6172m
DRUID_SINGLE_NODE_CONF=nano-quickstart
druid_emitter_logging_logLevel=debug
druid_extensions_loadList=["druid-histogram", "druid-datasketches", "druid-lookups-cached-global", "postgresql-metadata-storage", "druid-kafka-indexing-service"]
druid_zk_service_host=zookeeper
druid_metadata_storage_host=
druid_metadata_storage_type=postgresql
druid_metadata_storage_connector_connectURI=jdbc:postgresql://postgres:5432/druid
druid_metadata_storage_connector_user=druid
druid_metadata_storage_connector_password=FoolishPassword
druid_coordinator_balancer_strategy=cachingCost
druid_indexer_runner_javaOptsArray=["-server", "-Xmx1g", "-Xms1g", "-XX:MaxDirectMemorySize=3g", "-Duser.timezone=UTC", "-Dfile.encoding=UTF-8", "-Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager"]
druid_indexer_fork_property_druid_processing_buffer_sizeBytes=256MiB
druid_storage_type=local
druid_storage_storageDirectory=/opt/shared/segments
druid_indexer_logs_type=file
druid_indexer_logs_directory=/opt/shared/indexing-logs
druid_processing_numThreads=2
druid_processing_numMergeBuffers=2
DRUID_LOG4J=<?xml version="1.0" encoding="UTF-8" ?><Configuration status="WARN"><Appenders><Console name="Console" target="SYSTEM_OUT"><PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/></Console></Appenders><Loggers><Root level="info"><AppenderRef ref="Console"/></Root><Logger name="org.apache.druid.jetty.RequestLog" additivity="false" level="DEBUG"><AppenderRef ref="Console"/></Logger></Loggers></Configuration>

0
event_log.txt Normal file
View File

View File

@@ -67,7 +67,7 @@ def parsemeta(numName, c):
def queue_message(c):
message = json.dumps(c)
main.g.lpush("queue", message)
main.g.sadd("queue", message)
def event(

View File

@@ -15,7 +15,6 @@ from concurrent.futures import ProcessPoolExecutor
# For timestamp processing
from datetime import datetime
from math import ceil
from os import getenv
import orjson
import regex
@@ -52,12 +51,6 @@ import util
# 4chan schema
from schemas.ch4_s import ATTRMAP
trues = ("true", "1", "t", True)
MONOLITH_PROCESS_PERFSTATS = (
getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
)
CUSTOM_FILTERS = [
lambda x: x.lower(),
strip_tags, #
@@ -274,19 +267,17 @@ def process_data(data):
# Add the mutated message to the return buffer
to_store.append(msg)
total_time += (time.process_time() - total_start) * 1000
if MONOLITH_PROCESS_PERFSTATS:
log.debug("=====================================")
log.debug(f"Sentiment: {sentiment_time}")
log.debug(f"Regex: {regex_time}")
log.debug(f"Polyglot: {polyglot_time}")
log.debug(f"Date: {date_time}")
log.debug(f"NLP: {nlp_time}")
log.debug(f"Normalise: {normalise_time}")
log.debug(f"Hash: {hash_time}")
log.debug(f"Normal2: {normal2_time}")
log.debug(f"Soup: {soup_time}")
log.debug(f"Total: {total_time}")
log.debug("=====================================")
log.debug("=====================================")
log.debug(f"Sentiment: {sentiment_time}")
log.debug(f"Regex: {regex_time}")
log.debug(f"Polyglot: {polyglot_time}")
log.debug(f"Date: {date_time}")
log.debug(f"NLP: {nlp_time}")
log.debug(f"Normalise: {normalise_time}")
log.debug(f"Hash: {hash_time}")
log.debug(f"Normal2: {normal2_time}")
log.debug(f"Soup: {soup_time}")
log.debug(f"Total: {total_time}")
log.debug("=====================================")
return to_store

View File

@@ -36,10 +36,7 @@ class Ingest(object):
items = []
# for source in SOURCES:
# key = f"{KEYPREFIX}{source}"
length = await db.ar.llen(KEYNAME)
start_num = length - CHUNK_SIZE
chunk = await db.ar.lrange(KEYNAME, start_num, -1)
# chunk = await db.ar.rpop(KEYNAME, CHUNK_SIZE)
chunk = await db.ar.spop(KEYNAME, CHUNK_SIZE)
if not chunk:
return
for item in chunk: