Begin implementing RTS

This commit is contained in:
2026-02-17 12:14:29 +00:00
parent dc533f266f
commit 81f05d4263
14 changed files with 484 additions and 268 deletions

View File

@@ -8,10 +8,9 @@ from os import getenv
import aiohttp
from numpy import array_split
import psutil
import db
import util
from perf.throttle import DynamicThrottle
# CONFIGURATION #
@@ -35,6 +34,7 @@ BOARDS = getenv("MONOLITH_CH4_BOARDS", "").split(",")
# CONFIGURATION END #
class Chan4(object):
"""
4chan indexer, crawler and ingester.
@@ -44,7 +44,18 @@ class Chan4(object):
name = self.__class__.__name__
self.log = util.get_logger(name)
self.sleep_interval = 0.0
self.throttle = DynamicThrottle(
target_cpu_usage=TARGET_CPU_USAGE,
sleep_increment=0.01,
sleep_decrement=0.01,
sleep_max=0.1,
sleep_min=0,
psutil_interval=0.1,
log=self.log,
start_increment=False,
use_async=True,
)
self.wait = self.throttle.wait
self.api_endpoint = "https://a.4cdn.org"
# self.boards = ["out", "g", "a", "3", "pol"] #
@@ -65,33 +76,6 @@ class Chan4(object):
self.hash_key = self.hash_key.decode("ascii")
self.log.debug(f"Decoded hash key: {self.hash_key}")
async def dynamic_throttle(self):
    """
    Adaptively sleep before a request based on current CPU usage.

    Samples CPU usage over a 0.2s window. If usage is above
    TARGET_CPU_USAGE, the sleep interval grows by 0.01s, capped at 0.1s.
    If usage is below the target and the interval is above 0.01s, the
    interval shrinks by 0.01s. Every adjustment is logged with the
    measured CPU usage and the resulting sleep interval.

    Finally sleeps for the current interval when it is greater than zero.
    """
    # psutil.cpu_percent(interval=...) blocks the calling thread for the
    # whole sampling window. Run it in the default executor so the event
    # loop keeps serving other coroutines while the sample is taken.
    loop = asyncio.get_running_loop()
    current_cpu_usage = await loop.run_in_executor(None, psutil.cpu_percent, 0.2)

    if current_cpu_usage > TARGET_CPU_USAGE:
        # Back off a little more, but never beyond the 0.1s ceiling.
        self.sleep_interval = min(self.sleep_interval + 0.01, 0.1)
        self.log.info(
            f"CPU {current_cpu_usage}% > {TARGET_CPU_USAGE}%, "
            f"=> sleep {self.sleep_interval:.3f}s"
        )
    elif current_cpu_usage < TARGET_CPU_USAGE and self.sleep_interval > 0.01:
        # Headroom available: relax the delay one step at a time.
        self.sleep_interval -= 0.01
        self.log.info(
            f"CPU {current_cpu_usage}% < {TARGET_CPU_USAGE}%, "
            f"=> sleep {self.sleep_interval:.3f}s"
        )

    if self.sleep_interval > 0:
        await asyncio.sleep(self.sleep_interval)
async def run(self):
if "ALL" in BOARDS:
await self.get_board_list()
@@ -204,7 +188,7 @@ class Chan4(object):
async def bound_fetch(self, sem, url, session, mapped):
# Getter function with semaphore.
async with sem:
await self.dynamic_throttle()
await self.wait()
try:
return await self.fetch(url, session, mapped)
except: # noqa