Begin implementing RTS
This commit is contained in:
@@ -8,10 +8,9 @@ from os import getenv
|
||||
import aiohttp
|
||||
from numpy import array_split
|
||||
|
||||
import psutil
|
||||
|
||||
import db
|
||||
import util
|
||||
from perf.throttle import DynamicThrottle
|
||||
|
||||
# CONFIGURATION #
|
||||
|
||||
@@ -35,6 +34,7 @@ BOARDS = getenv("MONOLITH_CH4_BOARDS", "").split(",")
|
||||
|
||||
# CONFIGURATION END #
|
||||
|
||||
|
||||
class Chan4(object):
|
||||
"""
|
||||
4chan indexer, crawler and ingester.
|
||||
@@ -44,7 +44,18 @@ class Chan4(object):
|
||||
name = self.__class__.__name__
|
||||
self.log = util.get_logger(name)
|
||||
|
||||
self.sleep_interval = 0.0
|
||||
self.throttle = DynamicThrottle(
|
||||
target_cpu_usage=TARGET_CPU_USAGE,
|
||||
sleep_increment=0.01,
|
||||
sleep_decrement=0.01,
|
||||
sleep_max=0.1,
|
||||
sleep_min=0,
|
||||
psutil_interval=0.1,
|
||||
log=self.log,
|
||||
start_increment=False,
|
||||
use_async=True,
|
||||
)
|
||||
self.wait = self.throttle.wait
|
||||
|
||||
self.api_endpoint = "https://a.4cdn.org"
|
||||
# self.boards = ["out", "g", "a", "3", "pol"] #
|
||||
@@ -65,33 +76,6 @@ class Chan4(object):
|
||||
self.hash_key = self.hash_key.decode("ascii")
|
||||
self.log.debug(f"Decoded hash key: {self.hash_key}")
|
||||
|
||||
async def dynamic_throttle(self):
    """
    Adjust the pre-request sleep interval to the current CPU load, then sleep.

    Samples CPU usage over a 0.2s window. If usage is above
    TARGET_CPU_USAGE, the sleep interval grows by 0.01s and is capped at
    0.1s. If usage is below the target and the interval is still greater
    than 0.01s, the interval shrinks by 0.01s. Each adjustment is logged at
    info level with the measured CPU usage and the new interval.
    Finally sleeps for the resulting interval when it is greater than zero.
    """
    # psutil.cpu_percent(interval=...) blocks for the whole sampling window.
    # Calling it directly in a coroutine would freeze the event loop (and
    # every other in-flight request) for 0.2s, so sample in the default
    # executor and await the result instead.
    loop = asyncio.get_running_loop()
    current_cpu_usage = await loop.run_in_executor(None, psutil.cpu_percent, 0.2)

    if current_cpu_usage > TARGET_CPU_USAGE:
        # Back off one step, never beyond the 0.1s ceiling.
        self.sleep_interval = min(self.sleep_interval + 0.01, 0.1)
        self.log.info(
            f"CPU {current_cpu_usage}% > {TARGET_CPU_USAGE}%, "
            f"=> sleep {self.sleep_interval:.3f}s"
        )
    elif current_cpu_usage < TARGET_CPU_USAGE and self.sleep_interval > 0.01:
        # Speed up one step; the guard keeps the interval from going negative.
        self.sleep_interval -= 0.01
        self.log.info(
            f"CPU {current_cpu_usage}% < {TARGET_CPU_USAGE}%, "
            f"=> sleep {self.sleep_interval:.3f}s"
        )

    if self.sleep_interval > 0:
        await asyncio.sleep(self.sleep_interval)
|
||||
|
||||
async def run(self):
|
||||
if "ALL" in BOARDS:
|
||||
await self.get_board_list()
|
||||
@@ -204,7 +188,7 @@ class Chan4(object):
|
||||
async def bound_fetch(self, sem, url, session, mapped):
|
||||
# Getter function with semaphore.
|
||||
async with sem:
|
||||
await self.dynamic_throttle()
|
||||
await self.wait()
|
||||
try:
|
||||
return await self.fetch(url, session, mapped)
|
||||
except: # noqa
|
||||
|
||||
Reference in New Issue
Block a user