Add throttling for performance

This commit is contained in:
2025-01-24 12:17:22 +00:00
parent 352909bec0
commit 54ecfbae64
3 changed files with 101 additions and 2 deletions

View File

@@ -8,6 +8,9 @@ import string
# For timing
import time
# For throttling
import psutil
# Squash errors
import warnings
from concurrent.futures import ProcessPoolExecutor
@@ -57,6 +60,9 @@ KEYNAME = "queue"
MONOLITH_PROCESS_PERFSTATS = (
getenv("MONOLITH_PROCESS_PERFSTATS", "false").lower() in trues
)
TARGET_CPU_USAGE = float(os.getenv("MONOLITH_PROCESS_TARGET_CPU_USAGE", 50.0))
SLEEP_INTERVAL = 0.0
CUSTOM_FILTERS = [
lambda x: x.lower(),
@@ -143,6 +149,7 @@ async def spawn_processing_threads(chunk, length):
def process_data(chunk, index, chunk_size):
global SLEEP_INTERVAL
log.debug(f"[{chunk}/{index}] Processing {chunk_size} messages")
to_store = []
@@ -155,11 +162,13 @@ def process_data(chunk, index, chunk_size):
hash_time = 0.0
normal2_time = 0.0
soup_time = 0.0
sleep_time = 0.0
total_time = 0.0
# Initialise sentiment analyser
analyzer = SentimentIntensityAnalyzer()
for msg_index in range(chunk_size):
msg = db.r.rpop(KEYNAME)
if not msg:
@@ -207,7 +216,9 @@ def process_data(chunk, index, chunk_size):
continue
# pass
else:
msg["type"] = "update"
# msg["type"] = "update"
# NOTE: skip "update" messages entirely — they tend to breed spam
continue
db.r.set(redis_key, hash)
time_took = (time.process_time() - start) * 1000
hash_time += time_took
@@ -289,6 +300,26 @@ def process_data(chunk, index, chunk_size):
to_store.append(msg)
total_time += (time.process_time() - total_start) * 1000
# Dynamic throttling to reduce CPU usage
if msg_index % 5 == 0:
current_cpu_usage = psutil.cpu_percent(interval=0.2)
if current_cpu_usage > TARGET_CPU_USAGE:
SLEEP_INTERVAL += 0.02
if SLEEP_INTERVAL > 0.5:
SLEEP_INTERVAL = 0.5
log.info(
f"CPU {current_cpu_usage}% > {TARGET_CPU_USAGE}%, "
f"=> sleep {SLEEP_INTERVAL:.3f}s"
)
elif current_cpu_usage < TARGET_CPU_USAGE and SLEEP_INTERVAL > 0.01:
SLEEP_INTERVAL -= 0.01
log.info(
f"CPU {current_cpu_usage}% < {TARGET_CPU_USAGE}%, "
f"=> sleep {SLEEP_INTERVAL:.3f}s"
)
time.sleep(SLEEP_INTERVAL)
sleep_time += SLEEP_INTERVAL
if MONOLITH_PROCESS_PERFSTATS:
log.debug("=====================================")
log.debug(f"Chunk: {chunk}")
@@ -303,6 +334,7 @@ def process_data(chunk, index, chunk_size):
log.debug(f"Normal2: {normal2_time}")
log.debug(f"Soup: {soup_time}")
log.debug(f"Total: {total_time}")
log.debug(f"Throttling: {sleep_time}")
log.debug("=====================================")
return to_store