|
|
|
@ -15,6 +15,7 @@ from concurrent.futures import ProcessPoolExecutor
|
|
|
|
|
# For timestamp processing
|
|
|
|
|
from datetime import datetime
|
|
|
|
|
from math import ceil
|
|
|
|
|
from os import getenv
|
|
|
|
|
|
|
|
|
|
import orjson
|
|
|
|
|
import regex
|
|
|
|
@ -51,6 +52,12 @@ import util
|
|
|
|
|
# 4chan schema
|
|
|
|
|
from schemas.ch4_s import ATTRMAP
|
|
|
|
|
|
|
|
|
|
# Values accepted as "enabled" for boolean-style settings. The string forms
# are compared against lower-cased text; the literal True is kept so the same
# tuple also matches a value that is already a bool.
trues = ("true", "1", "t", True)

# Whether the per-stage timing breakdown is logged after processing.
# Read once at import time from the MONOLITH_PROCESS_PERFSTATS environment
# variable (default "false"). Surrounding whitespace is stripped so a value
# such as "true " or "1\n" is not silently treated as disabled.
MONOLITH_PROCESS_PERFSTATS = (
    getenv("MONOLITH_PROCESS_PERFSTATS", "false").strip().lower() in trues
)
|
|
|
|
|
|
|
|
|
|
CUSTOM_FILTERS = [
|
|
|
|
|
lambda x: x.lower(),
|
|
|
|
|
strip_tags, #
|
|
|
|
@ -267,17 +274,19 @@ def process_data(data):
|
|
|
|
|
# Add the mutated message to the return buffer
|
|
|
|
|
to_store.append(msg)
|
|
|
|
|
total_time += (time.process_time() - total_start) * 1000
|
|
|
|
|
log.debug("=====================================")
|
|
|
|
|
log.debug(f"Sentiment: {sentiment_time}")
|
|
|
|
|
log.debug(f"Regex: {regex_time}")
|
|
|
|
|
log.debug(f"Polyglot: {polyglot_time}")
|
|
|
|
|
log.debug(f"Date: {date_time}")
|
|
|
|
|
log.debug(f"NLP: {nlp_time}")
|
|
|
|
|
log.debug(f"Normalise: {normalise_time}")
|
|
|
|
|
log.debug(f"Hash: {hash_time}")
|
|
|
|
|
log.debug(f"Normal2: {normal2_time}")
|
|
|
|
|
log.debug(f"Soup: {soup_time}")
|
|
|
|
|
log.debug(f"Total: {total_time}")
|
|
|
|
|
log.debug("=====================================")
|
|
|
|
|
|
|
|
|
|
if MONOLITH_PROCESS_PERFSTATS:
|
|
|
|
|
log.debug("=====================================")
|
|
|
|
|
log.debug(f"Sentiment: {sentiment_time}")
|
|
|
|
|
log.debug(f"Regex: {regex_time}")
|
|
|
|
|
log.debug(f"Polyglot: {polyglot_time}")
|
|
|
|
|
log.debug(f"Date: {date_time}")
|
|
|
|
|
log.debug(f"NLP: {nlp_time}")
|
|
|
|
|
log.debug(f"Normalise: {normalise_time}")
|
|
|
|
|
log.debug(f"Hash: {hash_time}")
|
|
|
|
|
log.debug(f"Normal2: {normal2_time}")
|
|
|
|
|
log.debug(f"Soup: {soup_time}")
|
|
|
|
|
log.debug(f"Total: {total_time}")
|
|
|
|
|
log.debug("=====================================")
|
|
|
|
|
|
|
|
|
|
return to_store
|
|
|
|
|