Remove some debugging code

2022-09-05 07:20:30 +01:00 · 2022-09-05 07:20:30 +01:00 · ed7c439b56
parent ecb8079b5b
commit ed7c439b56
2 changed files with 0 additions and 16 deletions
--- a/db.py
+++ b/db.py
@ -22,7 +22,6 @@ def store_message(msg):
    Store a message into Manticore
    :param msg: dict
    """
    print("DISCORD MSGLEN", len(msg["msg"]))
    # normalise fields
    for key, value in list(msg.items()):
        if value is None:
@ -52,14 +51,12 @@ def store_message_bulk(data):
    Store a message into Manticore
    :param msg: dict
    """
    print("BULK", len(data))
    if not data:
        return
    # 10000: maximum inserts we can submit to
    # Manticore as of Sept 2022
    split_posts = array_split(data, ceil(len(data) / 10000))
    for messages in split_posts:
        print("PROCESSING SPLIT OF", len(messages), "MESSAGES")
        total = []
        for msg in messages:
            # normalise fields
@ -86,9 +83,6 @@ def store_message_bulk(data):
            print(api_response)
        except ApiException as e:
            print("Exception when calling IndexApi->bulk: %s\n" % e)
        print("FINISHED PROCESSING SPLIT")
    print("BULK FINISH")
 def update_schema():
--- a/sources/ch4.py
+++ b/sources/ch4.py
@ -96,13 +96,10 @@ class Chan4(object):
                    to_get.append((mapped, no))
            self.log.info(f"Got thread list for {mapped}: {len(response)}")
        print("THREAD LIST FULL LEN", len(to_get))
        if not to_get:
            return
        split_threads = array_split(to_get, ceil(len(to_get) / THREADS_CONCURRENT))
        print("SPLIT THREADS INTO", len(split_threads))
        for threads in split_threads:
            print("SUBMITTED THREADS FOR", len(threads))
            await self.get_threads_content(threads)
            await asyncio.sleep(THREADS_DELAY)
        # await self.get_threads_content(to_get)
@ -138,12 +135,9 @@ class Chan4(object):
        # Split into 10,000 chunks
        if not all_posts:
            print("NOTALL POSTS", all_posts)
            return
        threads_per_core = int(len(all_posts) / CPU_THREADS)
        print("THREADS PER CORE", threads_per_core)
        for i in range(CPU_THREADS):
            print("SUBMITTING CORE", i)
            new_dict = {}
            pulled_posts = self.take_items(all_posts, threads_per_core)
            for k, v in pulled_posts:
@ -151,7 +145,6 @@ class Chan4(object):
                    new_dict[k].append(v)
                else:
                    new_dict[k] = [v]
            print("SUBMITTING", len(new_dict), "THREADS")
            await self.handle_posts_thread(new_dict)
        # print("VAL", ceil(len(all_posts) / threads_per_core))
        # split_posts = array_split(all_posts, ceil(len(all_posts) / threads_per_core))
@ -166,7 +159,6 @@ class Chan4(object):
    @asyncio.coroutine
    def handle_posts_thread(self, posts):
        loop = asyncio.get_event_loop()
        print("HANDLE POSTS THREAD", len(posts.keys()))
        yield from loop.run_in_executor(p, self.handle_posts, posts)
    def handle_posts(self, posts):
@ -226,7 +218,6 @@ class Chan4(object):
            try:
                return (mapped, await response.json())
            except:  # noqa
                print("FETCH ERROR")
                return (mapped, None)
    async def bound_fetch(self, sem, url, session, mapped):
@ -235,7 +226,6 @@ class Chan4(object):
            try:
                return await self.fetch(url, session, mapped)
            except:  # noqa
                print("BOUND ERROR")
                return (mapped, None)
    async def api_call(self, methods={}):