Reformat code
This commit is contained in:
@@ -1,23 +1,22 @@
|
||||
# Python modules can't start with a number...
|
||||
import ujson
|
||||
import asyncio
|
||||
import random
|
||||
import string
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from datetime import datetime
|
||||
|
||||
import aiohttp
|
||||
import ujson
|
||||
from bs4 import BeautifulSoup
|
||||
from siphashc import siphash
|
||||
|
||||
import db
|
||||
import util
|
||||
from schemas.ch4_s import ATTRMAP
|
||||
import aiohttp
|
||||
import asyncio
|
||||
from numpy import array_split
|
||||
from math import ceil
|
||||
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
# Module-level process pool limited to 10 worker processes; it is created as a
# side effect of importing this module.
# NOTE(review): no use of `p` is visible in this excerpt — confirm it is still needed.
p = ProcessPoolExecutor(10)
|
||||
|
||||
|
||||
class Chan4(object):
|
||||
"""
|
||||
4chan indexer, crawler and ingester.
|
||||
@@ -28,12 +27,12 @@ class Chan4(object):
|
||||
self.log = util.get_logger(name)
|
||||
|
||||
self.api_endpoint = "https://a.4cdn.org"
|
||||
#self.boards = ["out", "g", "a", "3", "pol"] #
|
||||
# self.boards = ["out", "g", "a", "3", "pol"] #
|
||||
self.boards = []
|
||||
self.thread_list = {}
|
||||
|
||||
#self.thread_deferreds = []
|
||||
#self.content_deferreds = []
|
||||
# self.thread_deferreds = []
|
||||
# self.content_deferreds = []
|
||||
|
||||
self.log.info(f"Starting crawler bot to {self.api_endpoint}")
|
||||
|
||||
@@ -82,7 +81,10 @@ class Chan4(object):
|
||||
await self.get_thread_lists(self.boards)
|
||||
|
||||
async def get_threads_content(self, thread_list):
|
||||
thread_urls = {(board, thread): f"{board}/thread/{thread}.json" for board, thread in thread_list}
|
||||
thread_urls = {
|
||||
(board, thread): f"{board}/thread/{thread}.json"
|
||||
for board, thread in thread_list
|
||||
}
|
||||
self.log.debug(f"Getting information for threads: {thread_urls}")
|
||||
responses = await self.api_call(thread_urls)
|
||||
self.log.debug(f"Got information for threads: {thread_urls}")
|
||||
@@ -101,8 +103,8 @@ class Chan4(object):
|
||||
# with futures.ThreadPoolExecutor(max_workers=6) as executor:
|
||||
# print("SUBMITTED THREAD FOR", len(posts))
|
||||
# executor.submit(self.handle_posts, board, thread, posts)
|
||||
#await self.handle_posts(board, thread, response["posts"])
|
||||
#await asyncio.sleep(1)
|
||||
# await self.handle_posts(board, thread, response["posts"])
|
||||
# await asyncio.sleep(1)
|
||||
await self.handle_posts_thread(all_posts)
|
||||
|
||||
@asyncio.coroutine
|
||||
@@ -158,7 +160,7 @@ class Chan4(object):
|
||||
to_store.append(posts[key][index])
|
||||
|
||||
# print({name_map[name]: val for name, val in post.items()})
|
||||
#print(f"Got posts: {len(posts)}")
|
||||
# print(f"Got posts: {len(posts)}")
|
||||
print("HANDLE POSTS DONE")
|
||||
db.store_message_bulk(to_store)
|
||||
print("STORE DB DONE")
|
||||
@@ -167,26 +169,20 @@ class Chan4(object):
|
||||
async with session.get(url) as response:
|
||||
try:
|
||||
return (mapped, await response.json())
|
||||
except:
|
||||
except: # noqa
|
||||
print("FETCH ERROR")
|
||||
return (mapped, None)
|
||||
|
||||
|
||||
async def bound_fetch(self, sem, url, session, mapped):
    """Fetch ``url`` while holding ``sem`` so concurrent requests stay bounded.

    Args:
        sem: Async context manager (an ``asyncio.Semaphore`` in ``api_call``)
            held for the duration of the request.
        url: URL passed through to ``self.fetch``.
        session: Session object passed through to ``self.fetch``.
        mapped: Caller-supplied key echoed back so results can be matched
            to the request that produced them.

    Returns:
        Whatever ``self.fetch`` returns on success, or ``(mapped, None)`` if
        the fetch raised an ordinary exception (best-effort behaviour).

    Raises:
        asyncio.CancelledError: Propagated untouched so that task
            cancellation is never mistaken for a failed fetch.
    """
    async with sem:
        try:
            return await self.fetch(url, session, mapped)
        except asyncio.CancelledError:
            # A bare ``except:`` would swallow cancellation and keep the
            # task alive; re-raise explicitly (also covers Python 3.7, where
            # CancelledError still derives from Exception).
            raise
        except Exception:
            # Deliberate best-effort: report the failure and hand back an
            # empty result instead of aborting the whole gather().
            print("BOUND ERROR")
            return (mapped, None)
|
||||
|
||||
async def api_call(self, methods={}):
|
||||
headers = {
|
||||
"User-Agent": (
|
||||
"Mozilla/5.0 (Windows NT 10.0; rv:68.0) Gecko/20100101 Firefox/68.0"
|
||||
)
|
||||
}
|
||||
tasks = []
|
||||
sem = asyncio.Semaphore(100)
|
||||
connector = aiohttp.TCPConnector(limit=None)
|
||||
@@ -195,8 +191,7 @@ class Chan4(object):
|
||||
url = f"{self.api_endpoint}/{method}"
|
||||
self.log.debug(f"GET {url}")
|
||||
task = asyncio.create_task(self.bound_fetch(sem, url, session, mapped))
|
||||
#task = asyncio.ensure_future(self.bound_fetch(sem, url, session))
|
||||
# task = asyncio.ensure_future(self.bound_fetch(sem, url, session))
|
||||
tasks.append(task)
|
||||
responses = await asyncio.gather(*tasks)
|
||||
return responses
|
||||
|
||||
|
||||
@@ -5,7 +5,6 @@ import discord
|
||||
|
||||
import db
|
||||
import util
|
||||
|
||||
from schemas.dis_s import ATTRMAP
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user