|
|
@ -18,8 +18,8 @@ from attr import attrs, attrib
|
|
|
|
from attr.validators import instance_of, optional
|
|
|
|
from attr.validators import instance_of, optional
|
|
|
|
from typing import TypeVar, Optional, Callable, Union
|
|
|
|
from typing import TypeVar, Optional, Callable, Union
|
|
|
|
|
|
|
|
|
|
|
|
_function = TypeVar('_function', bound=Callable[..., object])
|
|
|
|
_function = TypeVar("_function", bound=Callable[..., object])
|
|
|
|
_exceptions = TypeVar('_exceptions', bound=Optional[Union[type, tuple, list]])
|
|
|
|
_exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@attrs
|
|
|
|
@attrs
|
|
|
@ -45,17 +45,17 @@ class ExceptionDecorator:
|
|
|
|
except Exception as caughtException:
|
|
|
|
except Exception as caughtException:
|
|
|
|
import logging
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
|
|
logger = logging.getLogger('webdriver_log')
|
|
|
|
logger = logging.getLogger("webdriver_log")
|
|
|
|
logger.setLevel(logging.ERROR)
|
|
|
|
logger.setLevel(logging.ERROR)
|
|
|
|
handler = logging.FileHandler('.webdriver.log', mode='a+', encoding='utf-8')
|
|
|
|
handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8")
|
|
|
|
logger.addHandler(handler)
|
|
|
|
logger.addHandler(handler)
|
|
|
|
|
|
|
|
|
|
|
|
if isinstance(self.exception, type):
|
|
|
|
if isinstance(self.exception, type):
|
|
|
|
if not type(caughtException) == self.exception:
|
|
|
|
if not type(caughtException) == self.exception:
|
|
|
|
logger.error(f'unexpected error - {caughtException}')
|
|
|
|
logger.error(f"unexpected error - {caughtException}")
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
if not type(caughtException) in self.exception:
|
|
|
|
if not type(caughtException) in self.exception:
|
|
|
|
logger.error(f'unexpected error - {caughtException}')
|
|
|
|
logger.error(f"unexpected error - {caughtException}")
|
|
|
|
|
|
|
|
|
|
|
|
return wrapper
|
|
|
|
return wrapper
|
|
|
|
|
|
|
|
|
|
|
@ -89,9 +89,9 @@ class Browser:
|
|
|
|
default_Viewport: dict = attrib(
|
|
|
|
default_Viewport: dict = attrib(
|
|
|
|
validator=instance_of(dict),
|
|
|
|
validator=instance_of(dict),
|
|
|
|
default={
|
|
|
|
default={
|
|
|
|
'defaultViewport': {
|
|
|
|
"defaultViewport": {
|
|
|
|
'width': 500,
|
|
|
|
"width": 500,
|
|
|
|
'height': 1200,
|
|
|
|
"height": 1200,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
},
|
|
|
|
kw_only=True,
|
|
|
|
kw_only=True,
|
|
|
@ -230,28 +230,28 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
|
|
|
|
|
|
|
|
await self.click(
|
|
|
|
await self.click(
|
|
|
|
page_instance,
|
|
|
|
page_instance,
|
|
|
|
'//*[contains(@class, \'header-user-dropdown\')]',
|
|
|
|
"//*[contains(@class, 'header-user-dropdown')]",
|
|
|
|
{'timeout': 5000},
|
|
|
|
{"timeout": 5000},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# It's normal not to find it, sometimes there is none :shrug:
|
|
|
|
# It's normal not to find it, sometimes there is none :shrug:
|
|
|
|
await self.click(
|
|
|
|
await self.click(
|
|
|
|
page_instance,
|
|
|
|
page_instance,
|
|
|
|
'//*[contains(text(), \'Settings\')]/ancestor::button[1]',
|
|
|
|
"//*[contains(text(), 'Settings')]/ancestor::button[1]",
|
|
|
|
{'timeout': 5000},
|
|
|
|
{"timeout": 5000},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
await self.click(
|
|
|
|
await self.click(
|
|
|
|
page_instance,
|
|
|
|
page_instance,
|
|
|
|
'//*[contains(text(), \'Dark Mode\')]/ancestor::button[1]',
|
|
|
|
"//*[contains(text(), 'Dark Mode')]/ancestor::button[1]",
|
|
|
|
{'timeout': 5000},
|
|
|
|
{"timeout": 5000},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Closes settings
|
|
|
|
# Closes settings
|
|
|
|
await self.click(
|
|
|
|
await self.click(
|
|
|
|
page_instance,
|
|
|
|
page_instance,
|
|
|
|
'//*[contains(@class, \'header-user-dropdown\')]',
|
|
|
|
"//*[contains(@class, 'header-user-dropdown')]",
|
|
|
|
{'timeout': 5000},
|
|
|
|
{"timeout": 5000},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
async def __close_nsfw(
|
|
|
|
async def __close_nsfw(
|
|
|
@ -260,7 +260,7 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
) -> None:
|
|
|
|
) -> None:
|
|
|
|
from asyncio import ensure_future
|
|
|
|
from asyncio import ensure_future
|
|
|
|
|
|
|
|
|
|
|
|
print_substep('Post is NSFW. You are spicy...')
|
|
|
|
print_substep("Post is NSFW. You are spicy...")
|
|
|
|
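# Schedule the navigation wait before clicking so the reload can be awaited afterwards
|
|
|
|
# Schedule the navigation wait before clicking so the reload can be awaited afterwards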
|
|
|
|
navigation = ensure_future(page_instance.waitForNavigation())
|
|
|
|
navigation = ensure_future(page_instance.waitForNavigation())
|
|
|
|
|
|
|
|
|
|
|
@ -268,7 +268,7 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
await self.click(
|
|
|
|
await self.click(
|
|
|
|
page_instance,
|
|
|
|
page_instance,
|
|
|
|
'//button[text()="Yes"]',
|
|
|
|
'//button[text()="Yes"]',
|
|
|
|
{'timeout': 5000},
|
|
|
|
{"timeout": 5000},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# Await reload
|
|
|
|
# Await reload
|
|
|
@ -277,7 +277,7 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
await (await self.find_xpath(
|
|
|
|
await (await self.find_xpath(
|
|
|
|
page_instance,
|
|
|
|
page_instance,
|
|
|
|
'//button[text()="Click to see nsfw"]',
|
|
|
|
'//button[text()="Click to see nsfw"]',
|
|
|
|
{'timeout': 5000},
|
|
|
|
{"timeout": 5000},
|
|
|
|
)).click()
|
|
|
|
)).click()
|
|
|
|
|
|
|
|
|
|
|
|
async def __collect_comment(
|
|
|
|
async def __collect_comment(
|
|
|
@ -296,10 +296,10 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}')
|
|
|
|
await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}')
|
|
|
|
|
|
|
|
|
|
|
|
# Translates the submission's comment
|
|
|
|
# Translates the submission's comment
|
|
|
|
if settings.config['reddit']['thread']['post_lang']:
|
|
|
|
if settings.config["reddit"]["thread"]["post_lang"]:
|
|
|
|
comment_tl = ts.google(
|
|
|
|
comment_tl = ts.google(
|
|
|
|
comment_obj['comment_body'],
|
|
|
|
comment_obj["comment_body"],
|
|
|
|
to_language=settings.config['reddit']['thread']['post_lang'],
|
|
|
|
to_language=settings.config["reddit"]["thread"]["post_lang"],
|
|
|
|
)
|
|
|
|
)
|
|
|
|
await comment_page.evaluate(
|
|
|
|
await comment_page.evaluate(
|
|
|
|
f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) '
|
|
|
|
f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) '
|
|
|
@ -309,7 +309,7 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
await self.screenshot(
|
|
|
|
await self.screenshot(
|
|
|
|
comment_page,
|
|
|
|
comment_page,
|
|
|
|
f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]',
|
|
|
|
f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]',
|
|
|
|
{'path': f'assets/temp/png/comment_{filename_idx}.png'},
|
|
|
|
{"path": f"assets/temp/png/comment_{filename_idx}.png"},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
async def download(
|
|
|
|
async def download(
|
|
|
@ -318,31 +318,31 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png
|
|
|
|
Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
print_step('Downloading screenshots of reddit posts...')
|
|
|
|
print_step("Downloading screenshots of reddit posts...")
|
|
|
|
|
|
|
|
|
|
|
|
print_substep('Launching Headless Browser...')
|
|
|
|
print_substep("Launching Headless Browser...")
|
|
|
|
await self.get_browser()
|
|
|
|
await self.get_browser()
|
|
|
|
|
|
|
|
|
|
|
|
# ! Make sure the reddit screenshots folder exists
|
|
|
|
# ! Make sure the reddit screenshots folder exists
|
|
|
|
Path('assets/temp/png').mkdir(parents=True, exist_ok=True)
|
|
|
|
Path("assets/temp/png").mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Get the thread screenshot
|
|
|
|
# Get the thread screenshot
|
|
|
|
reddit_main = await self.browser.newPage()
|
|
|
|
reddit_main = await self.browser.newPage()
|
|
|
|
await reddit_main.goto(self.reddit_object['thread_url'])
|
|
|
|
await reddit_main.goto(self.reddit_object["thread_url"])
|
|
|
|
|
|
|
|
|
|
|
|
if settings.config['settings']['theme'] == 'dark':
|
|
|
|
if settings.config["settings"]["theme"] == "dark":
|
|
|
|
await self.__dark_theme(reddit_main)
|
|
|
|
await self.__dark_theme(reddit_main)
|
|
|
|
|
|
|
|
|
|
|
|
if self.reddit_object['is_nsfw']:
|
|
|
|
if self.reddit_object["is_nsfw"]:
|
|
|
|
# The post is NSFW, so the proceed button must be clicked first.
|
|
|
|
# The post is NSFW, so the proceed button must be clicked first.
|
|
|
|
await self.__close_nsfw(reddit_main)
|
|
|
|
await self.__close_nsfw(reddit_main)
|
|
|
|
|
|
|
|
|
|
|
|
# Translates submission title
|
|
|
|
# Translates submission title
|
|
|
|
if settings.config['reddit']['thread']['post_lang']:
|
|
|
|
if settings.config["reddit"]["thread"]["post_lang"]:
|
|
|
|
print_substep('Translating post...')
|
|
|
|
print_substep("Translating post...")
|
|
|
|
texts_in_tl = ts.google(
|
|
|
|
texts_in_tl = ts.google(
|
|
|
|
self.reddit_object['thread_title'],
|
|
|
|
self.reddit_object["thread_title"],
|
|
|
|
to_language=settings.config['reddit']['thread']['post_lang'],
|
|
|
|
to_language=settings.config["reddit"]["thread"]["post_lang"],
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
await reddit_main.evaluate(
|
|
|
|
await reddit_main.evaluate(
|
|
|
@ -351,10 +351,10 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
texts_in_tl,
|
|
|
|
texts_in_tl,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
print_substep('Skipping translation...')
|
|
|
|
print_substep("Skipping translation...")
|
|
|
|
|
|
|
|
|
|
|
|
async_tasks_primary = [
|
|
|
|
async_tasks_primary = [
|
|
|
|
self.__collect_comment(self.reddit_object['comments'][idx], idx) for idx in
|
|
|
|
self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in
|
|
|
|
self.screenshot_idx
|
|
|
|
self.screenshot_idx
|
|
|
|
]
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
@ -362,7 +362,7 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
self.screenshot(
|
|
|
|
self.screenshot(
|
|
|
|
reddit_main,
|
|
|
|
reddit_main,
|
|
|
|
f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]',
|
|
|
|
f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]',
|
|
|
|
{'path': f'assets/temp/png/title.png'},
|
|
|
|
{"path": "assets/temp/png/title.png"},
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
@ -380,9 +380,10 @@ class RedditScreenshot(Browser, Wait):
|
|
|
|
chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0)
|
|
|
|
chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0)
|
|
|
|
for task in track(
|
|
|
|
for task in track(
|
|
|
|
as_completed(chunked_tasks),
|
|
|
|
as_completed(chunked_tasks),
|
|
|
|
description=f'Downloading comments: Chunk {idx}/{chunk_list}',
|
|
|
|
description=f"Downloading comments: Chunk {idx}/{chunk_list}",
|
|
|
|
|
|
|
|
total=chunked_tasks.__len__(),
|
|
|
|
):
|
|
|
|
):
|
|
|
|
await task
|
|
|
|
await task
|
|
|
|
|
|
|
|
|
|
|
|
print_substep('Comments downloaded Successfully.', style='bold green')
|
|
|
|
print_substep("Comments downloaded Successfully.", style="bold green")
|
|
|
|
await self.close_browser()
|
|
|
|
await self.close_browser()
|
|
|
|