from asyncio import as_completed

from pyppeteer import launch
from pyppeteer.page import Page as PageCls
from pyppeteer.browser import Browser as BrowserCls
from pyppeteer.element_handle import ElementHandle as ElementHandleCls
from pyppeteer.errors import TimeoutError as BrowserTimeoutError

from pathlib import Path
from utils import settings
from utils.console import print_step, print_substep
from rich.progress import track
import translators as ts

from attr import attrs, attrib
from attr.validators import instance_of
from typing import Optional

import webdriver.common as common

# Registers pyppeteer's TimeoutError on the shared helper module.
# NOTE(review): presumably this is the exception ``common.catch_exception``
# handles by default - confirm in webdriver/common.py.
common.default_exception = BrowserTimeoutError


@attrs
class Browser:
    """
    Owns the Pyppeteer browser used for taking screenshots.

    Args:
        default_Viewport (dict): Options dict passed as-is to ``pyppeteer.launch``
        browser (BrowserCls): Pyppeteer Browser instance, set by ``get_browser``
    """

    # A factory is used so that every instance gets its own options dict
    # instead of all instances sharing (and possibly mutating) one object.
    default_Viewport: dict = attrib(
        validator=instance_of(dict),
        factory=lambda: {
            # 9x21 to see long posts
            "defaultViewport": {
                "width": 500,
                "height": 1200,
            },
        },
        kw_only=True,
    )
    browser: BrowserCls

    async def get_browser(
            self,
    ) -> None:
        """
        Creates Pyppeteer browser and stores it in ``self.browser``
        """
        self.browser = await launch(self.default_Viewport)

    async def close_browser(
            self,
    ) -> None:
        """
        Closes Pyppeteer browser
        """
        await self.browser.close()


class Wait:
    """
    Mixin with helpers that locate an element by xpath and act on it.

    Every helper is wrapped in ``common.catch_exception`` (defined outside this
    file). NOTE(review): what the wrapper returns when it handles an exception
    is not visible here - verify before relying on the return values.
    """

    @staticmethod
    @common.catch_exception
    async def find_xpath(
            page_instance: PageCls,
            xpath: Optional[str] = None,
            options: Optional[dict] = None,
    ) -> 'ElementHandleCls':
        """
        Explicitly finds element on the page

        Args:
            page_instance: Pyppeteer page instance
            xpath: xpath query
            options: Pyppeteer waitForXPath parameters

        Available options are:

        * ``visible`` (bool): wait for element to be present in DOM and to be
          visible, i.e. to not have ``display: none`` or ``visibility: hidden``
          CSS properties. Defaults to ``False``.
        * ``hidden`` (bool): wait for element to not be found in the DOM or to
          be hidden, i.e. have ``display: none`` or ``visibility: hidden`` CSS
          properties. Defaults to ``False``.
        * ``timeout`` (int|float): maximum time to wait for in milliseconds.
          Defaults to 30000 (30 seconds). Pass ``0`` to disable timeout.

        Returns:
            Pyppeteer element instance
        """
        # Only forward ``options`` when the caller supplied some
        if options:
            return await page_instance.waitForXPath(xpath, options=options)
        return await page_instance.waitForXPath(xpath)

    @common.catch_exception
    async def click(
            self,
            page_instance: Optional[PageCls] = None,
            xpath: Optional[str] = None,
            options: Optional[dict] = None,
            *,
            find_options: Optional[dict] = None,
            el: Optional[ElementHandleCls] = None,
    ) -> None:
        """
        Clicks on the element

        Args:
            page_instance: Pyppeteer page instance
            xpath: xpath query
            find_options: Pyppeteer waitForXPath parameters
            options: Pyppeteer click parameters
            el: Pyppeteer element instance; when given, no xpath lookup is done
        """
        if el is None:
            el = await self.find_xpath(page_instance, xpath, find_options)

        if options:
            await el.click(options)
        else:
            await el.click()

    @common.catch_exception
    async def screenshot(
            self,
            page_instance: Optional[PageCls] = None,
            xpath: Optional[str] = None,
            options: Optional[dict] = None,
            *,
            find_options: Optional[dict] = None,
            el: Optional[ElementHandleCls] = None,
    ) -> None:
        """
        Makes a screenshot of the element

        Args:
            page_instance: Pyppeteer page instance
            xpath: xpath query
            options: Pyppeteer screenshot parameters
            find_options: Pyppeteer waitForXPath parameters
            el: Pyppeteer element instance; when given, no xpath lookup is done
        """
        if el is None:
            el = await self.find_xpath(page_instance, xpath, find_options)

        if options:
            await el.screenshot(options)
        else:
            await el.screenshot()


@attrs(auto_attribs=True)
class RedditScreenshot(Browser, Wait):
    """
    Takes screenshots of a reddit thread and saves them to assets/temp/png

    Args:
        reddit_object (dict): Reddit object received from reddit/subreddit.py
        screenshot_idx (list): List with indexes of voiced comments
        story_mode (bool): If submission is a story takes screenshot of the story
    """
    reddit_object: dict
    screenshot_idx: list
    story_mode: Optional[bool] = attrib(
        validator=instance_of(bool),
        default=False,
        kw_only=True
    )

    def __attrs_post_init__(
            self,
    ):
        """
        Reads the translation target language from the settings
        """
        # Used below as ``to_language`` of the translator; a falsy value
        # disables translation.
        # NOTE(review): presumably a language code string - confirm against the config template.
        self.post_lang: Optional[str] = settings.config["reddit"]["thread"]["post_lang"]

    async def __dark_theme(
            self,
            page_instance: PageCls,
    ) -> None:
        """
        Enables dark theme in Reddit

        Args:
            page_instance: Pyppeteer page instance with reddit page opened
        """
        # Clicked strictly in this order, each with a 5 second lookup timeout
        click_sequence = (
            # Opens the user dropdown
            "//div[@class='header-user-dropdown']",
            # It's normal not to find it, sometimes there is none :shrug:
            "//span[text()='Settings']/ancestor::button[1]",
            "//span[text()='Dark Mode']/ancestor::button[1]",
            # Closes settings
            "//div[@class='header-user-dropdown']",
        )

        for target_xpath in click_sequence:
            await self.click(
                page_instance,
                target_xpath,
                find_options={"timeout": 5000},
            )

    async def __close_nsfw(
            self,
            page_instance: PageCls,
    ) -> None:
        """
        Closes NSFW stuff

        Args:
            page_instance: Instance of main page
        """
        from asyncio import ensure_future

        print_substep("Post is NSFW. You are spicy...")

        # Start waiting for the navigation BEFORE clicking, so the reload
        # triggered by the click is not missed
        navigation = ensure_future(page_instance.waitForNavigation())

        # Triggers indirectly reload
        await self.click(
            page_instance,
            '//button[text()="Yes"]',
            find_options={"timeout": 5000},
        )

        # Await reload
        await navigation

        await self.click(
            page_instance,
            '//button[text()="Click to see nsfw"]',
            find_options={"timeout": 5000},
        )

    async def __collect_comment(
            self,
            comment_obj: dict,
            filename_idx: int,
    ) -> None:
        """
        Makes a screenshot of the comment

        The comment is opened in its own page, which is closed again once the
        screenshot attempt is over.

        Args:
            comment_obj: comment object; ``comment_url``, ``comment_id`` and
                ``comment_body`` keys are read
            filename_idx: index for the filename
        """
        comment_page = await self.browser.newPage()
        try:
            await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}')

            # Translates submission' comment
            if self.post_lang:
                # NOTE(review): called without await - presumably blocks the
                # event loop while the translation request runs; confirm.
                comment_tl = ts.google(
                    comment_obj["comment_body"],
                    to_language=self.post_lang,
                )
                await comment_page.evaluate(
                    '([comment_id, comment_tl]) => document.querySelector(`#t1_${comment_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = comment_tl',  # noqa
                    [comment_obj["comment_id"], comment_tl],
                )

            await self.screenshot(
                comment_page,
                f"//div[@id='t1_{comment_obj['comment_id']}']",
                {"path": f"assets/temp/png/comment_{filename_idx}.png"},
            )
        finally:
            # Do not keep one open tab per comment for the rest of the run
            await comment_page.close()

    # WIP  TODO test it
    async def __collect_story(
            self,
            main_page: PageCls,
    ):
        """
        Makes a screenshot of the submission text (story mode)

        Args:
            main_page: Pyppeteer page instance with the thread opened
        """
        # Translates submission text
        if self.post_lang:
            story_tl = ts.google(
                self.reddit_object["thread_post"],
                to_language=self.post_lang,
            )
            # One translated line per <p> of the post, matched by position
            split_story_tl = story_tl.split('\n')

            await main_page.evaluate(
                "(split_story_tl) => split_story_tl.map(function(element, i) { return [element, document.querySelectorAll('[data-test-id=\"post-content\"] > [data-click-id=\"text\"] > div > p')[i]]; }).forEach(mappedElement => mappedElement[1].textContent = mappedElement[0])",  # noqa
                split_story_tl,
            )

        await self.screenshot(
            main_page,
            "//div[@data-test-id='post-content']//div[@data-click-id='text']",
            {"path": "assets/temp/png/story_content.png"},
        )

    async def download(
            self,
    ):
        """
        Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png

        The browser launched here is always closed before returning, also when
        one of the steps raises.
        """
        print_step("Downloading screenshots of reddit posts...")

        print_substep("Launching Headless Browser...")
        await self.get_browser()

        try:
            # ! Make sure the reddit screenshots folder exists
            Path("assets/temp/png").mkdir(parents=True, exist_ok=True)

            # Get the thread screenshot
            reddit_main = await self.browser.newPage()
            await reddit_main.goto(self.reddit_object["thread_url"])  # noqa

            if settings.config["settings"]["theme"] == "dark":
                await self.__dark_theme(reddit_main)

            if self.reddit_object["is_nsfw"]:
                # This means the post is NSFW and requires to click the proceed button.
                await self.__close_nsfw(reddit_main)

            # Translates submission title
            if self.post_lang:
                print_substep("Translating post...")
                texts_in_tl = ts.google(
                    self.reddit_object["thread_title"],
                    to_language=self.post_lang,
                )

                await reddit_main.evaluate(
                    "(texts_in_tl) => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = texts_in_tl",  # noqa
                    texts_in_tl,
                )
            else:
                print_substep("Skipping translation...")

            # No sense to move it to common.py
            if self.story_mode:
                async_tasks_primary = [
                    self.__collect_story(reddit_main)
                ]
            else:
                async_tasks_primary = [
                    self.__collect_comment(self.reddit_object["comments"][idx], idx)
                    for idx in self.screenshot_idx
                ]

            # The title screenshot is always taken, in both modes
            async_tasks_primary.append(
                self.screenshot(
                    reddit_main,
                    "//div[@data-testid='post-container']",
                    {"path": "assets/temp/png/title.png"},
                )
            )

            # Tasks are awaited in chunks of 10; the number of chunks is only
            # needed for the progress description, so compute it once
            chunk_size = 10
            full_chunks, leftover = divmod(len(async_tasks_primary), chunk_size)
            chunk_total = full_chunks + (1 if leftover else 0)

            for idx, chunked_tasks in enumerate(
                    common.chunks(async_tasks_primary, chunk_size),
                    start=1,
            ):
                for task in track(
                        as_completed(chunked_tasks),
                        description=f"Downloading comments: Chunk {idx}/{chunk_total}",
                        total=len(chunked_tasks),
                ):
                    await task

            print_substep("Comments downloaded Successfully.", style="bold green")
        finally:
            # Never leave the headless browser running, even after a failure
            await self.close_browser()