added async playwright

3 years ago · 66494022db
parent 27577d0da6
commit 66494022db
3 changed files with 282 additions and 131 deletions
--- a/webdriver/common.py
+++ b/webdriver/common.py
@ -65,3 +65,11 @@ class ExceptionDecorator:
        if func:
            exceptor = exceptor(func)
        return exceptor
 # Many open tabs consume a lot of memory,
 # so the tasks are processed in chunks to keep memory usage low
 def chunks(lst, n):
    """Split ``lst`` into consecutive slices holding at most ``n`` items each.

    The slices are produced lazily and in order; the last one may be shorter.
    """
    starts = range(0, len(lst), n)
    yield from (lst[start:start + n] for start in starts)
--- a/webdriver/playwright.py
+++ b/webdriver/playwright.py
@ -1,15 +1,21 @@
-from playwright.async_api import async_playwright, ViewportSize
+from asyncio import as_completed
-from playwright.async_api import Browser, Playwright
+
-from rich.progress import track
+from playwright.async_api import async_playwright, TimeoutError
 from playwright.async_api import Browser, Playwright, Page, BrowserContext, Locator
 from pathlib import Path
 import translators as ts
 from utils import settings
 from utils.console import print_step, print_substep
 import translators as ts
 from rich.progress import track
 from attr import attrs, attrib
-from attr.validators import instance_of, optional
+from attr.validators import instance_of
 from typing import Dict, Optional
-from typing import Dict, Optional, Union
+from webdriver.common import ExceptionDecorator, chunks
 catch_exception = ExceptionDecorator(default_exception=TimeoutError).catch_exception
@attrs
@ -23,15 +29,14 @@ class Browser:
        validator=instance_of(dict),
        default={
            # 9x21 to see long posts
            "defaultViewport": {
            "width": 500,
            "height": 1200,
        },
        },
        kw_only=True,
    )
    playwright: Playwright
    browser: Browser
    context: BrowserContext
    async def get_browser(
            self,
@ -41,30 +46,98 @@ class Browser:
        """
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch()
        self.context = await self.browser.new_context(viewport=self.default_Viewport)
    async def close_browser(
            self,
    ) -> None:
        """
-        Closes Pyppeteer browser
+        Closes Playwright stuff
        """
        await self.context.close()
        await self.browser.close()
        await self.playwright.stop()
 class Flaky:
    """
    Page interactions that are allowed to fail.

    All methods are decorated with `catch_exception`, the function catching the
    default exceptions and writing logs.
    """

    @staticmethod
    @catch_exception
    def find_element(
            query: str,
            page_instance: Page,
            options: Optional[dict] = None,
    ) -> Locator:
        """
        Builds a locator for the given selector

        Args:
            query: Selector passed to `page_instance.locator`
            page_instance: Page the locator is created on
            options: Keyword arguments forwarded to `page_instance.locator`, if any

        Returns:
            Locator created by `page_instance.locator`
        """
        return page_instance.locator(query, **(options or {}))

    @catch_exception
    async def click(
            self,
            page_instance: Optional[Page] = None,
            query: Optional[str] = None,
            options: Optional[dict] = None,
            *,
            find_options: Optional[dict] = None,
            element: Optional[Locator] = None,
    ) -> None:
        """
        Clicks `element` when it is given, otherwise the element found by `query`

        Args:
            page_instance: Page to search in (used only when `element` is not given)
            query: Selector of the element (used only when `element` is not given)
            options: Keyword arguments forwarded to the locator's `click`
            find_options: Options dict handed to `find_element`
            element: Already located element to click
        """
        if element is None:
            # find_element takes its options as ONE dict argument; unpacking them
            # as keyword arguments would raise TypeError
            element = self.find_element(query, page_instance, find_options)
        # Awaited on every path, so the click also happens when `options` is empty
        await element.click(**(options or {}))

    @catch_exception
    async def screenshot(
            self,
            page_instance: Optional[Page] = None,
            query: Optional[str] = None,
            options: Optional[dict] = None,
            *,
            find_options: Optional[dict] = None,
            element: Optional[Locator] = None,
    ) -> None:
        """
        Takes a screenshot of `element` when it is given, otherwise of the element found by `query`

        Args:
            page_instance: Page to search in (used only when `element` is not given)
            query: Selector of the element (used only when `element` is not given)
            options: Keyword arguments forwarded to the locator's `screenshot`
            find_options: Options dict handed to `find_element`
            element: Already located element to capture
        """
        if element is None:
            # find_element takes its options as ONE dict argument; unpacking them
            # as keyword arguments would raise TypeError
            element = self.find_element(query, page_instance, find_options)
        await element.screenshot(**(options or {}))
@attrs(auto_attribs=True)
-class RedditScreenshot(Browser):
+class RedditScreenshot(Flaky, Browser):
    """
    Args:
        reddit_object (Dict): Reddit object received from reddit/subreddit.py
        screenshot_idx (list): List with indexes of voiced comments
        story_mode (bool): If submission is a story takes screenshot of the story
    """
    reddit_object: dict
    screenshot_idx: list
    story_mode: Optional[bool] = attrib(
        validator=instance_of(bool),
        default=False,
        kw_only=True
    )
    def __attrs_post_init__(
            self
    ) -> None:
        """
        Stores the configured post language on the instance
        """
        # The value is handed to the translator as `to_language`, so it is a language
        # code (presumably a string such as "en" - verify against the config schema),
        # not a boolean flag
        self.post_lang: Optional[str] = settings.config["reddit"]["thread"]["post_lang"]
    async def __dark_theme(
            self,
-            page_instance: PageCls,
+            page_instance: Page,
    ) -> None:
        """
        Enables dark theme in Reddit
@ -75,128 +148,190 @@ class RedditScreenshot(Browser):
        await self.click(
            page_instance,
-            "//*[contains(@class, 'header-user-dropdown')]",
+            "header-user-dropdown",
            {"timeout": 5000},
        )
        # It's normal not to find it, sometimes there is none :shrug:
        await self.click(
            page_instance,
-            "//*[contains(text(), 'Settings')]/ancestor::button[1]",
+            ":nth-match(button) >> 'Settings'",
            {"timeout": 5000},
        )
        await self.click(
            page_instance,
-            "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]",
+            ":nth-match(button) >> 'Dark Mode'",
            {"timeout": 5000},
        )
        # Closes settings
        await self.click(
            page_instance,
-            "//*[contains(@class, 'header-user-dropdown')]",
+            "header-user-dropdown"
            {"timeout": 5000},
        )
    async def __close_nsfw(
            self,
            page_instance: Page,
    ) -> None:
        """
        Closes NSFW stuff
        Args:
            page_instance:  Instance of main page
        """
-storymode = False
+        print_substep("Post is NSFW. You are spicy...")
        # Triggers indirectly reload
        await self.click(
            page_instance,
            'button:has-text("Yes")',
            {"timeout": 5000},
        )
-def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
+        # Await indirect reload
-    """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png
+        await page_instance.wait_for_load_state()
        await self.click(
            page_instance,
            'button:has-text("Click to see nsfw")',
            {"timeout": 5000},
        )
    async def __collect_comment(
            self,
            comment_obj: dict,
            filename_idx: int,
    ) -> None:
        """
        Makes a screenshot of the comment
        Args:
-        reddit_object (Dict): Reddit object received from reddit/subreddit.py
+            comment_obj: prew comment object
-        screenshot_num (int): Number of screenshots to download
+            filename_idx: index for the filename
        """
-    print_step("Downloading screenshots of reddit posts...")
+        comment_page = await self.context.new_page()
        await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}')
-    # ! Make sure the reddit screenshots folder exists
+        # Translates submission' comment
-    Path("assets/temp/png").mkdir(parents=True, exist_ok=True)
+        if self.post_lang:
            comment_tl = ts.google(
                comment_obj["comment_body"],
                to_language=self.post_lang,
            )
            await comment_page.evaluate(
                f"document.querySelector('#t1_{comment_obj['comment_id']} > div:nth-child(2) "
                f'> div > div[data-testid="comment"] > div\').textContent = {comment_tl}',
            )
        await self.screenshot(
            comment_page,
            f"id=t1_{comment_obj['comment_id']}",
            {"path": f"assets/temp/png/comment_{filename_idx}.png"},
        )
    # WIP  TODO test it
    async def __collect_story(
            self,
            main_page: Page,
    ):
        """
        Makes a screenshot of the submission text

        When a post language is configured, the submission text is translated first
        and the paragraphs on the page are overwritten with the translated lines.

        Args:
            main_page: Page with the opened submission
        """
        # Translates submission text
        if self.post_lang:
            story_tl = ts.google(
                self.reddit_object["thread_post"],
                to_language=self.post_lang,
            )
            split_story_tl = story_tl.split('\n')

            # The translated lines are passed as an argument instead of being formatted
            # into the script, so quotes or backslashes in the text can neither break
            # nor inject into the JavaScript source
            await main_page.evaluate(
                "(texts) => {"
                # Find all elements
                ' const elements = document.querySelectorAll(`[data-test-id="post-content"]'
                ' > [data-click-id="text"] > div > p`);'
                # The number of lines and paragraphs may differ - only touch existing pairs
                " const count = Math.min(texts.length, elements.length);"
                # Change text on the page
                " for (let i = 0; i < count; ++i) { elements[i].textContent = texts[i]; }"
                " }",
                split_story_tl,
            )

        await self.screenshot(
            main_page,
            '[data-click-id="text"]',
            {"path": "assets/temp/png/story_content.png"},
        )
    async def download(
            self,
    ):
        """
        Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png
        """
        print_step("Downloading screenshots of reddit posts...")
    with sync_playwright() as p:
        print_substep("Launching Headless Browser...")
        await self.get_browser()
-        browser = p.chromium.launch()
+        # ! Make sure the reddit screenshots folder exists
-        context = browser.new_context()
+        Path("assets/temp/png").mkdir(parents=True, exist_ok=True)
        if settings.config["settings"]["theme"] == "dark":
            cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8")
        else:
            cookie_file = open("./video_creation/data/cookie-light-mode.json", encoding="utf-8")
        cookies = json.load(cookie_file)
        context.add_cookies(cookies)  # load preference cookies
        # Get the thread screenshot
-        page = context.new_page()
+        reddit_main = await self.browser.new_page()
-        page.goto(reddit_object["thread_url"], timeout=0)
+        # noinspection Duplicates
-        page.set_viewport_size(ViewportSize(width=1920, height=1080))
+        await reddit_main.goto(self.reddit_object["thread_url"])
        if page.locator('[data-testid="content-gate"]').is_visible():
            # This means the post is NSFW and requires to click the proceed button.
            print_substep("Post is NSFW. You are spicy...")
            page.locator('[data-testid="content-gate"] button').click()
            page.wait_for_load_state() # Wait for page to fully load
-            if page.locator('[data-click-id="text"] button').is_visible():
+        if settings.config["settings"]["theme"] == "dark":
-                page.locator(
+            await self.__dark_theme(reddit_main)
                    '[data-click-id="text"] button'
                ).click()  # Remove "Click to see nsfw" Button in Screenshot
-        # translate code
+        if self.reddit_object["is_nsfw"]:
            # This means the post is NSFW and requires to click the proceed button.
            await self.__close_nsfw(reddit_main)
        # Translates submission title
        if settings.config["reddit"]["thread"]["post_lang"]:
            print_substep("Translating post...")
            texts_in_tl = ts.google(
-                reddit_object["thread_title"],
+                self.reddit_object["thread_title"],
                to_language=settings.config["reddit"]["thread"]["post_lang"],
            )
-            page.evaluate(
+            await reddit_main.evaluate(
-                "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content",
+                "document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > "
-                texts_in_tl,
+                f"div').textContent = {texts_in_tl}",
            )
        else:
            print_substep("Skipping translation...")
-        page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png")
+        # No sense to move it in common.py
-
+        # noinspection Duplicates
-        if storymode:
+        async_tasks_primary = (
-            page.locator('[data-click-id="text"]').screenshot(
+            [
-                path="assets/temp/png/story_content.png"
+                self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in
                self.screenshot_idx
            ]
            if not self.story_mode
            else [
                self.__collect_story(reddit_main)
            ]
        )
        else:
            for idx, comment in enumerate(
                track(reddit_object["comments"], "Downloading screenshots...")
            ):
                # Stop if we have reached the screenshot_num
                if idx >= screenshot_num:
                    break
-                if page.locator('[data-testid="content-gate"]').is_visible():
+        async_tasks_primary.append(
-                    page.locator('[data-testid="content-gate"] button').click()
+            self.screenshot(
-
+                reddit_main,
-                page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0)
+                f"id=t3_{self.reddit_object['thread_id']}",
-
+                {"path": "assets/temp/png/title.png"},
                # translate code
                if settings.config["reddit"]["thread"]["post_lang"]:
                    comment_tl = ts.google(
                        comment["comment_body"],
                        to_language=settings.config["reddit"]["thread"]["post_lang"],
            )
                    page.evaluate(
                        '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content',
                        [comment_tl, comment["comment_id"]],
        )
-                try:
+
-                    page.locator(f"#t1_{comment['comment_id']}").screenshot(
+        for idx, chunked_tasks in enumerate(
-                        path=f"assets/temp/png/comment_{idx}.png"
+                [chunk for chunk in chunks(async_tasks_primary, 10)],
-                    )
+                start=1,
-                except TimeoutError:
+        ):
-                    del reddit_object["comments"]
+            chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0)
-                    screenshot_num += 1
+            for task in track(
-                    print("TimeoutError: Skipping screenshot...")
+                    as_completed(chunked_tasks),
-                    continue
+                    description=f"Downloading comments: Chunk {idx}/{chunk_list}",
-        print_substep("Screenshots downloaded Successfully.", style="bold green")
+                    total=chunked_tasks.__len__(),
            ):
                await task
        print_substep("Comments downloaded Successfully.", style="bold green")
        await self.close_browser()
--- a/webdriver/pyppeteer.py
+++ b/webdriver/pyppeteer.py
@ -7,18 +7,16 @@ from pyppeteer.element_handle import ElementHandle as ElementHandleCls
 from pyppeteer.errors import TimeoutError as BrowserTimeoutError
 from pathlib import Path
 from typing import Dict
 from utils import settings
-
+from utils.console import print_step, print_substep
 from rich.progress import track
 import translators as ts
 from utils.console import print_step, print_substep
 from attr import attrs, attrib
-from attr.validators import instance_of, optional
+from attr.validators import instance_of
 from typing import Optional
-from webdriver.common import ExceptionDecorator
+from webdriver.common import ExceptionDecorator, chunks
 catch_exception = ExceptionDecorator(default_exception=BrowserTimeoutError).catch_exception
@ -100,8 +98,9 @@ class Wait:
            self,
            page_instance: Optional[PageCls] = None,
            xpath: Optional[str] = None,
            find_options: Optional[dict] = None,
            options: Optional[dict] = None,
            *,
            find_options: Optional[dict] = None,
            el: Optional[ElementHandleCls] = None,
    ) -> None:
        """
@ -127,6 +126,7 @@ class Wait:
            page_instance: Optional[PageCls] = None,
            xpath: Optional[str] = None,
            options: Optional[dict] = None,
            *,
            find_options: Optional[dict] = None,
            el: Optional[ElementHandleCls] = None,
    ) -> None:
@ -154,14 +154,21 @@ class RedditScreenshot(Browser, Wait):
    Args:
        reddit_object (Dict): Reddit object received from reddit/subreddit.py
        screenshot_idx (list): List with indexes of voiced comments
        story_mode (bool): If submission is a story takes screenshot of the story
    """
    reddit_object: dict
    screenshot_idx: list
    story_mode: Optional[bool] = attrib(
        validator=instance_of(bool),
        default=False,
        kw_only=True
    )
    def __attrs_post_init__(
            self,
    ) -> None:
        """
        Stores the configured post language on the instance
        """
        # The value is handed to the translator as `to_language`, so it is a language
        # code (presumably a string such as "en" - verify against the config schema),
        # not a boolean flag
        self.post_lang: Optional[str] = settings.config["reddit"]["thread"]["post_lang"]
    async def __dark_theme(
            self,
            page_instance: PageCls,
@ -176,33 +183,40 @@ class RedditScreenshot(Browser, Wait):
        await self.click(
            page_instance,
            "//*[contains(@class, 'header-user-dropdown')]",
-            {"timeout": 5000},
+            find_options={"timeout": 5000},
        )
        # It's normal not to find it, sometimes there is none :shrug:
        await self.click(
            page_instance,
            "//*[contains(text(), 'Settings')]/ancestor::button[1]",
-            {"timeout": 5000},
+            find_options={"timeout": 5000},
        )
        await self.click(
            page_instance,
            "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]",
-            {"timeout": 5000},
+            find_options={"timeout": 5000},
        )
        # Closes settings
        await self.click(
            page_instance,
            "//*[contains(@class, 'header-user-dropdown')]",
-            {"timeout": 5000},
+            find_options={"timeout": 5000},
        )
    async def __close_nsfw(
            self,
-            page_instance: PageCls
+            page_instance: PageCls,
    ) -> None:
        """
        Closes NSFW stuff
        Args:
            page_instance:  Instance of main page
        """
        from asyncio import ensure_future
        print_substep("Post is NSFW. You are spicy...")
@ -213,17 +227,17 @@ class RedditScreenshot(Browser, Wait):
        await self.click(
            page_instance,
            '//button[text()="Yes"]',
-            {"timeout": 5000},
+            find_options={"timeout": 5000},
        )
        # Await reload
        await navigation
-        await (await self.find_xpath(
+        await self.click(
            page_instance,
            '//button[text()="Click to see nsfw"]',
-            {"timeout": 5000},
+            find_options={"timeout": 5000},
-        )).click()
+        )
    async def __collect_comment(
            self,
@ -241,19 +255,19 @@ class RedditScreenshot(Browser, Wait):
        await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}')
        # Translates submission' comment
-        if settings.config["reddit"]["thread"]["post_lang"]:
+        if self.post_lang:
            comment_tl = ts.google(
                comment_obj["comment_body"],
-                to_language=settings.config["reddit"]["thread"]["post_lang"],
+                to_language=self.post_lang,
            )
            await comment_page.evaluate(
-                f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) '
+                f"([tl_content, tl_id]) => document.querySelector('#t1_{comment_obj['comment_id']} > div:nth-child(2) "
-                f'> div > div[data-testid="comment"] > div`).textContent = {comment_tl}',
+                f'> div > div[data-testid="comment"] > div\').textContent = {comment_tl}',
            )
        await self.screenshot(
            comment_page,
-            f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]',
+            f"//*[contains(@id, 't1_{comment_obj['comment_id']}')]",
            {"path": f"assets/temp/png/comment_{filename_idx}.png"},
        )
@ -261,13 +275,12 @@ class RedditScreenshot(Browser, Wait):
    async def __collect_story(
            self,
            main_page: PageCls,
    ):
        # Translates submission text
-        if settings.config["reddit"]["thread"]["post_lang"]:
+        if self.post_lang:
            story_tl = ts.google(
                self.reddit_object["thread_post"],
-                to_language=settings.config["reddit"]["thread"]["post_lang"],
+                to_language=self.post_lang,
            )
            split_story_tl = story_tl.split('\n')
            await main_page.evaluate(
@ -304,6 +317,7 @@ class RedditScreenshot(Browser, Wait):
        # Get the thread screenshot
        reddit_main = await self.browser.newPage()
        # noinspection Duplicates
        await reddit_main.goto(self.reddit_object["thread_url"])
        if settings.config["settings"]["theme"] == "dark":
@ -322,13 +336,14 @@ class RedditScreenshot(Browser, Wait):
            )
            await reddit_main.evaluate(
-                "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > "
+                "document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > "
-                "div').textContent = tl_content",
+                f"div').textContent = {texts_in_tl}",
                texts_in_tl,
            )
        else:
            print_substep("Skipping translation...")
        # No sense to move it in common.py
        # noinspection Duplicates
        async_tasks_primary = (
            [
                self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in
@ -348,13 +363,6 @@ class RedditScreenshot(Browser, Wait):
            )
        )
        # Many open tabs consume a lot of memory,
        # so the tasks are processed in chunks to keep memory usage low
        def chunks(lst, n):
            """Split ``lst`` into consecutive slices holding at most ``n`` items each.

            The slices are produced lazily and in order; the last one may be shorter.
            """
            starts = range(0, len(lst), n)
            yield from (lst[start:start + n] for start in starts)
        for idx, chunked_tasks in enumerate(
                [chunk for chunk in chunks(async_tasks_primary, 10)],
                start=1,