async playwright in factory WIP, added collect_story in pyppeteer

3 years ago · 27577d0da6
parent b1663f3381
commit 27577d0da6
9 changed files with 369 additions and 73 deletions
--- a/README.md
+++ b/README.md
@ -33,12 +33,15 @@ The only original thing being done is the editing and gathering of all materials
 ## Requirements
 - Python 3.9+
 - Playwright (this should be installed automatically during installation)
 ## Installation 👩‍💻
 1. Clone this repository
 2. Run `pip install -r requirements.txt`
 3. Run `python -m playwright install` and `python -m playwright install-deps`
 **EXPERIMENTAL!!!!**
 On macOS and Linux (Debian, Arch, Fedora, CentOS, and distributions based on those), you can run an install script that will automatically perform steps 1 to 3 (requires bash).
--- a/install.sh
+++ b/install.sh
@ -12,7 +12,7 @@ function Help(){
    echo "Options:" 
    echo "  -h: Show this help message and exit" 
    echo "  -d: Install only dependencies" 
-    echo "  -p: Install only python dependencies"
+    echo "  -p: Install only python dependencies (including playwright)"
    echo "  -b: Install just the bot"
    echo "  -l: Install the bot and the python dependencies"
 } 
@ -107,6 +107,23 @@ function install_python_dep(){
    cd ..
 } 
 # install playwright function
 function install_playwright(){
    # Installs the Playwright browsers and their OS-level dependencies.
    # Takes no arguments; expects the bot to be cloned into ./RedditVideoMakerBot.
    local entered_repo=0
    # tell the user that the script is going to install playwright
    echo "Installing playwright"
    # cd into the directory where the script is downloaded; remember whether it
    # worked so the final "cd .." cannot move the caller one level too high
    if cd RedditVideoMakerBot; then
        entered_repo=1
    fi
    # run the install script
    python3 -m playwright install
    python3 -m playwright install-deps
    # give a note (ends with a newline so the next message starts on its own line)
    printf "Note, if these gave any errors, playwright may not be officially supported on your OS, check this issues page for support\nhttps://github.com/microsoft/playwright/issues\n"
    if [ -x "$(command -v pacman)" ]; then
        printf "It seems you are on an Arch based distro.\nTry installing these from the AUR for playwright to run:\nenchant1.6\nicu66\nlibwebp052\n"
    fi
    # only go back up if we actually entered the repository directory
    if [ "$entered_repo" -eq 1 ]; then
        cd ..
    fi
 }
 # Install dependencies
 function install_deps(){ 
    # if the platform is mac, install macos
@ -131,7 +148,7 @@ function install_deps(){
    # else
    else
        # print an error message and exit
-        printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and dependencies\n If you want to add support for your OS, please open a pull request on github\n
+        printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and playwright dependencies\n If you want to add support for your OS, please open a pull request on github\n
 https://github.com/elebumm/RedditVideoMakerBot"
        exit 1
    fi
@ -159,9 +176,10 @@ function install_main(){
        echo "Installing only dependencies" 
        install_deps
    elif [[ PYTHON_ONLY -eq 1 ]]; then
-    # if the -p (only python dependencies) options is selected install just the python dependencies
+    # if the -p (only python dependencies) options is selected install just the python dependencies and playwright
        echo "Installing only python dependencies" 
        install_python_dep 
        install_playwright
    # if the -b (only the bot) options is selected install just the bot
    elif [[ JUST_BOT -eq 1 ]]; then
        echo "Installing only the bot"
@ -177,6 +195,7 @@ function install_main(){
        install_deps 
        get_the_bot 
        install_python_dep
        install_playwright
    fi
    DIR="./RedditVideoMakerBot"
--- a/main.py
+++ b/main.py
@ -14,7 +14,7 @@ from video_creation.background import (
    get_background_config,
 )
 from video_creation.final_video import FinalVideo
-from video_creation.screenshot_downloader import RedditScreenshot
+from webdriver.web_engine import screenshot_factory
 from video_creation.voices import save_text_to_mp3
 __VERSION__ = "2.3.1"
@ -41,7 +41,8 @@ async def main(POST_ID=None):
    cleanup()
    reddit_object = get_subreddit_threads(POST_ID)
    comments_created = save_text_to_mp3(reddit_object)
-    await RedditScreenshot(reddit_object, comments_created).download()
+    webdriver = screenshot_factory(config["settings"]["times_to_run"])  # TODO add in config
    await webdriver(reddit_object, comments_created).download()
    bg_config = get_background_config()
    FinalVideo().make(comments_created, reddit_object, bg_config)
--- a/requirements.txt
+++ b/requirements.txt
@ -2,6 +2,7 @@ boto3==1.24.24
 botocore==1.27.24
 gTTS==2.2.4
 moviepy==1.0.3
 playwright==1.23.0
 praw==7.6.0
 pytube==12.1.0
 requests==2.28.1
--- a/webdriver/init.py
+++ b/webdriver/init.py
--- a/webdriver/common.py
+++ b/webdriver/common.py
@ -0,0 +1,67 @@
 from attr import attrs, attrib
 from typing import TypeVar, Optional, Callable, Union
 _function = TypeVar("_function", bound=Callable[..., object])
 _exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]])
@attrs
class ExceptionDecorator:
    """
    Decorator factory for catching exceptions and writing logs

    An instance wraps a coroutine function so that every ``Exception`` raised by it
    is swallowed (the wrapper then returns ``None``). Exceptions that are not among
    the expected ones are written to ``.webdriver.log`` through the
    ``webdriver_log`` logger.

    Args:
        exception: Expected exception type, or a tuple/list of expected types
        default_exception: Keyword-only fallback used when ``exception`` is falsy
    """
    exception: Optional[_exceptions] = attrib(default=None)
    _default_exception: Optional[_exceptions] = attrib(
        kw_only=True,
        default=None,
    )

    def __attrs_post_init__(self):
        # Fall back to the configured default when no expected exception was given
        if not self.exception:
            self.exception = self._default_exception

    @staticmethod
    def _get_logger():
        """Returns the shared ``webdriver_log`` logger, attaching its file handler only once."""
        import logging

        logger = logging.getLogger("webdriver_log")
        logger.setLevel(logging.ERROR)
        # Adding a handler on every caught exception would duplicate each log line
        # and leak one open file per error, so only attach it when none is present
        if not logger.handlers:
            logger.addHandler(
                logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8")
            )
        return logger

    def _expected_exceptions(self) -> tuple:
        """Normalises ``self.exception`` (None, a type, or a tuple/list of types) to a tuple."""
        if self.exception is None:
            return ()
        if isinstance(self.exception, type):
            return (self.exception,)
        return tuple(self.exception)

    def __call__(
            self,
            func: _function,
    ):
        """
        Wraps the coroutine function ``func``

        Args:
            func: Coroutine function to be decorated

        Returns:
            Coroutine function that returns the result of ``func``, or ``None`` when
            ``func`` raised an ``Exception``
        """
        import functools

        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception as caughtException:
                # Expected exceptions are silently ignored, everything else is logged.
                # With no expected exception configured the tuple is empty, so every
                # error is logged instead of failing on a membership test against None
                if not isinstance(caughtException, self._expected_exceptions()):
                    self._get_logger().error(f"unexpected error - {caughtException}")
                return None

        return wrapper

    @classmethod
    def catch_exception(
            cls,
            func: Optional[_function],
            exception: Optional[_exceptions] = None,
    ) -> Union[object, _function]:
        """
        Decorator for catching exceptions and writing logs

        NOTE(review): this is a classmethod, so when it is looked up on an already
        configured instance the new decorator is still built from ``cls(exception)``
        and does not inherit that instance's ``default_exception``.

        Args:
            func: Function to be decorated
            exception: Expected exception(s)

        Returns:
            Decorated function, or the decorator itself when ``func`` is falsy
        """
        exceptor = cls(exception)
        if func:
            exceptor = exceptor(func)
        return exceptor
--- a/webdriver/playwright.py
+++ b/webdriver/playwright.py
@ -0,0 +1,202 @@
 from playwright.async_api import async_playwright, ViewportSize
 from playwright.async_api import Browser, Playwright
 from rich.progress import track
 from pathlib import Path
 import translators as ts
 from utils import settings
 from utils.console import print_step, print_substep
 from attr import attrs, attrib
 from attr.validators import instance_of, optional
 from typing import Dict, Optional, Union
@attrs
class Browser:
    """
    Holds the Playwright driver and the Chromium browser launched from it.

    Args:
        default_Viewport (dict): Browser default viewport options (keyword-only)

    The ``playwright`` and ``browser`` attributes are assigned by ``get_browser``.
    """
    default_Viewport: dict = attrib(
        validator=instance_of(dict),
        # A factory gives every instance its own dict; a literal default would be
        # one object shared (and mutated) across all instances
        factory=lambda: {
            # 9x21 to see long posts
            "defaultViewport": {
                "width": 500,
                "height": 1200,
            },
        },
        kw_only=True,
    )
    # Plain annotations (no attrib): not constructor arguments, set in get_browser().
    # Inside the class body ``Browser`` still names the class imported from
    # playwright.async_api, which this class shadows once it is defined.
    playwright: Playwright
    browser: Browser

    async def get_browser(
            self,
    ) -> None:
        """
        Creates Playwright instance & browser
        """
        self.playwright = await async_playwright().start()
        self.browser = await self.playwright.chromium.launch()

    async def close_browser(
            self,
    ) -> None:
        """
        Closes the browser and stops the Playwright instance

        Does nothing for parts that were never started, and always stops
        Playwright even when closing the browser fails.
        """
        browser = getattr(self, "browser", None)
        driver = getattr(self, "playwright", None)
        try:
            if browser is not None:
                await browser.close()
        finally:
            if driver is not None:
                await driver.stop()
@attrs(auto_attribs=True)
class RedditScreenshot(Browser):
    """
    Args:
        reddit_object (dict): Reddit object received from reddit/subreddit.py
        screenshot_idx (list): List with indexes of voiced comments
    """
    reddit_object: dict
    screenshot_idx: list

    async def __dark_theme(
            self,
            # String annotation: no page class is imported in this module, and an
            # unquoted undefined name would raise NameError when the class is created
            page_instance: "Page",
    ) -> None:
        """
        Enables dark theme in Reddit

        Args:
            page_instance: Page instance with reddit page opened
        """
        # NOTE(review): ``self.click`` is not defined in the visible part of this
        # class or of ``Browser`` - confirm which mixin/helper is meant to provide it.
        user_dropdown = "//*[contains(@class, 'header-user-dropdown')]"
        click_order = (
            # Opens the user dropdown
            user_dropdown,
            # It's normal not to find it, sometimes there is none :shrug:
            "//*[contains(text(), 'Settings')]/ancestor::button[1]",
            "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]",
            # Closes settings
            user_dropdown,
        )
        for selector in click_order:
            await self.click(
                page_instance,
                selector,
                {"timeout": 5000},
            )
 # Module-level switch: when True only the story text is captured instead of comments
 storymode = False
 def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
    """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png

    Args:
        reddit_object (Dict): Reddit object received from reddit/subreddit.py
        screenshot_num (int): Number of screenshots to download
    """
    # Local imports: neither name is imported at the top of this module, which
    # only pulls in the async Playwright API
    import json
    from playwright.sync_api import sync_playwright

    print_step("Downloading screenshots of reddit posts...")

    # ! Make sure the reddit screenshots folder exists
    Path("assets/temp/png").mkdir(parents=True, exist_ok=True)

    # Read once; a falsy value means "do not translate"
    post_lang = settings.config["reddit"]["thread"]["post_lang"]

    with sync_playwright() as p:
        print_substep("Launching Headless Browser...")

        browser = p.chromium.launch()
        context = browser.new_context()

        if settings.config["settings"]["theme"] == "dark":
            cookie_path = "./video_creation/data/cookie-dark-mode.json"
        else:
            cookie_path = "./video_creation/data/cookie-light-mode.json"
        # Context manager so the cookie file is closed right after it is parsed
        with open(cookie_path, encoding="utf-8") as cookie_file:
            cookies = json.load(cookie_file)
        context.add_cookies(cookies)  # load preference cookies

        # Get the thread screenshot
        page = context.new_page()
        page.goto(reddit_object["thread_url"], timeout=0)
        page.set_viewport_size(ViewportSize(width=1920, height=1080))
        if page.locator('[data-testid="content-gate"]').is_visible():
            # This means the post is NSFW and requires to click the proceed button.
            print_substep("Post is NSFW. You are spicy...")
            page.locator('[data-testid="content-gate"] button').click()
            page.wait_for_load_state()  # Wait for page to fully load

            if page.locator('[data-click-id="text"] button').is_visible():
                page.locator(
                    '[data-click-id="text"] button'
                ).click()  # Remove "Click to see nsfw" Button in Screenshot

        # translate code
        if post_lang:
            print_substep("Translating post...")
            texts_in_tl = ts.google(
                reddit_object["thread_title"],
                to_language=post_lang,
            )
            # Replace the title text in the page before it is captured
            page.evaluate(
                "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content",
                texts_in_tl,
            )
        else:
            print_substep("Skipping translation...")

        page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png")

        if storymode:
            page.locator('[data-click-id="text"]').screenshot(
                path="assets/temp/png/story_content.png"
            )
        else:
            for idx, comment in enumerate(
                track(reddit_object["comments"], "Downloading screenshots...")
            ):
                # Stop if we have reached the screenshot_num
                if idx >= screenshot_num:
                    break

                if page.locator('[data-testid="content-gate"]').is_visible():
                    page.locator('[data-testid="content-gate"] button').click()

                page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0)

                # translate code
                if post_lang:
                    comment_tl = ts.google(
                        comment["comment_body"],
                        to_language=post_lang,
                    )
                    page.evaluate(
                        '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content',
                        [comment_tl, comment["comment_id"]],
                    )
                try:
                    page.locator(f"#t1_{comment['comment_id']}").screenshot(
                        path=f"assets/temp/png/comment_{idx}.png"
                    )
                except TimeoutError:
                    # NOTE(review): this is the built-in TimeoutError; Playwright
                    # exposes its own TimeoutError class - confirm which one a
                    # timed-out screenshot actually raises.
                    # NOTE(review): this removes the whole "comments" entry from
                    # reddit_object, not just the current comment - confirm intent.
                    del reddit_object["comments"]
                    screenshot_num += 1
                    print("TimeoutError: Skipping screenshot...")
                    continue
        print_substep("Screenshots downloaded Successfully.", style="bold green")
--- a/video_creation/screenshot_downloader.py
+++ b/video_creation/screenshot_downloader.py
@ -16,67 +16,11 @@ from utils.console import print_step, print_substep
 from attr import attrs, attrib
 from attr.validators import instance_of, optional
-from typing import TypeVar, Optional, Callable, Union
+from typing import Optional
-_function = TypeVar("_function", bound=Callable[..., object])
+from webdriver.common import ExceptionDecorator
 _exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]])
-
+catch_exception = ExceptionDecorator(default_exception=BrowserTimeoutError).catch_exception
@attrs
 class ExceptionDecorator:
    """
    Decorator factory for catching exceptions and writing logs

    Calling an instance with a coroutine function returns an async wrapper that
    awaits the function, swallows any ``Exception`` it raises and logs the ones
    that are not listed as expected to ``.webdriver.log``.
    """
    # Expected exception type, or a collection of expected types
    exception: Optional[_exceptions] = attrib(default=None)
    # Fallback used by __attrs_post_init__ when ``exception`` is falsy
    __default_exception: _exceptions = attrib(default=BrowserTimeoutError)
    def __attrs_post_init__(self):
        # No expected exception given -> use the default one
        if not self.exception:
            self.exception = self.__default_exception
    def __call__(
            self,
            func: _function,
    ):
        # Wraps ``func``; the wrapper returns the awaited result of ``func`` and
        # implicitly returns None when an exception was caught
        async def wrapper(*args, **kwargs):
            try:
                obj_to_return = await func(*args, **kwargs)
                return obj_to_return
            except Exception as caughtException:
                import logging
                logger = logging.getLogger("webdriver_log")
                logger.setLevel(logging.ERROR)
                # NOTE(review): a new FileHandler is attached every time an
                # exception is caught, so handlers accumulate on the shared logger
                handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8")
                logger.addHandler(handler)
                # A single expected type is compared by exact type equality ...
                if isinstance(self.exception, type):
                    if not type(caughtException) == self.exception:
                        logger.error(f"unexpected error - {caughtException}")
                # ... anything else is treated as a container of expected types
                else:
                    if not type(caughtException) in self.exception:
                        logger.error(f"unexpected error - {caughtException}")
        return wrapper
 def catch_exception(
        func: Optional[_function],
        exception: Optional[_exceptions] = None,
 ) -> Union[ExceptionDecorator, _function]:
    """
    Builds an ExceptionDecorator and, when a function is supplied, applies it

    Args:
        func: Function to be decorated (falsy to get the bare decorator back)
        exception: Expected exception(s)

    Returns:
        The decorated function, or the ExceptionDecorator itself when no
        function was given
    """
    decorator = ExceptionDecorator(exception)
    # Without a function there is nothing to wrap yet: hand back the decorator
    return decorator(func) if func else decorator
@attrs
@ -97,11 +41,7 @@ class Browser:
        },
        kw_only=True,
    )
-    browser: Optional[BrowserCls] = attrib(
+    browser: BrowserCls
        validator=optional(instance_of(BrowserCls)),
        default=None,
        kw_only=True,
    )
    async def get_browser(
            self,
@ -217,6 +157,10 @@ class RedditScreenshot(Browser, Wait):
    """
    reddit_object: dict
    screenshot_idx: list
    story_mode: Optional[bool] = attrib(
        validator=instance_of(bool),
        default=False,
    )
    async def __dark_theme(
            self,
@ -313,6 +257,37 @@ class RedditScreenshot(Browser, Wait):
            {"path": f"assets/temp/png/comment_{filename_idx}.png"},
        )
    # WIP  TODO test it
    async def __collect_story(
            self,
            main_page: PageCls,
    ):
        """
        Optionally translates the submission text on the page, then screenshots it

        Args:
            main_page: Page instance with the reddit thread opened
        """
        # Local import so this method does not depend on a module-level import
        import json

        # Translates submission text
        if settings.config["reddit"]["thread"]["post_lang"]:
            story_tl = ts.google(
                self.reddit_object["thread_post"],
                to_language=settings.config["reddit"]["thread"]["post_lang"],
            )
            split_story_tl = story_tl.split('\n')

            await main_page.evaluate(
                # Find all elements
                'var elements = document.querySelectorAll(`[data-test-id="post-content"]'
                ' > [data-click-id="text"] > div > p`);'
                # Set array with translated text; json.dumps produces a literal that
                # is also valid JavaScript, unlike the repr of a Python list
                f"var texts = {json.dumps(split_story_tl)};"
                # Only touch paragraphs that exist: the translation may have more
                # lines than there are <p> elements on the page
                "var count = Math.min(texts.length, elements.length);"
                # Change text on the page
                "for (var i = 0; i < count; ++i) { elements[i].textContent = texts[i]; };"
            )

        await self.screenshot(
            main_page,
            "//*[@data-click-id='text']",
            {"path": "assets/temp/png/story_content.png"},
        )
    async def download(
            self,
    ):
@ -354,10 +329,16 @@ class RedditScreenshot(Browser, Wait):
        else:
            print_substep("Skipping translation...")
-        async_tasks_primary = [
+        async_tasks_primary = (
-            self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in
+            [
-            self.screenshot_idx
+                self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in
-        ]
+                self.screenshot_idx
            ]
            if not self.story_mode
            else [
                self.__collect_story(reddit_main)
            ]
        )
        async_tasks_primary.append(
            self.screenshot(
--- a/webdriver/web_engine.py
+++ b/webdriver/web_engine.py
@ -0,0 +1,22 @@
 from typing import Union
 from webdriver.pyppeteer import RedditScreenshot as Pyppeteer
 def screenshot_factory(
        driver: str,
 ) -> Union[Pyppeteer]:
    """
    Factory for webdriver

    Args:
        driver: (str) Name of a driver

    Returns:
        Webdriver class registered under that name (the caller instantiates it)

    Raises:
        KeyError: if ``driver`` is not a known driver name
        NotImplementedError: if the driver is registered but has no implementation yet
    """
    web_drivers = {
        "pyppeteer": Pyppeteer,
        # Placeholder: the playwright implementation is not wired in yet
        "playwright": None,
    }
    if driver not in web_drivers:
        # Still a KeyError, as with the plain lookup, but with a usable message
        raise KeyError(
            f"Unknown webdriver {driver!r}, expected one of: {', '.join(web_drivers)}"
        )
    selected = web_drivers[driver]
    if selected is None:
        # Fail here with a clear message instead of handing back None for the
        # caller to call
        raise NotImplementedError(f"Webdriver {driver!r} is not implemented yet")
    return selected