From 27577d0da6fd8b5a6a2b7833a462f0da4967890a Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Tue, 19 Jul 2022 22:46:13 +0300 Subject: [PATCH] async playwright in factory WIP, added collect_story in pyppeteer --- README.md | 3 + install.sh | 25 ++- main.py | 5 +- requirements.txt | 1 + webdriver/__init__.py | 0 webdriver/common.py | 67 ++++++ webdriver/playwright.py | 202 ++++++++++++++++++ .../pyppeteer.py | 117 +++++----- webdriver/web_engine.py | 22 ++ 9 files changed, 369 insertions(+), 73 deletions(-) create mode 100644 webdriver/__init__.py create mode 100644 webdriver/common.py create mode 100644 webdriver/playwright.py rename video_creation/screenshot_downloader.py => webdriver/pyppeteer.py (81%) create mode 100644 webdriver/web_engine.py diff --git a/README.md b/README.md index d7e1816..d7227f9 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,15 @@ The only original thing being done is the editing and gathering of all materials ## Requirements - Python 3.9+ +- Playwright (this should be installed automatically during installation) ## Installation 👩‍💻 1. Clone this repository 2. Run `pip install -r requirements.txt` +3. Run `python -m playwright install` and `python -m playwright install-deps` + **EXPERIMENTAL!!!!** On MacOS and Linux (debian, arch, fedora and centos, and based on those), you can run an install script that will automatically install steps 1 to 3. (requires bash) diff --git a/install.sh b/install.sh index 254438f..fb8a431 100644 --- a/install.sh +++ b/install.sh @@ -12,7 +12,7 @@ function Help(){ echo "Options:" echo " -h: Show this help message and exit" echo " -d: Install only dependencies" - echo " -p: Install only python dependencies" + echo " -p: Install only python dependencies (including playwright)" echo " -b: Install just the bot" echo " -l: Install the bot and the python dependencies" } @@ -107,6 +107,23 @@ function install_python_dep(){ cd ..
} +# install playwright function +function install_playwright(){ + # tell the user that the script is going to install playwright + echo "Installing playwright" + # cd into the directory where the script is downloaded + cd RedditVideoMakerBot + # run the install script + python3 -m playwright install + python3 -m playwright install-deps + # give a note + printf "Note, if these gave any errors, playwright may not be officially supported on your OS, check this issues page for support\nhttps://github.com/microsoft/playwright/issues" + if [ -x "$(command -v pacman)" ]; then + printf "It seems you are on and Arch based distro.\nTry installing these from the AUR for playwright to run:\nenchant1.6\nicu66\nlibwebp052\n" + fi + cd .. +} + # Install depndencies function install_deps(){ # if the platform is mac, install macos @@ -131,7 +148,7 @@ function install_deps(){ # else else # print an error message and exit - printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and dependencies\n If you want to add support for your OS, please open a pull request on github\n + printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and playwright dependencies\n If you want to add support for your OS, please open a pull request on github\n https://github.com/elebumm/RedditVideoMakerBot" exit 1 fi @@ -159,9 +176,10 @@ function install_main(){ echo "Installing only dependencies" install_deps elif [[ PYTHON_ONLY -eq 1 ]]; then - # if the -p (only python dependencies) options is selected install just the python dependencies + # if the -p (only python dependencies) options is selected install just the python dependencies and playwright echo "Installing only python dependencies" install_python_dep + install_playwright # if the -b (only the bot) options is selected install just the bot elif [[ 
JUST_BOT -eq 1 ]]; then echo "Installing only the bot" @@ -177,6 +195,7 @@ function install_main(){ install_deps get_the_bot install_python_dep + install_playwright fi DIR="./RedditVideoMakerBot" diff --git a/main.py b/main.py index 1e76442..ded1f2c 100755 --- a/main.py +++ b/main.py @@ -14,7 +14,7 @@ from video_creation.background import ( get_background_config, ) from video_creation.final_video import FinalVideo -from video_creation.screenshot_downloader import RedditScreenshot +from webdriver.web_engine import screenshot_factory from video_creation.voices import save_text_to_mp3 __VERSION__ = "2.3.1" @@ -41,7 +41,8 @@ async def main(POST_ID=None): cleanup() reddit_object = get_subreddit_threads(POST_ID) comments_created = save_text_to_mp3(reddit_object) - await RedditScreenshot(reddit_object, comments_created).download() + webdriver = screenshot_factory(config["settings"]["times_to_run"]) # TODO add in config + await webdriver(reddit_object, comments_created).download() bg_config = get_background_config() FinalVideo().make(comments_created, reddit_object, bg_config) diff --git a/requirements.txt b/requirements.txt index a0fb434..9684dc7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ boto3==1.24.24 botocore==1.27.24 gTTS==2.2.4 moviepy==1.0.3 +playwright==1.23.0 praw==7.6.0 pytube==12.1.0 requests==2.28.1 diff --git a/webdriver/__init__.py b/webdriver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/webdriver/common.py b/webdriver/common.py new file mode 100644 index 0000000..3c70a9f --- /dev/null +++ b/webdriver/common.py @@ -0,0 +1,67 @@ +from attr import attrs, attrib +from typing import TypeVar, Optional, Callable, Union + + +_function = TypeVar("_function", bound=Callable[..., object]) +_exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]]) + + +@attrs +class ExceptionDecorator: + """ + Decorator factory for catching exceptions and writing logs + """ + exception: Optional[_exceptions] = 
attrib(default=None) + _default_exception: Optional[_exceptions] = attrib( + kw_only=True, + default=None + ) + + def __attrs_post_init__(self): + if not self.exception: + self.exception = self._default_exception + + def __call__( + self, + func: _function, + ): + async def wrapper(*args, **kwargs): + try: + obj_to_return = await func(*args, **kwargs) + return obj_to_return + except Exception as caughtException: + import logging + + logger = logging.getLogger("webdriver_log") + logger.setLevel(logging.ERROR) + handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8") + logger.addHandler(handler) + + if isinstance(self.exception, type): + if not type(caughtException) == self.exception: + logger.error(f"unexpected error - {caughtException}") + else: + if not type(caughtException) in self.exception: + logger.error(f"unexpected error - {caughtException}") + + return wrapper + + @classmethod + def catch_exception( + cls, + func: Optional[_function], + exception: Optional[_exceptions] = None, + ) -> Union[object, _function]: + """ + Decorator for catching exceptions and writing logs + + Args: + func: Function to be decorated + exception: Expected exception(s) + Returns: + Decorated function + """ + exceptor = cls(exception) + if func: + exceptor = exceptor(func) + return exceptor diff --git a/webdriver/playwright.py b/webdriver/playwright.py new file mode 100644 index 0000000..f1934d9 --- /dev/null +++ b/webdriver/playwright.py @@ -0,0 +1,202 @@ +from playwright.async_api import async_playwright, ViewportSize +from playwright.async_api import Browser, Playwright +from rich.progress import track + +from pathlib import Path +import translators as ts +from utils import settings +from utils.console import print_step, print_substep +from attr import attrs, attrib +from attr.validators import instance_of, optional + +from typing import Dict, Optional, Union + + +@attrs +class Browser: + """ + Args: + default_Viewport (dict):Pyppeteer Browser default_Viewport 
options + browser (BrowserCls): Pyppeteer Browser instance + """ + default_Viewport: dict = attrib( + validator=instance_of(dict), + default={ + # 9x21 to see long posts + "defaultViewport": { + "width": 500, + "height": 1200, + }, + }, + kw_only=True, + ) + playwright: Playwright + browser: Browser + + async def get_browser( + self, + ) -> None: + """ + Creates Playwright instance & browser + """ + self.playwright = await async_playwright().start() + self.browser = await self.playwright.chromium.launch() + + async def close_browser( + self, + ) -> None: + """ + Closes Pyppeteer browser + """ + await self.browser.close() + await self.playwright.stop() + + +@attrs(auto_attribs=True) +class RedditScreenshot(Browser): + """ + Args: + reddit_object (Dict): Reddit object received from reddit/subreddit.py + screenshot_idx (int): List with indexes of voiced comments + """ + reddit_object: dict + screenshot_idx: list + + async def __dark_theme( + self, + page_instance: PageCls, + ) -> None: + """ + Enables dark theme in Reddit + + Args: + page_instance: Pyppeteer page instance with reddit page opened + """ + + await self.click( + page_instance, + "//*[contains(@class, 'header-user-dropdown')]", + {"timeout": 5000}, + ) + + # It's normal not to find it, sometimes there is none :shrug: + await self.click( + page_instance, + "//*[contains(text(), 'Settings')]/ancestor::button[1]", + {"timeout": 5000}, + ) + + await self.click( + page_instance, + "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]", + {"timeout": 5000}, + ) + + # Closes settings + await self.click( + page_instance, + "//*[contains(@class, 'header-user-dropdown')]", + {"timeout": 5000}, + ) + + + +storymode = False + + +def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): + """Downloads screenshots of reddit posts as seen on the web. 
Downloads to assets/temp/png + + Args: + reddit_object (Dict): Reddit object received from reddit/subreddit.py + screenshot_num (int): Number of screenshots to download + """ + print_step("Downloading screenshots of reddit posts...") + + # ! Make sure the reddit screenshots folder exists + Path("assets/temp/png").mkdir(parents=True, exist_ok=True) + + with sync_playwright() as p: + print_substep("Launching Headless Browser...") + + browser = p.chromium.launch() + context = browser.new_context() + + if settings.config["settings"]["theme"] == "dark": + cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8") + else: + cookie_file = open("./video_creation/data/cookie-light-mode.json", encoding="utf-8") + cookies = json.load(cookie_file) + context.add_cookies(cookies) # load preference cookies + # Get the thread screenshot + page = context.new_page() + page.goto(reddit_object["thread_url"], timeout=0) + page.set_viewport_size(ViewportSize(width=1920, height=1080)) + if page.locator('[data-testid="content-gate"]').is_visible(): + # This means the post is NSFW and requires to click the proceed button. + + print_substep("Post is NSFW. 
You are spicy...") + page.locator('[data-testid="content-gate"] button').click() + page.wait_for_load_state() # Wait for page to fully load + + if page.locator('[data-click-id="text"] button').is_visible(): + page.locator( + '[data-click-id="text"] button' + ).click() # Remove "Click to see nsfw" Button in Screenshot + + # translate code + + if settings.config["reddit"]["thread"]["post_lang"]: + print_substep("Translating post...") + texts_in_tl = ts.google( + reddit_object["thread_title"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + + page.evaluate( + "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content", + texts_in_tl, + ) + else: + print_substep("Skipping translation...") + + page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png") + + if storymode: + page.locator('[data-click-id="text"]').screenshot( + path="assets/temp/png/story_content.png" + ) + else: + for idx, comment in enumerate( + track(reddit_object["comments"], "Downloading screenshots...") + ): + # Stop if we have reached the screenshot_num + if idx >= screenshot_num: + break + + if page.locator('[data-testid="content-gate"]').is_visible(): + page.locator('[data-testid="content-gate"] button').click() + + page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0) + + # translate code + + if settings.config["reddit"]["thread"]["post_lang"]: + comment_tl = ts.google( + comment["comment_body"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + page.evaluate( + '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', + [comment_tl, comment["comment_id"]], + ) + try: + page.locator(f"#t1_{comment['comment_id']}").screenshot( + path=f"assets/temp/png/comment_{idx}.png" + ) + except TimeoutError: + del reddit_object["comments"] + screenshot_num += 1 + 
print("TimeoutError: Skipping screenshot...") + continue + print_substep("Screenshots downloaded Successfully.", style="bold green") diff --git a/video_creation/screenshot_downloader.py b/webdriver/pyppeteer.py similarity index 81% rename from video_creation/screenshot_downloader.py rename to webdriver/pyppeteer.py index 62e4df1..b9b409b 100644 --- a/video_creation/screenshot_downloader.py +++ b/webdriver/pyppeteer.py @@ -16,67 +16,11 @@ from utils.console import print_step, print_substep from attr import attrs, attrib from attr.validators import instance_of, optional -from typing import TypeVar, Optional, Callable, Union +from typing import Optional -_function = TypeVar("_function", bound=Callable[..., object]) -_exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]]) +from webdriver.common import ExceptionDecorator - -@attrs -class ExceptionDecorator: - """ - Decorator factory for catching exceptions and writing logs - """ - exception: Optional[_exceptions] = attrib(default=None) - __default_exception: _exceptions = attrib(default=BrowserTimeoutError) - - def __attrs_post_init__(self): - if not self.exception: - self.exception = self.__default_exception - - def __call__( - self, - func: _function, - ): - async def wrapper(*args, **kwargs): - try: - obj_to_return = await func(*args, **kwargs) - return obj_to_return - except Exception as caughtException: - import logging - - logger = logging.getLogger("webdriver_log") - logger.setLevel(logging.ERROR) - handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8") - logger.addHandler(handler) - - if isinstance(self.exception, type): - if not type(caughtException) == self.exception: - logger.error(f"unexpected error - {caughtException}") - else: - if not type(caughtException) in self.exception: - logger.error(f"unexpected error - {caughtException}") - - return wrapper - - -def catch_exception( - func: Optional[_function], - exception: Optional[_exceptions] = None, -) -> 
Union[ExceptionDecorator, _function]: - """ - Decorator for catching exceptions and writing logs - - Args: - func: Function to be decorated - exception: Expected exception(s) - Returns: - Decorated function - """ - exceptor = ExceptionDecorator(exception) - if func: - exceptor = exceptor(func) - return exceptor +catch_exception = ExceptionDecorator(default_exception=BrowserTimeoutError).catch_exception @attrs @@ -97,11 +41,7 @@ class Browser: }, kw_only=True, ) - browser: Optional[BrowserCls] = attrib( - validator=optional(instance_of(BrowserCls)), - default=None, - kw_only=True, - ) + browser: BrowserCls async def get_browser( self, @@ -217,6 +157,10 @@ class RedditScreenshot(Browser, Wait): """ reddit_object: dict screenshot_idx: list + story_mode: Optional[bool] = attrib( + validator=instance_of(bool), + default=False, + ) async def __dark_theme( self, @@ -313,6 +257,37 @@ class RedditScreenshot(Browser, Wait): {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) + # WIP TODO test it + async def __collect_story( + self, + main_page: PageCls, + + ): + # Translates submission text + if settings.config["reddit"]["thread"]["post_lang"]: + story_tl = ts.google( + self.reddit_object["thread_post"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + split_story_tl = story_tl.split('\n') + await main_page.evaluate( + # Find all elements + 'var elements = document.querySelectorAll(`[data-test-id="post-content"]' + ' > [data-click-id="text"] > div > p`);' + # Set array with translated text + f"var texts = {split_story_tl};" + # Map 2 arrays together + "var text_map = texts.map(function(e, i) { return [e, elements[i]]; });" + # Change text on the page + "for (i = 0; i < text_map.length; ++i) { text_map[i][1].textContent = text_map[i][0] ; };" + ) + + await self.screenshot( + main_page, + "//*[@data-click-id='text']", + {"path": "assets/temp/png/story_content.png"}, + ) + async def download( self, ): @@ -354,10 +329,16 @@ class 
RedditScreenshot(Browser, Wait): else: print_substep("Skipping translation...") - async_tasks_primary = [ - self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in - self.screenshot_idx - ] + async_tasks_primary = ( + [ + self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in + self.screenshot_idx + ] + if not self.story_mode + else [ + self.__collect_story(reddit_main) + ] + ) async_tasks_primary.append( self.screenshot( diff --git a/webdriver/web_engine.py b/webdriver/web_engine.py new file mode 100644 index 0000000..2ca28ab --- /dev/null +++ b/webdriver/web_engine.py @@ -0,0 +1,22 @@ +from typing import Union + +from webdriver.pyppeteer import RedditScreenshot as Pyppeteer + + +def screenshot_factory( + driver: str, +) -> Union[Pyppeteer]: + """ + Factory for webdriver + Args: + driver: (str) Name of a driver + + Returns: + Webdriver instance + """ + web_drivers = { + "pyppeteer": Pyppeteer, + "playwright": None, + } + + return web_drivers[driver]