From ed8cd3cd09617f2010c66e263a86e786ef23fe1f Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:37:01 +0300 Subject: [PATCH 01/39] moved to async webdriver --- main.py | 23 +- reddit/subreddit.py | 5 +- requirements.txt | 3 +- video_creation/screenshot_downloader.py | 312 ++++++++++++++++++------ 4 files changed, 266 insertions(+), 77 deletions(-) diff --git a/main.py b/main.py index 8ce8725..0ba8f27 100755 --- a/main.py +++ b/main.py @@ -1,4 +1,5 @@ #!/usr/bin/env python +from asyncio import run import math from subprocess import Popen from os import name @@ -14,7 +15,7 @@ from video_creation.background import ( get_background_config, ) from video_creation.final_video import make_final_video -from video_creation.screenshot_downloader import download_screenshots_of_reddit_posts +from video_creation.screenshot_downloader import Reddit from video_creation.voices import save_text_to_mp3 VERSION = "2.2.9" @@ -35,24 +36,28 @@ print_markdown( print_step(f"You are using V{VERSION} of the bot") -def main(POST_ID=None): +async def main( + POST_ID=None +): cleanup() reddit_object = get_subreddit_threads(POST_ID) length, number_of_comments = save_text_to_mp3(reddit_object) length = math.ceil(length) - download_screenshots_of_reddit_posts(reddit_object, number_of_comments) + reddit_screenshots = Reddit(reddit_object, number_of_comments) + browser = await reddit_screenshots.get_browser() + await reddit_screenshots.download_screenshots(browser) bg_config = get_background_config() download_background(bg_config) chop_background_video(bg_config, length) make_final_video(number_of_comments, length, reddit_object, bg_config) -def run_many(times): +async def run_many(times): for x in range(1, times + 1): print_step( f'on the {x}{("th", "st", "nd", "rd", "th", "th", "th", "th","th", "th")[x%10]} iteration of {times}' ) # correct 1st 2nd 3rd 4th 5th.... - main() + await main() Popen("cls" if name == "nt" else "clear", shell=True).wait() @@ -61,7 +66,9 @@ if __name__ == "__main__": config is False and exit() try: if config["settings"]["times_to_run"]: - run_many(config["settings"]["times_to_run"]) + run( + run_many(config["settings"]["times_to_run"]) + ) elif len(config["reddit"]["thread"]["post_id"].split("+")) > 1: for index, post_id in enumerate(config["reddit"]["thread"]["post_id"].split("+")): @@ -69,7 +76,9 @@ if __name__ == "__main__": print_step( f'on the {index}{("st" if index%10 == 1 else ("nd" if index%10 == 2 else ("rd" if index%10 == 3 else "th")))} post of {len(config["reddit"]["thread"]["post_id"].split("+"))}' ) - main(post_id) + run( + main(post_id) + ) Popen("cls" if name == "nt" else "clear", shell=True).wait() else: main() diff --git a/reddit/subreddit.py b/reddit/subreddit.py index b64a52a..2ce80ce 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -9,7 +9,9 @@ from utils.subreddit import get_subreddit_undone from utils.videos import check_done -def get_subreddit_threads(POST_ID: str): +def get_subreddit_threads( + POST_ID: str +): """ Returns a list of threads from the AskReddit subreddit. """ @@ -87,6 +89,7 @@ def get_subreddit_threads(POST_ID: str): content["thread_title"] = submission.title content["thread_post"] = submission.selftext content["thread_id"] = submission.id + content["is_nsfw"] = 'nsfw' in submission.whitelist_status content["comments"] = [] for top_level_comment in submission.comments: diff --git a/requirements.txt b/requirements.txt index 8b377c2..10ce9c3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,10 +2,11 @@ boto3==1.24.24 botocore==1.27.24 gTTS==2.2.4 moviepy==1.0.3 -playwright==1.23.0 praw==7.6.0 pytube==12.1.0 requests==2.28.1 rich==12.4.4 toml==0.10.2 translators==5.3.1 +pyppeteer==1.0.2 +attrs==21.4.0 diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 6fb9ef4..526a17c 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -1,107 +1,283 @@ -import json +from asyncio import as_completed from pathlib import Path from typing import Dict + from utils import settings -from playwright.async_api import async_playwright # pylint: disable=unused-import -# do not remove the above line +from pyppeteer import launch +from pyppeteer.page import Page as PageCls +from pyppeteer.browser import Browser as BrowserCls +from pyppeteer.element_handle import ElementHandle as ElementHandleCls +from pyppeteer.errors import TimeoutError as BrowserTimeoutError -from playwright.sync_api import sync_playwright, ViewportSize from rich.progress import track import translators as ts from utils.console import print_step, print_substep -storymode = False +from attr import attrs, attrib +from typing import TypeVar, Optional, Callable, Union + +_function = TypeVar('_function', bound=Callable[..., object]) +_exceptions = TypeVar('_exceptions', bound=Optional[Union[type, tuple, list]]) + + +@attrs +class ExceptionDecorator: + __exception: Optional[_exceptions] = attrib(default=None) + __default_exception: _exceptions = attrib(default=BrowserTimeoutError) + + def __attrs_post_init__(self): + if not self.__exception: + self.__exception = self.__default_exception + + def __call__( + self, + func: _function, + ): + async def wrapper(*args, **kwargs): + try: + obj_to_return = await func(*args, **kwargs) + return obj_to_return + except Exception as caughtException: + import logging + + if isinstance(self.__exception, type): + if not type(caughtException) == self.__exception: + logging.basicConfig(filename='.webdriver.log', filemode='w', encoding='utf-8', + level=logging.DEBUG) + logging.error(f'unexpected error - {caughtException}') + else: + if not type(caughtException) in self.__exception: + logging.error(f'unexpected error - {caughtException}') + + return wrapper + + +def catch_exception( + func: Optional[_function], + exception: Optional[_exceptions] = None, +) -> ExceptionDecorator | _function: + exceptor = ExceptionDecorator(exception) + if func: + exceptor = exceptor(func) + return exceptor + +@attrs +class Browser: + # default_Viewport: dict = attrib(validator=instance_of(dict), default=dict()) + # + # def __attrs_post_init__(self): + # if self.default_Viewport.__len__() == 0: + # self.default_Viewport['isLandscape'] = True -def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): - """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png + async def get_browser( + self, + ) -> BrowserCls: + return await launch() + async def close_browser( + self, + browser: BrowserCls + ) -> None: + await browser.close() + + +class Wait: + @staticmethod + @catch_exception + async def find_xpath( + page_instance: PageCls, + xpath: Optional[str] = None, + options: Optional[dict] = None, + ) -> 'ElementHandleCls': + if options: + el = await page_instance.waitForXPath(xpath, options=options) + else: + el = await page_instance.waitForXPath(xpath) + return el + + @catch_exception + async def click( + self, + page_instance: Optional[PageCls] = None, + xpath: Optional[str] = None, + find_options: Optional[dict] = None, + options: Optional[dict] = None, + el: Optional[ElementHandleCls] = None, + ) -> None: + if not el: + el = await self.find_xpath(page_instance, xpath, find_options) + if options: + await el.click(options) + else: + await el.click() + + @catch_exception + async def screenshot( + self, + page_instance: Optional[PageCls] = None, + xpath: Optional[str] = None, + options: Optional[dict] = None, + find_options: Optional[dict] = None, + el: Optional[ElementHandleCls] = None, + ) -> None: + if not el: + el = await self.find_xpath(page_instance, xpath, find_options) + if options: + await el.screenshot(options) + else: + await el.screenshot() + + +@attrs(auto_attribs=True) +class Reddit(Browser, Wait): + """ Args: reddit_object (Dict): Reddit object received from reddit/subreddit.py screenshot_num (int): Number of screenshots to download """ - print_step("Downloading screenshots of reddit posts...") + reddit_object: dict + screenshot_num: int = attrib() - # ! Make sure the reddit screenshots folder exists - Path("assets/temp/png").mkdir(parents=True, exist_ok=True) + @screenshot_num.validator + def validate_screenshot_num(self, attribute, value): + if value <= 0: + raise ValueError('Check screenshot_num in config') - with sync_playwright() as p: - print_substep("Launching Headless Browser...") + async def dark_theme( + self, + page_instance: PageCls, + ) -> None: + """ + Enables dark theme in Reddit + """ - browser = p.chromium.launch() - context = browser.new_context() + await self.click( + page_instance, + '//*[contains(@class, \'header-user-dropdown\')]', + {'timeout': 5000}, + ) + + # It's normal not to find it, sometimes there is none :shrug: + await self.click( + page_instance, + '//*[contains(text(), \'Settings\')]/ancestor::button[1]', + {'timeout': 5000}, + ) + + await self.click( + page_instance, + '//*[contains(text(), \'Dark Mode\')]/ancestor::button[1]', + {'timeout': 5000}, + ) + + # Closes settings + await self.click( + page_instance, + '//*[contains(@class, \'header-user-dropdown\')]', + {'timeout': 5000}, + ) + + async def download_screenshots( + self, + browser: BrowserCls + + ): + """ + Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png + """ + print_step('Downloading screenshots of reddit posts...') + + # ! Make sure the reddit screenshots folder exists + Path('assets/temp/png').mkdir(parents=True, exist_ok=True) + + print_substep('Launching Headless Browser...') - if settings.config["settings"]["theme"] == "dark": - cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8") - else: - cookie_file = open("./video_creation/data/cookie-light-mode.json", encoding="utf-8") - cookies = json.load(cookie_file) - context.add_cookies(cookies) # load preference cookies # Get the thread screenshot - page = context.new_page() - page.goto(reddit_object["thread_url"], timeout=0) - page.set_viewport_size(ViewportSize(width=1920, height=1080)) - if page.locator('[data-testid="content-gate"]').is_visible(): + reddit_main = await browser.newPage() + await reddit_main.goto(self.reddit_object['thread_url']) + + if settings.config['settings']['theme'] == 'dark': + await self.dark_theme(reddit_main) + + if self.reddit_object['is_nsfw']: # This means the post is NSFW and requires to click the proceed button. - print_substep("Post is NSFW. You are spicy...") - page.locator('[data-testid="content-gate"] button').click() - page.locator( - '[data-click-id="text"] button' - ).click() # Remove "Click to see nsfw" Button in Screenshot + print_substep('Post is NSFW. You are spicy...') + await self.click( + reddit_main, + '//button[contains(text(), \'Yes\')]', + {'timeout': 5000}, + ) + + await self.click( + reddit_main, + '//button[contains(text(), \'nsfw\')]', + {'timeout': 5000}, + ) # translate code - if settings.config["reddit"]["thread"]["post_lang"]: - print_substep("Translating post...") + if settings.config['reddit']['thread']['post_lang']: + print_substep('Translating post...') texts_in_tl = ts.google( - reddit_object["thread_title"], - to_language=settings.config["reddit"]["thread"]["post_lang"], + self.reddit_object['thread_title'], + to_language=settings.config['reddit']['thread']['post_lang'], ) - page.evaluate( - "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content", + await reddit_main.evaluate( + "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > " + "div').textContent = tl_content", texts_in_tl, ) else: print_substep("Skipping translation...") - page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png") + await self.screenshot( + reddit_main, + f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]', + {'path': f'assets/temp/png/title.png'}, + ) - if storymode: - page.locator('[data-click-id="text"]').screenshot( - path="assets/temp/png/story_content.png" - ) - else: - for idx, comment in enumerate( - track(reddit_object["comments"], "Downloading screenshots...") - ): - # Stop if we have reached the screenshot_num - if idx >= screenshot_num: - break - - if page.locator('[data-testid="content-gate"]').is_visible(): - page.locator('[data-testid="content-gate"] button').click() - - page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0) - - # translate code - - if settings.config["reddit"]["thread"]["post_lang"]: - comment_tl = ts.google( - comment["comment_body"], - to_language=settings.config["reddit"]["thread"]["post_lang"], - ) - page.evaluate( - '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', - [comment_tl, comment["comment_id"]], - ) - - page.locator(f"#t1_{comment['comment_id']}").screenshot( - path=f"assets/temp/png/comment_{idx}.png" + async def collect_comment( + comment_obj: dict, + filename_idx: int, + ): + comment_page = await browser.newPage() + await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') + + # translate code + if settings.config["reddit"]["thread"]["post_lang"]: + comment_tl = ts.google( + comment_obj["comment_body"], + to_language=settings.config["reddit"]["thread"]["post_lang"], ) + await comment_page.evaluate( + '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[' + 'data-testid="comment"] > div`).textContent = tl_content', + [comment_tl, comment_obj["comment_id"]], + ) + + await self.screenshot( + comment_page, + f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]', + {'path': f'assets/temp/png/comment_{filename_idx}.png'}, + ) + + async_tasks_primary = [ + collect_comment(comment, idx) for idx, comment in + enumerate(self.reddit_object['comments']) + if idx < self.screenshot_num + ] + + for task in track( + as_completed(async_tasks_primary), + description='Downloading screenshots...', + total=async_tasks_primary.__len__(), + ): + await task - print_substep("Screenshots downloaded Successfully.", style="bold green") + print_substep('Screenshots downloaded Successfully.', style='bold green') From eff38a5852d0d9b5d9c42ffe2c5270d28c8d0a55 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 10 Jul 2022 01:48:25 +0300 Subject: [PATCH 02/39] updated README.md & install.sh --- README.md | 2 -- install.sh | 25 +++---------------------- video_creation/data/videos.json | 2 +- 3 files changed, 4 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index 028dcd4..57aa6d4 100644 --- a/README.md +++ b/README.md @@ -40,8 +40,6 @@ The only original thing being done is the editing and gathering of all materials 1. Clone this repository 2. Run `pip install -r requirements.txt` -3. Run `python -m playwright install` and `python -m playwright install-deps` - **EXPERIMENTAL!!!!** On MacOS and Linux (debian, arch, fedora and centos, and based on those), you can run an install script that will automatically install steps 1 to 3. (requires bash) diff --git a/install.sh b/install.sh index 782069d..254438f 100644 --- a/install.sh +++ b/install.sh @@ -12,7 +12,7 @@ function Help(){ echo "Options:" echo " -h: Show this help message and exit" echo " -d: Install only dependencies" - echo " -p: Install only python dependencies (including playwright)" + echo " -p: Install only python dependencies" echo " -b: Install just the bot" echo " -l: Install the bot and the python dependencies" } @@ -107,23 +107,6 @@ function install_python_dep(){ cd .. } -# install playwright function -function install_playwright(){ - # tell the user that the script is going to install playwright - echo "Installing playwright" - # cd into the directory where the script is downloaded - cd RedditVideoMakerBot - # run the install script - python3 -m playwright install - python3 -m playwright install-deps - # give a note - printf "Note, if these gave any errors, playwright may not be officially supported on your OS, check this issues page for support\nhttps://github.com/microsoft/playwright/issues" - if [ -x "$(command -v pacman)" ]; then - printf "It seems you are on and Arch based distro.\nTry installing these from the AUR for playwright to run:\nenchant1.6\nicu66\nlibwebp052\n" - fi - cd .. -} - # Install depndencies function install_deps(){ # if the platform is mac, install macos @@ -148,7 +131,7 @@ function install_deps(){ # else else # print an error message and exit - printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and playwright dependencies\n If you want to add support for your OS, please open a pull request on github\n + printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and dependencies\n If you want to add support for your OS, please open a pull request on github\n https://github.com/elebumm/RedditVideoMakerBot" exit 1 fi @@ -176,10 +159,9 @@ function install_main(){ echo "Installing only dependencies" install_deps elif [[ PYTHON_ONLY -eq 1 ]]; then - # if the -p (only python dependencies) options is selected install just the python dependencies and playwright + # if the -p (only python dependencies) options is selected install just the python dependencies echo "Installing only python dependencies" install_python_dep - install_playwright # if the -b (only the bot) options is selected install just the bot elif [[ JUST_BOT -eq 1 ]]; then echo "Installing only the bot" @@ -195,7 +177,6 @@ function install_main(){ install_deps get_the_bot install_python_dep - install_playwright fi DIR="./RedditVideoMakerBot" diff --git a/video_creation/data/videos.json b/video_creation/data/videos.json index fe51488..0637a08 100644 --- a/video_creation/data/videos.json +++ b/video_creation/data/videos.json @@ -1 +1 @@ -[] +[] \ No newline at end of file From e19e532ea5d0b82aa29985fdc7bdd6002b30b70b Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 10 Jul 2022 20:49:55 +0300 Subject: [PATCH 03/39] added typing --- main.py | 6 +- video_creation/screenshot_downloader.py | 179 +++++++++++++++++------- 2 files changed, 132 insertions(+), 53 deletions(-) diff --git a/main.py b/main.py index 0ba8f27..411ed90 100755 --- a/main.py +++ b/main.py @@ -15,7 +15,7 @@ from video_creation.background import ( get_background_config, ) from video_creation.final_video import make_final_video -from video_creation.screenshot_downloader import Reddit +from video_creation.screenshot_downloader import RedditScreenshot from video_creation.voices import save_text_to_mp3 VERSION = "2.2.9" @@ -43,9 +43,7 @@ async def main( reddit_object = get_subreddit_threads(POST_ID) length, number_of_comments = save_text_to_mp3(reddit_object) length = math.ceil(length) - reddit_screenshots = Reddit(reddit_object, number_of_comments) - browser = await reddit_screenshots.get_browser() - await reddit_screenshots.download_screenshots(browser) + await RedditScreenshot(reddit_object, number_of_comments).download() bg_config = get_background_config() download_background(bg_config) chop_background_video(bg_config, length) diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 526a17c..12eba72 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -1,30 +1,33 @@ from asyncio import as_completed -from pathlib import Path -from typing import Dict - -from utils import settings - from pyppeteer import launch from pyppeteer.page import Page as PageCls from pyppeteer.browser import Browser as BrowserCls from pyppeteer.element_handle import ElementHandle as ElementHandleCls from pyppeteer.errors import TimeoutError as BrowserTimeoutError +from pathlib import Path +from typing import Dict +from utils import settings + from rich.progress import track import translators as ts - from utils.console import print_step, print_substep from attr import attrs, attrib +from attr.validators import instance_of, optional from typing import TypeVar, Optional, Callable, Union + _function = TypeVar('_function', bound=Callable[..., object]) _exceptions = TypeVar('_exceptions', bound=Optional[Union[type, tuple, list]]) @attrs class ExceptionDecorator: + """ + Factory for decorating functions + """ __exception: Optional[_exceptions] = attrib(default=None) __default_exception: _exceptions = attrib(default=BrowserTimeoutError) @@ -59,6 +62,15 @@ def catch_exception( func: Optional[_function], exception: Optional[_exceptions] = None, ) -> ExceptionDecorator | _function: + """ + Decorator for catching exceptions and writing logs + + Args: + func: Function to be decorated + exception: Expected exception(s) + Returns: + Decorated function + """ exceptor = ExceptionDecorator(exception) if func: exceptor = exceptor(func) @@ -67,22 +79,41 @@ def catch_exception( @attrs class Browser: - # default_Viewport: dict = attrib(validator=instance_of(dict), default=dict()) - # - # def __attrs_post_init__(self): - # if self.default_Viewport.__len__() == 0: - # self.default_Viewport['isLandscape'] = True + """ + Args: + default_Viewport (dict):Pyppeteer Browser default_Viewport options + browser (BrowserCls): Pyppeteer Browser instance + """ + default_Viewport: dict = attrib( + validator=instance_of(dict), + default=dict(), + kw_only=True, + ) + browser: Optional[BrowserCls] = attrib( + validator=optional(instance_of(BrowserCls)), + default=None, + kw_only=True, + ) + + def __attrs_post_init__(self): + if self.default_Viewport.__len__() == 0: + self.default_Viewport['isLandscape'] = True async def get_browser( self, - ) -> BrowserCls: - return await launch() + ) -> None: + """ + Creates Pyppeteer browser + """ + self.browser = await launch(self.default_Viewport) async def close_browser( self, - browser: BrowserCls ) -> None: - await browser.close() + """ + Closes Pyppeteer browser + """ + await self.browser.close() class Wait: @@ -93,6 +124,27 @@ class Wait: xpath: Optional[str] = None, options: Optional[dict] = None, ) -> 'ElementHandleCls': + """ + Explicitly finds element on the page + + Args: + page_instance: Pyppeteer page instance + xpath: xpath query + options: Pyppeteer waitForXPath parameters + + Available options are: + + * ``visible`` (bool): wait for element to be present in DOM and to be + visible, i.e. to not have ``display: none`` or ``visibility: hidden`` + CSS properties. Defaults to ``False``. + * ``hidden`` (bool): wait for element to not be found in the DOM or to + be hidden, i.e. have ``display: none`` or ``visibility: hidden`` CSS + properties. Defaults to ``False``. + * ``timeout`` (int|float): maximum time to wait for in milliseconds. + Defaults to 30000 (30 seconds). Pass ``0`` to disable timeout. + Returns: + Pyppeteer element instance + """ if options: el = await page_instance.waitForXPath(xpath, options=options) else: @@ -108,6 +160,16 @@ class Wait: options: Optional[dict] = None, el: Optional[ElementHandleCls] = None, ) -> None: + """ + Clicks on the element + + Args: + page_instance: Pyppeteer page instance + xpath: xpath query + find_options: Pyppeteer waitForXPath parameters + options: Pyppeteer click parameters + el: Pyppeteer element instance + """ if not el: el = await self.find_xpath(page_instance, xpath, find_options) if options: @@ -124,6 +186,16 @@ class Wait: find_options: Optional[dict] = None, el: Optional[ElementHandleCls] = None, ) -> None: + """ + Makes a screenshot of the element + + Args: + page_instance: Pyppeteer page instance + xpath: xpath query + options: Pyppeteer screenshot parameters + find_options: Pyppeteer waitForXPath parameters + el: Pyppeteer element instance + """ if not el: el = await self.find_xpath(page_instance, xpath, find_options) if options: @@ -133,7 +205,7 @@ class Wait: @attrs(auto_attribs=True) -class Reddit(Browser, Wait): +class RedditScreenshot(Browser, Wait): """ Args: reddit_object (Dict): Reddit object received from reddit/subreddit.py @@ -147,12 +219,15 @@ class Reddit(Browser, Wait): if value <= 0: raise ValueError('Check screenshot_num in config') - async def dark_theme( + async def __dark_theme( self, page_instance: PageCls, ) -> None: """ Enables dark theme in Reddit + + Args: + page_instance: Pyppeteer page instance with reddit page opened """ await self.click( @@ -181,14 +256,45 @@ class Reddit(Browser, Wait): {'timeout': 5000}, ) - async def download_screenshots( + async def __collect_comment( self, - browser: BrowserCls + comment_obj: dict, + filename_idx: int, + ) -> None: + """ + Makes a screenshot of the comment + Args: + comment_obj: prew comment object + filename_idx: index for the filename + """ + comment_page = await self.browser.newPage() + await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') + + # Translates submission' comment + if settings.config["reddit"]["thread"]["post_lang"]: + comment_tl = ts.google( + comment_obj["comment_body"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + await comment_page.evaluate( + f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) ' + f'> div > div[data-testid="comment"] > div`).textContent = {comment_tl}', + ) + + await self.screenshot( + comment_page, + f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]', + {'path': f'assets/temp/png/comment_{filename_idx}.png'}, + ) + + async def download( + self, ): """ Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png """ + await self.get_browser() print_step('Downloading screenshots of reddit posts...') # ! Make sure the reddit screenshots folder exists @@ -197,11 +303,11 @@ class Reddit(Browser, Wait): print_substep('Launching Headless Browser...') # Get the thread screenshot - reddit_main = await browser.newPage() + reddit_main = await self.browser.newPage() await reddit_main.goto(self.reddit_object['thread_url']) if settings.config['settings']['theme'] == 'dark': - await self.dark_theme(reddit_main) + await self.__dark_theme(reddit_main) if self.reddit_object['is_nsfw']: # This means the post is NSFW and requires to click the proceed button. @@ -219,8 +325,7 @@ class Reddit(Browser, Wait): {'timeout': 5000}, ) - # translate code - + # Translates submission title if settings.config['reddit']['thread']['post_lang']: print_substep('Translating post...') texts_in_tl = ts.google( @@ -242,33 +347,8 @@ class Reddit(Browser, Wait): {'path': f'assets/temp/png/title.png'}, ) - async def collect_comment( - comment_obj: dict, - filename_idx: int, - ): - comment_page = await browser.newPage() - await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') - - # translate code - if settings.config["reddit"]["thread"]["post_lang"]: - comment_tl = ts.google( - comment_obj["comment_body"], - to_language=settings.config["reddit"]["thread"]["post_lang"], - ) - await comment_page.evaluate( - '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[' - 'data-testid="comment"] > div`).textContent = tl_content', - [comment_tl, comment_obj["comment_id"]], - ) - - await self.screenshot( - comment_page, - f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]', - {'path': f'assets/temp/png/comment_{filename_idx}.png'}, - ) - async_tasks_primary = [ - collect_comment(comment, idx) for idx, comment in + self.__collect_comment(comment, idx) for idx, comment in enumerate(self.reddit_object['comments']) if idx < self.screenshot_num ] @@ -281,3 +361,4 @@ class Reddit(Browser, Wait): await task print_substep('Screenshots downloaded Successfully.', style='bold green') + await self.close_browser() From 6e4e6527a1ab4411cd5c41b629e512490257b3a9 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 21:20:49 +0300 Subject: [PATCH 04/39] added fixes --- TTS/common.py | 10 ++ TTS/engine_wrapper.py | 146 ++++++++++-------------- main.py | 10 +- video_creation/final_video.py | 127 +++++++++++---------- video_creation/screenshot_downloader.py | 9 +- video_creation/voices.py | 47 ++++---- 6 files changed, 168 insertions(+), 181 deletions(-) create mode 100644 TTS/common.py diff --git a/TTS/common.py b/TTS/common.py new file mode 100644 index 0000000..a56444e --- /dev/null +++ b/TTS/common.py @@ -0,0 +1,10 @@ +def audio_length( + path: str, +) -> float | int: + from mutagen.mp3 import MP3 + + try: + audio = MP3(path) + return audio.info.length + except Exception as e: # TODO add logging + return 0 diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index a171db7..1bacd86 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -8,16 +8,16 @@ import re # from mutagen.mp3 import MP3, HeaderNotFoundError import translators as ts from rich.progress import track -from moviepy.editor import AudioFileClip, CompositeAudioClip, concatenate_audioclips +from attr import attrs, attrib + from utils.console import print_step, print_substep from utils.voice import sanitize_text from utils import settings - -DEFUALT_MAX_LENGTH: int = 50 # video length variable +from TTS.common import audio_length +@attrs(auto_attribs=True) class TTSEngine: - """Calls the given TTS engine to reduce code duplication and allow multiple TTS engines. Args: @@ -29,94 +29,72 @@ class TTSEngine: Notes: tts_module must take the arguments text and filepath. """ - - def __init__( - self, - tts_module, - reddit_object: dict, - path: str = "assets/temp/mp3", - max_length: int = DEFUALT_MAX_LENGTH, - ): - self.tts_module = tts_module() - self.reddit_object = reddit_object - self.path = path - self.max_length = max_length - self.length = 0 - - def run(self) -> Tuple[int, int]: + tts_module: object + reddit_object: dict + path: str = 'assets/temp/mp3' + max_length: int = 50 # TODO move to config + __total_length: int = attrib( + default=0, + kw_only=True + ) + + def run( + self + ) -> list: Path(self.path).mkdir(parents=True, exist_ok=True) - # This file needs to be removed in case this post does not use post text, so that it wont appear in the final video + # This file needs to be removed in case this post does not use post text + # so that it won't appear in the final video try: - Path(f"{self.path}/posttext.mp3").unlink() + Path(f'{self.path}/posttext.mp3').unlink() except OSError: pass - print_step("Saving Text to MP3 files...") - - self.call_tts("title", self.reddit_object["thread_title"]) - if ( - self.reddit_object["thread_post"] != "" - and settings.config["settings"]["storymode"] == True - ): - self.call_tts("posttext", self.reddit_object["thread_post"]) - - idx = None - for idx, comment in track(enumerate(self.reddit_object["comments"]), "Saving..."): - # ! Stop creating mp3 files if the length is greater than max length. - if self.length > self.max_length: - break - if not self.tts_module.max_chars: - self.call_tts(f"{idx}", comment["comment_body"]) - else: - self.split_post(comment["comment_body"], idx) - - print_substep("Saved Text to MP3 files successfully.", style="bold green") - return self.length, idx - - def split_post(self, text: str, idx: int): - split_files = [] - split_text = [ - x.group().strip() - for x in re.finditer(rf" *((.{{0,{self.tts_module.max_chars}}})(\.|.$))", text) + print_step('Saving Text to MP3 files...') + + self.call_tts('title', self.reddit_object['thread_title']) + + if self.reddit_object['thread_post'] and settings.config['settings']['storymode']: + self.call_tts('posttext', self.reddit_object['thread_post']) + + sync_tasks_primary = [ + self.call_tts(str(idx), comment['comment_body']) + for idx, comment in track(enumerate(self.reddit_object['comments']), description='Saving...') + ] + + print_substep('Saved Text to MP3 files successfully.', style='bold green') + return [ + comments for comments, condition in + zip(self.reddit_object['comments'], sync_tasks_primary) + if condition ] - idy = None - for idy, text_cut in enumerate(split_text): - # print(f"{idx}-{idy}: {text_cut}\n") - self.call_tts(f"{idx}-{idy}.part", text_cut) - split_files.append(AudioFileClip(f"{self.path}/{idx}-{idy}.part.mp3")) - CompositeAudioClip([concatenate_audioclips(split_files)]).write_audiofile( - f"{self.path}/{idx}.mp3", fps=44100, verbose=False, logger=None + def call_tts( + self, + filename: str, + text: str + ) -> bool: + self.tts_module.run( + text=self.process_text(text), + filepath=f'{self.path}/{filename}.mp3' ) - for i in split_files: - name = i.filename - i.close() - Path(name).unlink() - - # for i in range(0, idy + 1): - # print(f"Cleaning up {self.path}/{idx}-{i}.part.mp3") - - # Path(f"{self.path}/{idx}-{i}.part.mp3").unlink() - - def call_tts(self, filename: str, text: str): - self.tts_module.run(text=process_text(text), filepath=f"{self.path}/{filename}.mp3") - # try: - # self.length += MP3(f"{self.path}/{filename}.mp3").info.length - # except (MutagenError, HeaderNotFoundError): - # self.length += sox.file_info.duration(f"{self.path}/{filename}.mp3") - clip = AudioFileClip(f"{self.path}/{filename}.mp3") - self.length += clip.duration - clip.close() - - -def process_text(text: str): - lang = settings.config["reddit"]["thread"]["post_lang"] - new_text = sanitize_text(text) - if lang: - print_substep("Translating Text...") - translated_text = ts.google(text, to_language=lang) - new_text = sanitize_text(translated_text) - return new_text + clip_length = audio_length(f'assets/audio/{filename}.mp3') + + if self.__total_length + clip_length <= self.max_length: + self.max_length += clip_length + return True + return False + + @staticmethod + def process_text( + text: str, + ) -> str: + lang = settings.config['reddit']['thread']['post_lang'] + new_text = sanitize_text(text) + if lang: + print_substep('Translating Text...') + translated_text = ts.google(text, to_language=lang) + new_text = sanitize_text(translated_text) + return new_text diff --git a/main.py b/main.py index 411ed90..82d459b 100755 --- a/main.py +++ b/main.py @@ -1,6 +1,5 @@ #!/usr/bin/env python from asyncio import run -import math from subprocess import Popen from os import name from reddit.subreddit import get_subreddit_threads @@ -41,13 +40,10 @@ async def main( ): cleanup() reddit_object = get_subreddit_threads(POST_ID) - length, number_of_comments = save_text_to_mp3(reddit_object) - length = math.ceil(length) - await RedditScreenshot(reddit_object, number_of_comments).download() + comments_created = save_text_to_mp3(reddit_object) + await RedditScreenshot(reddit_object, comments_created).download() bg_config = get_background_config() - download_background(bg_config) - chop_background_video(bg_config, length) - make_final_video(number_of_comments, length, reddit_object, bg_config) + make_final_video(comments_created, reddit_object, bg_config) async def run_many(times): diff --git a/video_creation/final_video.py b/video_creation/final_video.py index f1e1f96..b7b40d2 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -3,7 +3,7 @@ import multiprocessing import os import re from os.path import exists -from typing import Dict, Tuple, Any +from typing import Tuple, Any import translators as ts @@ -13,7 +13,6 @@ from moviepy.editor import ( ImageClip, concatenate_videoclips, concatenate_audioclips, - CompositeAudioClip, CompositeVideoClip, ) from moviepy.video.io.ffmpeg_tools import ffmpeg_merge_video_audio, ffmpeg_extract_subclip @@ -23,24 +22,26 @@ from utils.cleanup import cleanup from utils.console import print_step, print_substep from utils.videos import save_data from utils import settings - +from video_creation.background import download_background, chop_background_video console = Console() -W, H = 1080, 1920 +W, H = 1080, 1920 # TODO move to config -def name_normalize(name: str) -> str: +def name_normalize( + name: str +) -> str: name = re.sub(r'[?\\"%*:|<>]', "", name) - name = re.sub(r"( [w,W]\s?\/\s?[o,O,0])", r" without", name) - name = re.sub(r"( [w,W]\s?\/)", r" with", name) - name = re.sub(r"(\d+)\s?\/\s?(\d+)", r"\1 of \2", name) - name = re.sub(r"(\w+)\s?\/\s?(\w+)", r"\1 or \2", name) - name = re.sub(r"\/", r"", name) + name = re.sub(r'( [w,W]\s?\/\s?[o,O,0])', r' without', name) + name = re.sub(r'( [w,W]\s?\/)', r' with', name) + name = re.sub(r'(\d+)\s?\/\s?(\d+)', r'\1 of \2', name) + name = re.sub(r'(\w+)\s?\/\s?(\w+)', r'\1 or \2', name) + name = re.sub(r'\/', '', name) - lang = settings.config["reddit"]["thread"]["post_lang"] + lang = settings.config['reddit']['thread']['post_lang'] if lang: - print_substep("Translating filename...") + print_substep('Translating filename...') translated_name = ts.google(name, to_language=lang) return translated_name @@ -49,48 +50,46 @@ def name_normalize(name: str) -> str: def make_final_video( - number_of_clips: int, length: int, reddit_obj: dict, background_config: Tuple[str, str, str, Any] -): - """Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp + indexes_of_clips: list, + reddit_obj: dict, + background_config: Tuple[str, str, str, Any], +) -> None: + """ + Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp + Args: - number_of_clips (int): Index to end at when going through the screenshots' - length (int): Length of the video + indexes_of_clips (list): Indexes with created comments' reddit_obj (dict): The reddit object that contains the posts to read. background_config (Tuple[str, str, str, Any]): The background config to use. """ - print_step("Creating the final video 🎥") + print_step('Creating the final video 🎥') VideoFileClip.reW = lambda clip: clip.resize(width=W) VideoFileClip.reH = lambda clip: clip.resize(width=H) - opacity = settings.config["settings"]["opacity"] - background_clip = ( - VideoFileClip("assets/temp/background.mp4") - .without_audio() - .resize(height=H) - .crop(x1=1166.6, y1=0, x2=2246.6, y2=1920) - ) + opacity = settings.config['settings']['opacity'] + + final_length = 0 # Gather all audio clips - audio_clips = [AudioFileClip(f"assets/temp/mp3/{i}.mp3") for i in range(number_of_clips)] - audio_clips.insert(0, AudioFileClip("assets/temp/mp3/title.mp3")) - audio_concat = concatenate_audioclips(audio_clips) - audio_composite = CompositeAudioClip([audio_concat]) + audio_clips = [AudioFileClip(f'assets/temp/mp3/{i}.mp3') for i in indexes_of_clips] + audio_clips.insert(0, AudioFileClip('assets/temp/mp3/title.mp3')) + audio_composite = concatenate_audioclips(audio_clips) - console.log(f"[bold green] Video Will Be: {length} Seconds Long") + console.log(f'[bold green] Video Will Be: {audio_composite.length} Seconds Long') # add title to video image_clips = [] # Gather all images new_opacity = 1 if opacity is None or float(opacity) >= 1 else float(opacity) image_clips.insert( 0, - ImageClip("assets/temp/png/title.png") + ImageClip('assets/temp/png/title.png') .set_duration(audio_clips[0].duration) .resize(width=W - 100) .set_opacity(new_opacity), ) - for i in range(0, number_of_clips): + for i in indexes_of_clips: image_clips.append( - ImageClip(f"assets/temp/png/comment_{i}.png") + ImageClip(f'assets/temp/png/comment_{i}.png') .set_duration(audio_clips[i + 1].duration) .resize(width=W - 100) .set_opacity(new_opacity) @@ -109,63 +108,73 @@ def make_final_video( img_clip_pos = background_config[3] image_concat = concatenate_videoclips(image_clips).set_position(img_clip_pos) image_concat.audio = audio_composite + + download_background(background_config) + chop_background_video(background_config, final_length) + background_clip = ( + VideoFileClip("assets/temp/background.mp4") + .without_audio() + .resize(height=H) + .crop(x1=1166.6, y1=0, x2=2246.6, y2=1920) + ) + final = CompositeVideoClip([background_clip, image_concat]) - title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"]) - idx = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) + title = re.sub(r'[^\w\s-]', '', reddit_obj['thread_title']) + idx = re.sub(r'[^\w\s-]', '', reddit_obj['thread_id']) - filename = f"{name_normalize(title)}.mp4" - subreddit = settings.config["reddit"]["thread"]["subreddit"] + filename = f'{name_normalize(title)}.mp4' + subreddit = settings.config['reddit']['thread']['subreddit'] save_data(subreddit, filename, title, idx, background_config[2]) - if not exists(f"./results/{subreddit}"): - print_substep("The results folder didn't exist so I made it") - os.makedirs(f"./results/{subreddit}") + if not exists(f'./results/{subreddit}'): + print_substep('The results folder didn\'t exist so I made it') + os.makedirs(f'./results/{subreddit}') final.write_videofile( - "assets/temp/temp.mp4", + 'assets/temp/temp.mp4', fps=30, - audio_codec="aac", - audio_bitrate="192k", + audio_codec='aac', + audio_bitrate='192k', verbose=False, threads=multiprocessing.cpu_count(), ) - if settings.config["settings"]["background_audio"]: - print("[bold green] Merging background audio with video") - if not exists(f"assets/backgrounds/background.mp3"): + if settings.config['settings']['background_audio']: + print('[bold green] Merging background audio with video') + if not exists('assets/backgrounds/background.mp3'): print_substep( - "Cannot find assets/backgrounds/background.mp3 audio file didn't so skipping." + 'Cannot find assets/backgrounds/background.mp3 audio file didn\'t so skipping.' ) ffmpeg_extract_subclip( - "assets/temp/temp.mp4", + 'assets/temp/temp.mp4', 0, final.duration, - targetname=f"results/{subreddit}/{filename}", + targetname=f'results/{subreddit}/{filename}', ) else: ffmpeg_merge_video_audio( - "assets/temp/temp.mp4", - "assets/backgrounds/background.mp3", - "assets/temp/temp_audio.mp4", + 'assets/temp/temp.mp4', + 'assets/backgrounds/background.mp3', + 'assets/temp/temp_audio.mp4', ) ffmpeg_extract_subclip( # check if this gets run - "assets/temp/temp_audio.mp4", + 'assets/temp/temp_audio.mp4', 0, final.duration, targetname=f"results/{subreddit}/{filename}", ) else: - print("debug duck") + print('debug duck') ffmpeg_extract_subclip( - "assets/temp/temp.mp4", + 'assets/temp/temp.mp4', 0, final.duration, - targetname=f"results/{subreddit}/{filename}", + targetname=f'results/{subreddit}/{filename}', ) - print_step("Removing temporary files 🗑") + print_step('Removing temporary files 🗑') cleanups = cleanup() - print_substep(f"Removed {cleanups} temporary files 🗑") - print_substep("See result in the results folder!") + print_substep(f'Removed {cleanups} temporary files 🗑') + print_substep('See result in the results folder!') print_step( f'Reddit title: {reddit_obj["thread_title"]} \n Background Credit: {background_config[2]}' diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 12eba72..f4d2bca 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -209,10 +209,10 @@ class RedditScreenshot(Browser, Wait): """ Args: reddit_object (Dict): Reddit object received from reddit/subreddit.py - screenshot_num (int): Number of screenshots to download + screenshot_idx (int): List with indexes of voiced comments """ reddit_object: dict - screenshot_num: int = attrib() + screenshot_idx: list = attrib() @screenshot_num.validator def validate_screenshot_num(self, attribute, value): @@ -348,9 +348,8 @@ class RedditScreenshot(Browser, Wait): ) async_tasks_primary = [ - self.__collect_comment(comment, idx) for idx, comment in - enumerate(self.reddit_object['comments']) - if idx < self.screenshot_num + self.__collect_comment(self.reddit_object['comments'][idx], idx) for idx in + self.screenshot_idx ] for task in track( diff --git a/video_creation/voices.py b/video_creation/voices.py index ffc0898..b4eaf1f 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -1,55 +1,50 @@ -#!/usr/bin/env python - -from typing import Dict, Tuple - -from rich.console import Console - from TTS.engine_wrapper import TTSEngine from TTS.GTTS import GTTS from TTS.streamlabs_polly import StreamlabsPolly from TTS.aws_polly import AWSPolly from TTS.TikTok import TikTok + from utils import settings from utils.console import print_table, print_step -console = Console() - TTSProviders = { - "GoogleTranslate": GTTS, - "AWSPolly": AWSPolly, - "StreamlabsPolly": StreamlabsPolly, - "TikTok": TikTok, + 'GoogleTranslate': GTTS, + 'AWSPolly': AWSPolly, + 'StreamlabsPolly': StreamlabsPolly, + 'TikTok': TikTok, } -def save_text_to_mp3(reddit_obj) -> Tuple[int, int]: +async def save_text_to_mp3( + reddit_obj: dict, +) -> list: """Saves text to MP3 files. Args: reddit_obj (): Reddit object received from reddit API in reddit/subreddit.py Returns: - tuple[int,int]: (total length of the audio, the number of comments audio was generated for) + The number of comments audio was generated for """ - voice = settings.config["settings"]["tts"]["choice"] - if voice.casefold() in map(lambda _: _.casefold(), TTSProviders): - text_to_mp3 = TTSEngine(get_case_insensitive_key_value(TTSProviders, voice), reddit_obj) - else: + voice = settings.config['settings']['tts']['choice'] + if voice.casefold() not in map(lambda _: _.casefold(), TTSProviders): while True: - print_step("Please choose one of the following TTS providers: ") + print_step('Please choose one of the following TTS providers: ') print_table(TTSProviders) - choice = input("\n") - if choice.casefold() in map(lambda _: _.casefold(), TTSProviders): + voice = input('\n') + if voice.casefold() in map(lambda _: _.casefold(), TTSProviders): break - print("Unknown Choice") - text_to_mp3 = TTSEngine(get_case_insensitive_key_value(TTSProviders, choice), reddit_obj) - - return text_to_mp3.run() + print('Unknown Choice') + engine_instance = TTSEngine(get_case_insensitive_key_value(TTSProviders, voice), reddit_obj) + return await engine_instance.run() -def get_case_insensitive_key_value(input_dict, key): +def get_case_insensitive_key_value( + input_dict, + key +) -> object: return next( (value for dict_key, value in input_dict.items() if dict_key.lower() == key.lower()), None, From ed279b1ea1ebcb1feedcb6b7ef5a96471d28a442 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 02:08:05 +0300 Subject: [PATCH 05/39] fixes in final_video.py --- video_creation/final_video.py | 87 ++++++++++++++++++++----- video_creation/screenshot_downloader.py | 4 +- 2 files changed, 74 insertions(+), 17 deletions(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index b7b40d2..7d07b9f 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -17,6 +17,7 @@ from moviepy.editor import ( ) from moviepy.video.io.ffmpeg_tools import ffmpeg_merge_video_audio, ffmpeg_extract_subclip from rich.console import Console +from rich.progress import track from utils.cleanup import cleanup from utils.console import print_step, print_substep @@ -67,18 +68,70 @@ def make_final_video( VideoFileClip.reH = lambda clip: clip.resize(width=H) opacity = settings.config['settings']['opacity'] - final_length = 0 + def create_audio_clip( + clip_title: str | int, + clip_start: float, + ) -> 'AudioFileClip': + return ( + AudioFileClip(f'assets/audio/{clip_title}.mp3') + .set_start(clip_start) + ) + + video_duration = 0 # Gather all audio clips - audio_clips = [AudioFileClip(f'assets/temp/mp3/{i}.mp3') for i in indexes_of_clips] - audio_clips.insert(0, AudioFileClip('assets/temp/mp3/title.mp3')) + audio_clips = list() + + audio_title = create_audio_clip( + 'title', + 0, + ) + video_duration += audio_title.duration + audio_clips.append(audio_title) + indexes_for_videos = list() + + for audio in track( + indexes_of_clips, + description='Gathering audio clips...', + ): + temp_audio_clip = create_audio_clip( + audio, + video_duration, + ) + if video_duration + temp_audio_clip.duration > max_length: + continue + video_duration += temp_audio_clip.duration + audio_clips.append(temp_audio_clip) + indexes_for_videos.append(audio) + + for idx in indexes_of_clips: + audio_clips.append(AudioFileClip(f'assets/temp/mp3/{idx}.mp3')) audio_composite = concatenate_audioclips(audio_clips) console.log(f'[bold green] Video Will Be: {audio_composite.length} Seconds Long') # add title to video - image_clips = [] + image_clips = list() # Gather all images - new_opacity = 1 if opacity is None or float(opacity) >= 1 else float(opacity) + new_opacity = 1 if opacity is None or float(opacity) >= 1 else float(opacity) # TODO move to pydentic and percents + + def create_image_clip( + self, + image_title: str | int, + audio_start: float, + audio_end: float, + audio_duration: float, + ) -> 'ImageClip': + return ( + ImageClip(f'assets/temp/png/{image_title}.png') + .set_start(audio_start - self.time_before_tts) + .set_end(audio_end + self.time_before_tts) + .set_duration(self.time_before_tts * 2 + audio_duration, change_end=False) + .set_opacity(new_opacity) + .resize(width=W - 100) + ) + + index_offset = 1 + image_clips.insert( 0, ImageClip('assets/temp/png/title.png') @@ -87,12 +140,14 @@ def make_final_video( .set_opacity(new_opacity), ) - for i in indexes_of_clips: + for photo_idx in indexes_of_clips: image_clips.append( - ImageClip(f'assets/temp/png/comment_{i}.png') - .set_duration(audio_clips[i + 1].duration) - .resize(width=W - 100) - .set_opacity(new_opacity) + create_image_clip( + f'comment_{photo_idx}', + audio_clips[photo_idx + index_offset].start, + audio_clips[photo_idx + index_offset].end, + audio_clips[photo_idx + index_offset].duration + ) ) # if os.path.exists("assets/mp3/posttext.mp3"): @@ -110,9 +165,11 @@ def make_final_video( image_concat.audio = audio_composite download_background(background_config) - chop_background_video(background_config, final_length) + chop_background_video(background_config, video_duration) background_clip = ( - VideoFileClip("assets/temp/background.mp4") + VideoFileClip('assets/temp/background.mp4') + .set_start(0) + .set_end(video_duration) .without_audio() .resize(height=H) .crop(x1=1166.6, y1=0, x2=2246.6, y2=1920) @@ -148,7 +205,7 @@ def make_final_video( ffmpeg_extract_subclip( 'assets/temp/temp.mp4', 0, - final.duration, + video_duration, targetname=f'results/{subreddit}/{filename}', ) else: @@ -160,7 +217,7 @@ def make_final_video( ffmpeg_extract_subclip( # check if this gets run 'assets/temp/temp_audio.mp4', 0, - final.duration, + video_duration, targetname=f"results/{subreddit}/{filename}", ) else: @@ -168,7 +225,7 @@ def make_final_video( ffmpeg_extract_subclip( 'assets/temp/temp.mp4', 0, - final.duration, + video_duration, targetname=f'results/{subreddit}/{filename}', ) print_step('Removing temporary files 🗑') diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index f4d2bca..95109a6 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -214,8 +214,8 @@ class RedditScreenshot(Browser, Wait): reddit_object: dict screenshot_idx: list = attrib() - @screenshot_num.validator - def validate_screenshot_num(self, attribute, value): + @screenshot_idx.validator + def validate_screenshot_idx(self, attribute, value): if value <= 0: raise ValueError('Check screenshot_num in config') From b4f6fe41ca4fd792fece6ce5e70a6bfb13c74c63 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 04:37:21 +0300 Subject: [PATCH 06/39] fixes in async code & other fixes --- video_creation/final_video.py | 57 ++++++++++++------------- video_creation/screenshot_downloader.py | 31 ++++++++------ video_creation/voices.py | 15 +++++-- 3 files changed, 56 insertions(+), 47 deletions(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 7d07b9f..f566711 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -73,7 +73,7 @@ def make_final_video( clip_start: float, ) -> 'AudioFileClip': return ( - AudioFileClip(f'assets/audio/{clip_title}.mp3') + AudioFileClip(f'assets/temp/mp3/{clip_title}.mp3') .set_start(clip_start) ) @@ -90,32 +90,27 @@ def make_final_video( audio_clips.append(audio_title) indexes_for_videos = list() - for audio in track( - indexes_of_clips, + for idx, audio in track( + enumerate(indexes_of_clips), description='Gathering audio clips...', ): temp_audio_clip = create_audio_clip( audio, video_duration, ) - if video_duration + temp_audio_clip.duration > max_length: - continue - video_duration += temp_audio_clip.duration - audio_clips.append(temp_audio_clip) - indexes_for_videos.append(audio) - - for idx in indexes_of_clips: - audio_clips.append(AudioFileClip(f'assets/temp/mp3/{idx}.mp3')) + if video_duration + temp_audio_clip.duration <= max_length: + video_duration += temp_audio_clip.duration + audio_clips.append(temp_audio_clip) + indexes_for_videos.append(idx) + audio_composite = concatenate_audioclips(audio_clips) - console.log(f'[bold green] Video Will Be: {audio_composite.length} Seconds Long') - # add title to video - image_clips = list() + console.log(f'[bold green] Video Will Be: {video_duration} Seconds Long') + # Gather all images new_opacity = 1 if opacity is None or float(opacity) >= 1 else float(opacity) # TODO move to pydentic and percents def create_image_clip( - self, image_title: str | int, audio_start: float, audio_end: float, @@ -123,30 +118,32 @@ def make_final_video( ) -> 'ImageClip': return ( ImageClip(f'assets/temp/png/{image_title}.png') - .set_start(audio_start - self.time_before_tts) - .set_end(audio_end + self.time_before_tts) - .set_duration(self.time_before_tts * 2 + audio_duration, change_end=False) + .set_start(audio_start) + .set_end(audio_end) + .set_duration(audio_duration, change_end=False) .set_opacity(new_opacity) .resize(width=W - 100) ) - index_offset = 1 + # add title to video + image_clips = list() - image_clips.insert( - 0, - ImageClip('assets/temp/png/title.png') - .set_duration(audio_clips[0].duration) - .resize(width=W - 100) - .set_opacity(new_opacity), + image_clips.append( + create_image_clip( + 'title', + audio_clips[0].start, + audio_clips[0].end, + audio_clips[0].duration + ) ) - for photo_idx in indexes_of_clips: + for photo_idx in indexes_for_videos: image_clips.append( create_image_clip( f'comment_{photo_idx}', - audio_clips[photo_idx + index_offset].start, - audio_clips[photo_idx + index_offset].end, - audio_clips[photo_idx + index_offset].duration + audio_clips[photo_idx].start, + audio_clips[photo_idx].end, + audio_clips[photo_idx].duration ) ) @@ -218,7 +215,7 @@ def make_final_video( 'assets/temp/temp_audio.mp4', 0, video_duration, - targetname=f"results/{subreddit}/{filename}", + targetname=f'results/{subreddit}/{filename}', ) else: print('debug duck') diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 95109a6..81c7850 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -18,7 +18,6 @@ from attr import attrs, attrib from attr.validators import instance_of, optional from typing import TypeVar, Optional, Callable, Union - _function = TypeVar('_function', bound=Callable[..., object]) _exceptions = TypeVar('_exceptions', bound=Optional[Union[type, tuple, list]]) @@ -212,12 +211,7 @@ class RedditScreenshot(Browser, Wait): screenshot_idx (int): List with indexes of voiced comments """ reddit_object: dict - screenshot_idx: list = attrib() - - @screenshot_idx.validator - def validate_screenshot_idx(self, attribute, value): - if value <= 0: - raise ValueError('Check screenshot_num in config') + screenshot_idx: list async def __dark_theme( self, @@ -352,12 +346,21 @@ class RedditScreenshot(Browser, Wait): self.screenshot_idx ] - for task in track( - as_completed(async_tasks_primary), - description='Downloading screenshots...', - total=async_tasks_primary.__len__(), - ): - await task + def chunks(lst, n): + """Yield successive n-sized chunks from lst.""" + for i in range(0, len(lst), n): + yield lst[i:i + n] - print_substep('Screenshots downloaded Successfully.', style='bold green') + for idx, tasks in enumerate( + [chunk for chunk in chunks(async_tasks_primary, 15)], + start=1, + ): + for task in track( + as_completed(tasks), + description=f'Downloading comments: Chunk {idx}', + total=tasks.__len__() + ): + await task + + print_substep('Comments downloaded Successfully.', style='bold green') await self.close_browser() diff --git a/video_creation/voices.py b/video_creation/voices.py index b4eaf1f..e792ec3 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -37,13 +37,22 @@ async def save_text_to_mp3( if voice.casefold() in map(lambda _: _.casefold(), TTSProviders): break print('Unknown Choice') - engine_instance = TTSEngine(get_case_insensitive_key_value(TTSProviders, voice), reddit_obj) - return await engine_instance.run() + TTS_instance = get_case_insensitive_key_value(TTSProviders, voice) + if TTS_instance == StreamlabsPolly or TTS_instance == TikTok: + from aiohttp import ClientSession + + async with ClientSession() as client: + engine_instance = TTSEngine(TTS_instance(client), reddit_obj) + results = await engine_instance.run() + else: + engine_instance = TTSEngine(TTS_instance, reddit_obj) + results = await engine_instance.run() + return results def get_case_insensitive_key_value( input_dict, - key + key, ) -> object: return next( (value for dict_key, value in input_dict.items() if dict_key.lower() == key.lower()), From 01b932bf88785abb6874bfb87a6d3f8a607e129a Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 21:45:19 +0300 Subject: [PATCH 07/39] fixes --- TTS/engine_wrapper.py | 12 ++++++++---- video_creation/final_video.py | 1 + video_creation/voices.py | 15 +++------------ 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 1bacd86..e41e55c 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -66,7 +66,7 @@ class TTSEngine: print_substep('Saved Text to MP3 files successfully.', style='bold green') return [ comments for comments, condition in - zip(self.reddit_object['comments'], sync_tasks_primary) + zip(range(self.reddit_object['comments'].__len__()), sync_tasks_primary) if condition ] @@ -75,15 +75,19 @@ class TTSEngine: filename: str, text: str ) -> bool: - self.tts_module.run( + if not text: + return False + + self.tts_module().run( text=self.process_text(text), filepath=f'{self.path}/{filename}.mp3' ) - clip_length = audio_length(f'assets/audio/{filename}.mp3') + clip_length = audio_length(f'assets/temp/mp3/{filename}.mp3') + print(clip_length, '/', self.__total_length) if self.__total_length + clip_length <= self.max_length: - self.max_length += clip_length + self.__total_length += clip_length return True return False diff --git a/video_creation/final_video.py b/video_creation/final_video.py index f566711..9989415 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -28,6 +28,7 @@ from video_creation.background import download_background, chop_background_video console = Console() W, H = 1080, 1920 # TODO move to config +max_length: int = 50 # TODO move to config def name_normalize( diff --git a/video_creation/voices.py b/video_creation/voices.py index e792ec3..0098da0 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -16,7 +16,7 @@ TTSProviders = { } -async def save_text_to_mp3( +def save_text_to_mp3( reddit_obj: dict, ) -> list: """Saves text to MP3 files. @@ -37,17 +37,8 @@ async def save_text_to_mp3( if voice.casefold() in map(lambda _: _.casefold(), TTSProviders): break print('Unknown Choice') - TTS_instance = get_case_insensitive_key_value(TTSProviders, voice) - if TTS_instance == StreamlabsPolly or TTS_instance == TikTok: - from aiohttp import ClientSession - - async with ClientSession() as client: - engine_instance = TTSEngine(TTS_instance(client), reddit_obj) - results = await engine_instance.run() - else: - engine_instance = TTSEngine(TTS_instance, reddit_obj) - results = await engine_instance.run() - return results + engine_instance = TTSEngine(get_case_insensitive_key_value(TTSProviders, voice), reddit_obj) + return engine_instance.run() def get_case_insensitive_key_value( From 3498ffdf098fd1ea85f3104e01b2711cd1f8763c Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 21:59:11 +0300 Subject: [PATCH 08/39] added crunch for sync tiktok tts --- TTS/TikTok.py | 1 + TTS/engine_wrapper.py | 4 ++-- video_creation/data/videos.json | 11 ++++++++++- video_creation/final_video.py | 1 + 4 files changed, 14 insertions(+), 3 deletions(-) diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 743118c..9fa83b7 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -94,6 +94,7 @@ class TikTok: # TikTok Text-to-Speech Wrapper vstr = [r.json()["data"]["v_str"]][0] b64d = base64.b64decode(vstr) + with open(filepath, "wb") as out: out.write(b64d) diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index e41e55c..e8643e1 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -84,9 +84,9 @@ class TTSEngine: ) clip_length = audio_length(f'assets/temp/mp3/{filename}.mp3') - print(clip_length, '/', self.__total_length) + print(clip_length, '/', self.__total_length) # TODO remove debug - if self.__total_length + clip_length <= self.max_length: + if clip_length and self.__total_length + clip_length <= self.max_length: self.__total_length += clip_length return True return False diff --git a/video_creation/data/videos.json b/video_creation/data/videos.json index 0637a08..0a1ca06 100644 --- a/video_creation/data/videos.json +++ b/video_creation/data/videos.json @@ -1 +1,10 @@ -[] \ No newline at end of file +[ + { + "subreddit": "AskReddit", + "id": "vwgslz", + "time": "1657565829", + "background_credit": "bbswitzer", + "reddit_title": "Which singer should never have been famous", + "filename": "Which singer should never have been famous.mp4" + } +] \ No newline at end of file diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 9989415..5a9a5b0 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -28,6 +28,7 @@ from video_creation.background import download_background, chop_background_video console = Console() W, H = 1080, 1920 # TODO move to config + max_length: int = 50 # TODO move to config From 77dfedef3189d7e2bf48e1de6beff6ab18bbb584 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 22:55:35 +0300 Subject: [PATCH 09/39] fix in audio/video timings --- video_creation/data/videos.json | 11 +---------- video_creation/final_video.py | 2 +- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/video_creation/data/videos.json b/video_creation/data/videos.json index 0a1ca06..0637a08 100644 --- a/video_creation/data/videos.json +++ b/video_creation/data/videos.json @@ -1,10 +1 @@ -[ - { - "subreddit": "AskReddit", - "id": "vwgslz", - "time": "1657565829", - "background_credit": "bbswitzer", - "reddit_title": "Which singer should never have been famous", - "filename": "Which singer should never have been famous.mp4" - } -] \ No newline at end of file +[] \ No newline at end of file diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 5a9a5b0..c13026f 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -93,7 +93,7 @@ def make_final_video( indexes_for_videos = list() for idx, audio in track( - enumerate(indexes_of_clips), + enumerate(indexes_of_clips, start=1), description='Gathering audio clips...', ): temp_audio_clip = create_audio_clip( From 048369bcf6e5321a793cb3102a0105f094937752 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 23:24:48 +0300 Subject: [PATCH 10/39] fixed creating lots of tts & screenshots --- TTS/engine_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index bfd5a73..1f27521 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -63,6 +63,8 @@ class TTSEngine: sync_tasks_primary = [ self.call_tts(str(idx), comment['comment_body']) for idx, comment in track(enumerate(self.reddit_object['comments']), description='Saving...') + # Crunch, there will be fix in async TTS api + if self.__total_length + self.__total_length * 0.05 < self.max_length ] print_substep('Saved Text to MP3 files successfully.', style='bold green') @@ -109,7 +111,6 @@ class TTSEngine: ) clip_length = audio_length(f'assets/temp/mp3/{filename}.mp3') - print(clip_length, '/', self.__total_length) # TODO remove debug if clip_length and self.__total_length + clip_length <= self.max_length: self.__total_length += clip_length From e218ec5e7b269478fee04a8eca14ab7b8ac499fa Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Mon, 11 Jul 2022 23:58:32 +0300 Subject: [PATCH 11/39] changed screenshot size aka viewport & fixed indexes of photo clips --- video_creation/data/videos.json | 11 ++++++++++- video_creation/final_video.py | 8 ++++---- video_creation/screenshot_downloader.py | 11 ++++++----- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/video_creation/data/videos.json b/video_creation/data/videos.json index 0637a08..5c92929 100644 --- a/video_creation/data/videos.json +++ b/video_creation/data/videos.json @@ -1 +1,10 @@ -[] \ No newline at end of file +[ + { + "subreddit": "AskReddit", + "id": "vwgslz", + "time": "1657573375", + "background_credit": "bbswitzer", + "reddit_title": "Which singer should never have been famous", + "filename": "Which singer should never have been famous.mp4" + } +] \ No newline at end of file diff --git a/video_creation/final_video.py b/video_creation/final_video.py index c13026f..46513b8 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -93,7 +93,7 @@ def make_final_video( indexes_for_videos = list() for idx, audio in track( - enumerate(indexes_of_clips, start=1), + enumerate(indexes_of_clips), description='Gathering audio clips...', ): temp_audio_clip = create_audio_clip( @@ -143,9 +143,9 @@ def make_final_video( image_clips.append( create_image_clip( f'comment_{photo_idx}', - audio_clips[photo_idx].start, - audio_clips[photo_idx].end, - audio_clips[photo_idx].duration + audio_clips[photo_idx + 1].start, + audio_clips[photo_idx + 1].end, + audio_clips[photo_idx + 1].duration ) ) diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 81c7850..63be15f 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -85,7 +85,12 @@ class Browser: """ default_Viewport: dict = attrib( validator=instance_of(dict), - default=dict(), + default={ + 'defaultViewport': { + 'width': 500, + 'height': 900, + }, + }, kw_only=True, ) browser: Optional[BrowserCls] = attrib( @@ -94,10 +99,6 @@ class Browser: kw_only=True, ) - def __attrs_post_init__(self): - if self.default_Viewport.__len__() == 0: - self.default_Viewport['isLandscape'] = True - async def get_browser( self, ) -> None: From 1f6ea04b46c35c4d9452cda97c4fc784904149ce Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Tue, 12 Jul 2022 01:35:43 +0300 Subject: [PATCH 12/39] fixed NSFW button & improved logger --- video_creation/data/videos.json | 11 +--- video_creation/final_video.py | 13 +++-- video_creation/screenshot_downloader.py | 77 +++++++++++++++---------- 3 files changed, 55 insertions(+), 46 deletions(-) diff --git a/video_creation/data/videos.json b/video_creation/data/videos.json index 5c92929..0637a08 100644 --- a/video_creation/data/videos.json +++ b/video_creation/data/videos.json @@ -1,10 +1 @@ -[ - { - "subreddit": "AskReddit", - "id": "vwgslz", - "time": "1657573375", - "background_credit": "bbswitzer", - "reddit_title": "Which singer should never have been famous", - "filename": "Which singer should never have been famous.mp4" - } -] \ No newline at end of file +[] \ No newline at end of file diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 46513b8..6da3034 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -92,12 +92,12 @@ def make_final_video( audio_clips.append(audio_title) indexes_for_videos = list() - for idx, audio in track( - enumerate(indexes_of_clips), + for idx in track( + indexes_of_clips, description='Gathering audio clips...', ): temp_audio_clip = create_audio_clip( - audio, + idx, video_duration, ) if video_duration + temp_audio_clip.duration <= max_length: @@ -107,7 +107,7 @@ def make_final_video( audio_composite = concatenate_audioclips(audio_clips) - console.log(f'[bold green] Video Will Be: {video_duration} Seconds Long') + console.log('[bold green] Video Will Be: %.2f Seconds Long' % video_duration) # Gather all images new_opacity = 1 if opacity is None or float(opacity) >= 1 else float(opacity) # TODO move to pydentic and percents @@ -139,10 +139,11 @@ def make_final_video( ) ) - for photo_idx in indexes_for_videos: + for photo_idx in range(indexes_for_videos.__len__()): image_clips.append( create_image_clip( - f'comment_{photo_idx}', + f'comment_{indexes_for_videos[photo_idx]}', + # + title clip audio_clips[photo_idx + 1].start, audio_clips[photo_idx + 1].end, audio_clips[photo_idx + 1].duration diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 63be15f..3cc4dd4 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -27,12 +27,12 @@ class ExceptionDecorator: """ Factory for decorating functions """ - __exception: Optional[_exceptions] = attrib(default=None) + exception: Optional[_exceptions] = attrib(default=None) __default_exception: _exceptions = attrib(default=BrowserTimeoutError) def __attrs_post_init__(self): - if not self.__exception: - self.__exception = self.__default_exception + if not self.exception: + self.exception = self.__default_exception def __call__( self, @@ -45,13 +45,14 @@ class ExceptionDecorator: except Exception as caughtException: import logging - if isinstance(self.__exception, type): - if not type(caughtException) == self.__exception: - logging.basicConfig(filename='.webdriver.log', filemode='w', encoding='utf-8', - level=logging.DEBUG) + logging.basicConfig(filename='.webdriver.log', filemode='a+', + encoding='utf-8', level=logging.ERROR) + + if isinstance(self.exception, type): + if not type(caughtException) == self.exception: logging.error(f'unexpected error - {caughtException}') else: - if not type(caughtException) in self.__exception: + if not type(caughtException) in self.exception: logging.error(f'unexpected error - {caughtException}') return wrapper @@ -251,6 +252,32 @@ class RedditScreenshot(Browser, Wait): {'timeout': 5000}, ) + async def __close_nsfw( + self, + page_instance: PageCls + ) -> None: + from asyncio import ensure_future + + print_substep('Post is NSFW. You are spicy...') + # To await indirectly reload + navigation = ensure_future(page_instance.waitForNavigation()) + + # Triggers indirectly reload + await self.click( + page_instance, + '//button[text()="Yes"]', + {'timeout': 5000}, + ) + + # Await reload + await navigation + + await (await self.find_xpath( + page_instance, + '//button[text()="Click to see nsfw"]', + {'timeout': 5000}, + )).click() + async def __collect_comment( self, comment_obj: dict, @@ -289,14 +316,14 @@ class RedditScreenshot(Browser, Wait): """ Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png """ - await self.get_browser() print_step('Downloading screenshots of reddit posts...') + print_substep('Launching Headless Browser...') + await self.get_browser() + # ! Make sure the reddit screenshots folder exists Path('assets/temp/png').mkdir(parents=True, exist_ok=True) - print_substep('Launching Headless Browser...') - # Get the thread screenshot reddit_main = await self.browser.newPage() await reddit_main.goto(self.reddit_object['thread_url']) @@ -306,19 +333,7 @@ class RedditScreenshot(Browser, Wait): if self.reddit_object['is_nsfw']: # This means the post is NSFW and requires to click the proceed button. - - print_substep('Post is NSFW. You are spicy...') - await self.click( - reddit_main, - '//button[contains(text(), \'Yes\')]', - {'timeout': 5000}, - ) - - await self.click( - reddit_main, - '//button[contains(text(), \'nsfw\')]', - {'timeout': 5000}, - ) + await self.__close_nsfw(reddit_main) # Translates submission title if settings.config['reddit']['thread']['post_lang']: @@ -336,17 +351,19 @@ class RedditScreenshot(Browser, Wait): else: print_substep("Skipping translation...") - await self.screenshot( - reddit_main, - f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]', - {'path': f'assets/temp/png/title.png'}, - ) - async_tasks_primary = [ self.__collect_comment(self.reddit_object['comments'][idx], idx) for idx in self.screenshot_idx ] + async_tasks_primary.append( + self.screenshot( + reddit_main, + f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]', + {'path': f'assets/temp/png/title.png'}, + ) + ) + def chunks(lst, n): """Yield successive n-sized chunks from lst.""" for i in range(0, len(lst), n): From fee2d936e2a5d29c9efd3cc6da5c2b6b1d6d888c Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 13 Jul 2022 00:09:49 +0300 Subject: [PATCH 13/39] cherry-picked split text from async-tts-api --- TTS/GTTS.py | 16 ++-- TTS/TikTok.py | 158 +++++++++++++++++++++------------------- TTS/aws_polly.py | 81 ++++++++++---------- TTS/common.py | 58 +++++++++++++++ TTS/engine_wrapper.py | 29 +------- TTS/streamlabs_polly.py | 95 +++++++++++++----------- utils/voice.py | 4 +- 7 files changed, 245 insertions(+), 196 deletions(-) diff --git a/TTS/GTTS.py b/TTS/GTTS.py index 31e29df..c8d6ae8 100644 --- a/TTS/GTTS.py +++ b/TTS/GTTS.py @@ -1,23 +1,19 @@ #!/usr/bin/env python3 -import random from utils import settings from gtts import gTTS -max_chars = 0 - class GTTS: - def __init__(self): - self.max_chars = 0 - self.voices = [] + max_chars = 0 - def run(self, text, filepath): + @staticmethod + async def run( + text, + filepath + ) -> None: tts = gTTS( text=text, lang=settings.config["reddit"]["thread"]["post_lang"] or "en", slow=False, ) tts.save(filepath) - - def randomvoice(self): - return random.choice(self.voices) diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 9fa83b7..6a23bb8 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -1,102 +1,108 @@ import base64 from utils import settings -import random import requests from requests.adapters import HTTPAdapter, Retry -# from profanity_filter import ProfanityFilter -# pf = ProfanityFilter() -# Code by @JasonLovesDoggo -# https://twitter.com/scanlime/status/1512598559769702406 +from attr import attrs, attrib +from attr.validators import instance_of -nonhuman = [ # DISNEY VOICES - "en_us_ghostface", # Ghost Face - "en_us_chewbacca", # Chewbacca - "en_us_c3po", # C3PO - "en_us_stitch", # Stitch - "en_us_stormtrooper", # Stormtrooper - "en_us_rocket", # Rocket +from TTS.common import BaseApiTTS, get_random_voice + +# TTS examples: https://twitter.com/scanlime/status/1512598559769702406 + +voices = dict() + +voices['nonhuman'] = [ # DISNEY VOICES + 'en_us_ghostface', # Ghost Face + 'en_us_chewbacca', # Chewbacca + 'en_us_c3po', # C3PO + 'en_us_stitch', # Stitch + 'en_us_stormtrooper', # Stormtrooper + 'en_us_rocket', # Rocket # ENGLISH VOICES ] -human = [ - "en_au_001", # English AU - Female - "en_au_002", # English AU - Male - "en_uk_001", # English UK - Male 1 - "en_uk_003", # English UK - Male 2 - "en_us_001", # English US - Female (Int. 1) - "en_us_002", # English US - Female (Int. 2) - "en_us_006", # English US - Male 1 - "en_us_007", # English US - Male 2 - "en_us_009", # English US - Male 3 - "en_us_010", +voices['human'] = [ + 'en_au_001', # English AU - Female + 'en_au_002', # English AU - Male + 'en_uk_001', # English UK - Male 1 + 'en_uk_003', # English UK - Male 2 + 'en_us_001', # English US - Female (Int. 1) + 'en_us_002', # English US - Female (Int. 2) + 'en_us_006', # English US - Male 1 + 'en_us_007', # English US - Male 2 + 'en_us_009', # English US - Male 3 + 'en_us_010', ] -voices = nonhuman + human -noneng = [ - "fr_001", # French - Male 1 - "fr_002", # French - Male 2 - "de_001", # German - Female - "de_002", # German - Male - "es_002", # Spanish - Male +voices['non_eng'] = [ + 'fr_001', # French - Male 1 + 'fr_002', # French - Male 2 + 'de_001', # German - Female + 'de_002', # German - Male + 'es_002', # Spanish - Male # AMERICA VOICES - "es_mx_002", # Spanish MX - Male - "br_001", # Portuguese BR - Female 1 - "br_003", # Portuguese BR - Female 2 - "br_004", # Portuguese BR - Female 3 - "br_005", # Portuguese BR - Male + 'es_mx_002', # Spanish MX - Male + 'br_001', # Portuguese BR - Female 1 + 'br_003', # Portuguese BR - Female 2 + 'br_004', # Portuguese BR - Female 3 + 'br_005', # Portuguese BR - Male # ASIA VOICES - "id_001", # Indonesian - Female - "jp_001", # Japanese - Female 1 - "jp_003", # Japanese - Female 2 - "jp_005", # Japanese - Female 3 - "jp_006", # Japanese - Male - "kr_002", # Korean - Male 1 - "kr_003", # Korean - Female - "kr_004", # Korean - Male 2 + 'id_001', # Indonesian - Female + 'jp_001', # Japanese - Female 1 + 'jp_003', # Japanese - Female 2 + 'jp_005', # Japanese - Female 3 + 'jp_006', # Japanese - Male + 'kr_002', # Korean - Male 1 + 'kr_003', # Korean - Female + 'kr_004', # Korean - Male 2 ] -# good_voices = {'good': ['en_us_002', 'en_us_006'], -# 'ok': ['en_au_002', 'en_uk_001']} # less en_us_stormtrooper more less en_us_rocket en_us_ghostface +# good_voices: 'en_us_002', 'en_us_006' +# ok: 'en_au_002', 'en_uk_001' +# less: en_us_stormtrooper +# more or less: en_us_rocket, en_us_ghostface -class TikTok: # TikTok Text-to-Speech Wrapper - def __init__(self): - self.URI_BASE = ( - "https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/?text_speaker=" - ) - self.max_chars = 300 - self.voices = {"human": human, "nonhuman": nonhuman, "noneng": noneng} +@attrs(auto_attribs=True) +class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper + random_voice: bool = False + uri_base: str = attrib( + default='https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/', + kw_only=True, + ) + max_chars = 300 + decode_base64 = True - def run(self, text, filepath, random_voice: bool = False): - # if censor: - # req_text = pf.censor(req_text) - # pass - voice = ( - self.randomvoice() - if random_voice - else ( - settings.config["settings"]["tts"]["tiktok_voice"] - or random.choice(self.voices["human"]) - ) + def __attrs_post_init__(self): + self.voice = ( + get_random_voice(voices, 'human') + if self.random_voice + else str(settings.config['settings']['tts']['tiktok_voice']).lower() + if str(settings.config['settings']['tts']['tiktok_voice']).lower() in [ + voice.lower() for dict_title in voices for voice in voices[dict_title]] + else get_random_voice(voices, 'human') ) + + def make_request( + self, + text: str, + ): try: - r = requests.post(f"{self.URI_BASE}{voice}&req_text={text}&speaker_map_type=0") + r = requests.post( + self.uri_base, + params={ + 'text_speaker': self.voice, + 'req_text': text, + 'speaker_map_type': 0, + }) except requests.exceptions.SSLError: # https://stackoverflow.com/a/47475019/18516611 session = requests.Session() retry = Retry(connect=3, backoff_factor=0.5) adapter = HTTPAdapter(max_retries=retry) - session.mount("http://", adapter) - session.mount("https://", adapter) - r = session.post(f"{self.URI_BASE}{voice}&req_text={text}&speaker_map_type=0") + session.mount('http://', adapter) + session.mount('https://', adapter) + r = session.post(f'{self.uri_base}{self.voice}&req_text={text}&speaker_map_type=0') # print(r.text) - vstr = [r.json()["data"]["v_str"]][0] - b64d = base64.b64decode(vstr) - - - with open(filepath, "wb") as out: - out.write(b64d) - - def randomvoice(self): - return random.choice(self.voices["human"]) + return r.json()['data']['v_str'] diff --git a/TTS/aws_polly.py b/TTS/aws_polly.py index efd762b..9d52f6f 100644 --- a/TTS/aws_polly.py +++ b/TTS/aws_polly.py @@ -1,50 +1,58 @@ #!/usr/bin/env python3 from boto3 import Session from botocore.exceptions import BotoCoreError, ClientError, ProfileNotFound + import sys from utils import settings -import random +from attr import attrs + +from TTS.common import get_random_voice + voices = [ - "Brian", - "Emma", - "Russell", - "Joey", - "Matthew", - "Joanna", - "Kimberly", - "Amy", - "Geraint", - "Nicole", - "Justin", - "Ivy", - "Kendra", - "Salli", - "Raveena", + 'Brian', + 'Emma', + 'Russell', + 'Joey', + 'Matthew', + 'Joanna', + 'Kimberly', + 'Amy', + 'Geraint', + 'Nicole', + 'Justin', + 'Ivy', + 'Kendra', + 'Salli', + 'Raveena', ] +@attrs(auto_attribs=True) class AWSPolly: - def __init__(self): - self.max_chars = 0 - self.voices = voices + random_voice: bool = False + max_chars: int = 0 - def run(self, text, filepath, random_voice: bool = False): + def run( + self, + text, + filepath, + ): try: - session = Session(profile_name="polly") - polly = session.client("polly") - if random_voice: - voice = self.randomvoice() - else: - if not settings.config["settings"]["tts"]["aws_polly_voice"]: - raise ValueError( - f"Please set the TOML variable AWS_VOICE to a valid voice. options are: {voices}" - ) - voice = str(settings.config["settings"]["tts"]["aws_polly_voice"]).capitalize() + session = Session(profile_name='polly') + polly = session.client('polly') + voice = ( + get_random_voice(voices) + if self.random_voice + else str(settings.config['settings']['tts']['aws_polly_voice']).capitalize() + if str(settings.config['settings']['tts']['aws_polly_voice']).lower() in [voice.lower() for voice in + voices] + else get_random_voice(voices) + ) try: # Request speech synthesis response = polly.synthesize_speech( - Text=text, OutputFormat="mp3", VoiceId=voice, Engine="neural" + Text=text, OutputFormat='mp3', VoiceId=voice, Engine='neural' ) except (BotoCoreError, ClientError) as error: # The service returned an error, exit gracefully @@ -52,15 +60,15 @@ class AWSPolly: sys.exit(-1) # Access the audio stream from the response - if "AudioStream" in response: - file = open(filepath, "wb") - file.write(response["AudioStream"].read()) + if 'AudioStream' in response: + file = open(filepath, 'wb') + file.write(response['AudioStream'].read()) file.close() # print_substep(f"Saved Text {idx} to MP3 files successfully.", style="bold green") else: # The response didn't contain audio data, exit gracefully - print("Could not stream audio") + print('Could not stream audio') sys.exit(-1) except ProfileNotFound: print("You need to install the AWS CLI and configure your profile") @@ -71,6 +79,3 @@ class AWSPolly: """ ) sys.exit(-1) - - def randomvoice(self): - return random.choice(self.voices) diff --git a/TTS/common.py b/TTS/common.py index a56444e..73884f4 100644 --- a/TTS/common.py +++ b/TTS/common.py @@ -1,3 +1,61 @@ +import base64 +from random import choice +from typing import Union, Optional + + +class BaseApiTTS: + max_chars: int + decode_base64: bool = False + + @staticmethod + def text_len_sanitize( + text: str, + max_length: int, + ) -> list: + # Split by comma or dot (else you can lose intonations), if there is non, split by groups of 299 chars + if '.' in text and all([split_text.__len__() < max_length for split_text in text.split('.')]): + return text.split('.') + + if ',' in text and all([split_text.__len__() < max_length for split_text in text.split(',')]): + return text.split(',') + + return [text[i:i + max_length] for i in range(0, len(text), max_length)] + + def write_file( + self, + output_text: str, + filepath: str, + ) -> None: + decoded_text = base64.b64decode(output_text) if self.decode_base64 else output_text + + with open(filepath, 'wb') as out: + out.write(decoded_text) + + def run( + self, + text: str, + filepath: str, + ) -> None: + output_text = '' + if len(text) > self.max_chars: + for part in self.text_len_sanitize(text, self.max_chars): + if part: + output_text += self.make_request(part) + else: + output_text = self.make_request(text) + self.write_file(output_text, filepath) + + +def get_random_voice( + voices: Union[list, dict], + key: Optional[str] = None, +) -> str: + if isinstance(voices, list): + return choice(voices) + else: + return choice(voices[key]) + + def audio_length( path: str, ) -> float | int: diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 762aa47..b968015 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -74,33 +74,6 @@ class TTSEngine: if condition ] - def split_post(self, text: str, idx: int): - split_files = [] - split_text = [ - x.group().strip() - for x in re.finditer( - r" *(((.|\n){0," + str(self.tts_module().max_chars) + "})(\.|.$))", text - ) - ] - offset = 0 - for idy, text_cut in enumerate(split_text): - # print(f"{idx}-{idy}: {text_cut}\n") - if not text_cut or text_cut.isspace(): - offset += 1 - continue - - self.call_tts(f"{idx}-{idy - offset}.part", text_cut) - split_files.append(AudioFileClip(f"{self.path}/{idx}-{idy - offset}.part.mp3")) - - CompositeAudioClip([concatenate_audioclips(split_files)]).write_audiofile( - f"{self.path}/{idx}.mp3", fps=44100, verbose=False, logger=None - ) - - for i in split_files: - name = i.filename - i.close() - Path(name).unlink() - def call_tts( self, filename: str, @@ -114,7 +87,7 @@ class TTSEngine: filepath=f'{self.path}/{filename}.mp3' ) - clip_length = audio_length(f'assets/temp/mp3/{filename}.mp3') + clip_length = audio_length(f'{self.path}/{filename}.mp3') if clip_length and self.__total_length + clip_length <= self.max_length: self.__total_length += clip_length diff --git a/TTS/streamlabs_polly.py b/TTS/streamlabs_polly.py index 75c4f49..d2b765a 100644 --- a/TTS/streamlabs_polly.py +++ b/TTS/streamlabs_polly.py @@ -1,62 +1,71 @@ -import random import requests from requests.exceptions import JSONDecodeError from utils import settings +from attr import attrs, attrib + +from TTS.common import BaseApiTTS, get_random_voice from utils.voice import check_ratelimit voices = [ - "Brian", - "Emma", - "Russell", - "Joey", - "Matthew", - "Joanna", - "Kimberly", - "Amy", - "Geraint", - "Nicole", - "Justin", - "Ivy", - "Kendra", - "Salli", - "Raveena", + 'Brian', + 'Emma', + 'Russell', + 'Joey', + 'Matthew', + 'Joanna', + 'Kimberly', + 'Amy', + 'Geraint', + 'Nicole', + 'Justin', + 'Ivy', + 'Kendra', + 'Salli', + 'Raveena', ] # valid voices https://lazypy.ro/tts/ -class StreamlabsPolly: - def __init__(self): - self.url = "https://streamlabs.com/polly/speak" - self.max_chars = 550 - self.voices = voices +@attrs(auto_attribs=True) +class StreamlabsPolly(BaseApiTTS): + random_voice: bool = False + url: str = attrib( + default='https://streamlabs.com/polly/speak', + kw_only=True, + ) - def run(self, text, filepath, random_voice: bool = False): - if random_voice: - voice = self.randomvoice() - else: - if not settings.config["settings"]["tts"]["streamlabs_polly_voice"]: - raise ValueError( - f"Please set the config variable STREAMLABS_POLLY_VOICE to a valid voice. options are: {voices}" - ) - voice = str(settings.config["settings"]["tts"]["streamlabs_polly_voice"]).capitalize() - body = {"voice": voice, "text": text, "service": "polly"} - response = requests.post(self.url, data=body) - if not check_ratelimit(response): - self.run(text, filepath, random_voice) + max_chars = 550 + def make_request( + self, + text, + ): + voice = ( + get_random_voice(voices) + if self.random_voice + else str(settings.config['settings']['tts']['streamlabs_polly_voice']).capitalize() + if str(settings.config['settings']['tts']['streamlabs_polly_voice']).lower() in [ + voice.lower() for voice in voices] + else get_random_voice(voices) + ) + response = requests.post( + self.url, + data={ + 'voice': voice, + 'text': text, + 'service': 'polly', + }) + if not check_ratelimit(response): + return self.make_request(text) else: try: - voice_data = requests.get(response.json()["speak_url"]) - with open(filepath, "wb") as f: - f.write(voice_data.content) + results = requests.get(response.json()['speak_url']) + return results except (KeyError, JSONDecodeError): try: - if response.json()["error"] == "No text specified!": - raise ValueError("Please specify a text to convert to speech.") + if response.json()['error'] == 'No text specified!': + raise ValueError('Please specify a text to convert to speech.') except (KeyError, JSONDecodeError): - print("Error occurred calling Streamlabs Polly") - - def randomvoice(self): - return random.choice(self.voices) + print('Error occurred calling Streamlabs Polly') diff --git a/utils/voice.py b/utils/voice.py index a0709fa..7d20b1b 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -10,7 +10,9 @@ if sys.version_info[0] >= 3: from datetime import timezone -def check_ratelimit(response: Response): +def check_ratelimit( + response: Response +): """ Checks if the response is a ratelimit response. If it is, it sleeps for the time specified in the response. From adced74a0cb40b548038a8b97cf368a84beed562 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 13 Jul 2022 00:20:08 +0300 Subject: [PATCH 14/39] fixes with min_comments --- .config.template.toml | 2 +- utils/subreddit.py | 2 +- video_creation/background.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.config.template.toml b/.config.template.toml index 18f1b00..08d654e 100644 --- a/.config.template.toml +++ b/.config.template.toml @@ -16,7 +16,7 @@ subreddit = { optional = false, regex = "[_0-9a-zA-Z]+$", nmin = 3, explanation post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z])*$", explanation = "Used if you want to use a specific post.", example = "urdtfx" } max_comment_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "max number of characters a comment can have. default is 500", example = 500, oob_error = "the max comment length should be between 10 and 10000" } post_lang = { default = "", optional = true, explanation = "The language you would like to translate to.", example = "es-cr" } -min_comments = { default = 20, optional = false, nmin = 15, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 15 and 999999" } +min_comments = { default = 20, optional = false, nmin = 0, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 15 and 999999" } [settings] allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, diff --git a/utils/subreddit.py b/utils/subreddit.py index 4eb0108..c1efd0c 100644 --- a/utils/subreddit.py +++ b/utils/subreddit.py @@ -34,7 +34,7 @@ def get_subreddit_undone(submissions: list, subreddit): if submission.stickied: print_substep("This post was pinned by moderators. Skipping...") continue - if submission.num_comments <= int(settings.config["reddit"]["thread"]["min_comments"]): + if submission.num_comments < int(settings.config["reddit"]["thread"]["min_comments"]): print_substep( f'This post has under the specified minimum of comments ({settings.config["reddit"]["thread"]["min_comments"]}). Skipping...' ) diff --git a/video_creation/background.py b/video_creation/background.py index be0f46c..09daece 100644 --- a/video_creation/background.py +++ b/video_creation/background.py @@ -71,7 +71,7 @@ def get_background_config(): # Handle default / not supported background using default option. # Default : pick random from supported background. - if not choice or choice not in background_options: + if choice not in background_options: choice = random.choice(list(background_options.keys())) return background_options[choice] From 1f5995e0b8654e7807afbab7dc4d669bba5a41a2 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 13 Jul 2022 00:25:29 +0300 Subject: [PATCH 15/39] improvement in chunks --- video_creation/screenshot_downloader.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 3cc4dd4..6fba183 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -294,10 +294,10 @@ class RedditScreenshot(Browser, Wait): await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') # Translates submission' comment - if settings.config["reddit"]["thread"]["post_lang"]: + if settings.config['reddit']['thread']['post_lang']: comment_tl = ts.google( - comment_obj["comment_body"], - to_language=settings.config["reddit"]["thread"]["post_lang"], + comment_obj['comment_body'], + to_language=settings.config['reddit']['thread']['post_lang'], ) await comment_page.evaluate( f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) ' @@ -349,7 +349,7 @@ class RedditScreenshot(Browser, Wait): texts_in_tl, ) else: - print_substep("Skipping translation...") + print_substep('Skipping translation...') async_tasks_primary = [ self.__collect_comment(self.reddit_object['comments'][idx], idx) for idx in @@ -364,19 +364,20 @@ class RedditScreenshot(Browser, Wait): ) ) + # Lots of tabs - lots of memory + # chunk needed to minimize memory required def chunks(lst, n): - """Yield successive n-sized chunks from lst.""" + """Yield successive n-sized chunks from list.""" for i in range(0, len(lst), n): yield lst[i:i + n] - for idx, tasks in enumerate( - [chunk for chunk in chunks(async_tasks_primary, 15)], + for idx, chunked_tasks in enumerate( + [chunk for chunk in chunks(async_tasks_primary, 10)], start=1, ): for task in track( - as_completed(tasks), - description=f'Downloading comments: Chunk {idx}', - total=tasks.__len__() + as_completed(chunked_tasks), + description=f'Downloading comments: Chunk {idx}/{chunked_tasks.__len__()}', ): await task From a9775f348af808f5f2c27cd41003f78692b23557 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 13 Jul 2022 00:36:02 +0300 Subject: [PATCH 16/39] cherry-picked commit with TTS/pictures offsets from async-tts-api --- video_creation/final_video.py | 72 ++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 2dba251..1145e9f 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -28,6 +28,10 @@ console = Console() W, H = 1080, 1920 # TODO move to config max_length: int = 50 # TODO move to config +time_before_first_picture: float = 1 # TODO move to config +time_before_tts: float = 0.5 # TODO move to config +time_between_pictures: float = 1 # TODO move to config +delay_before_end: int = 1 # TODO move to config def name_normalize( @@ -61,7 +65,7 @@ def make_final_video( Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp Args: - indexes_of_clips (list): Indexes with created comments' + indexes_of_clips (list): Indexes of voiced comments reddit_obj (dict): The reddit object that contains the posts to read. background_config (Tuple[str, str, str, Any]): The background config to use. """ @@ -83,28 +87,31 @@ def make_final_video( # Gather all audio clips audio_clips = list() + correct_audio_offset = time_before_tts * 2 + time_between_pictures audio_title = create_audio_clip( 'title', - 0, + time_before_first_picture + time_before_tts, ) - video_duration += audio_title.duration + video_duration += audio_title.duration + time_before_first_picture + time_before_tts audio_clips.append(audio_title) indexes_for_videos = list() - for idx in track( - indexes_of_clips, + for idx, audio in track( + enumerate(indexes_of_clips, start=1), description='Gathering audio clips...', ): temp_audio_clip = create_audio_clip( - idx, - video_duration, + audio, + correct_audio_offset + video_duration, ) - if video_duration + temp_audio_clip.duration <= max_length: - video_duration += temp_audio_clip.duration + if video_duration + temp_audio_clip.duration + correct_audio_offset + delay_before_end <= max_length: + video_duration += temp_audio_clip.duration + correct_audio_offset audio_clips.append(temp_audio_clip) indexes_for_videos.append(idx) + video_duration += delay_before_end + audio_composite = concatenate_audioclips(audio_clips) console.log('[bold green] Video Will Be: %.2f Seconds Long' % video_duration) @@ -120,9 +127,9 @@ def make_final_video( ) -> 'ImageClip': return ( ImageClip(f'assets/temp/png/{image_title}.png') - .set_start(audio_start) - .set_end(audio_end) - .set_duration(audio_duration, change_end=False) + .set_start(audio_start - time_before_tts) + .set_end(audio_end + time_before_tts) + .set_duration(time_before_tts * 2 + audio_duration, change_end=False) .set_opacity(new_opacity) .resize(width=W - 100) ) @@ -139,14 +146,13 @@ def make_final_video( ) ) - for photo_idx in range(indexes_for_videos.__len__()): + for photo_idx in indexes_for_videos: image_clips.append( create_image_clip( - f'comment_{indexes_for_videos[photo_idx]}', - # + title clip - audio_clips[photo_idx + 1].start, - audio_clips[photo_idx + 1].end, - audio_clips[photo_idx + 1].duration + f'comment_{indexes_of_clips[photo_idx]}', + audio_clips[photo_idx].start, + audio_clips[photo_idx].end, + audio_clips[photo_idx].duration ) ) @@ -169,20 +175,40 @@ def make_final_video( background_clip = ( VideoFileClip('assets/temp/background.mp4') .set_start(0) - .set_end(video_duration) + .set_end(video_duration + delay_before_end) .without_audio() .resize(height=H) - .crop(x1=1166.6, y1=0, x2=2246.6, y2=1920) ) + back_video_width, back_video_height = background_clip.size + + # Fix for crop with vertical videos + if back_video_width < H: + background_clip = ( + background_clip + .resize(width=W) + ) + back_video_width, back_video_height = background_clip.size + background_clip = background_clip.crop( + x1=0, + x2=back_video_width, + y1=back_video_height / 2 - H / 2, + y2=back_video_height / 2 + H / 2 + ) + else: + background_clip = background_clip.crop( + x1=back_video_width / 2 - W / 2, + x2=back_video_width / 2 + W / 2, + y1=0, + y2=back_video_height + ) + final = CompositeVideoClip([background_clip, image_concat]) title = re.sub(r'[^\w\s-]', '', reddit_obj['thread_title']) idx = re.sub(r'[^\w\s-]', '', reddit_obj['thread_id']) filename = f'{name_normalize(title)}.mp4' - subreddit = settings.config['reddit']['thread']['subreddit'] - - save_data(subreddit, filename, title, idx, background_config[2]) + subreddit = str(settings.config['reddit']['thread']['subreddit']) if not exists(f'./results/{subreddit}'): print_substep('The results folder didn\'t exist so I made it') From 0b99c6911079ec4a9f940f166eeac0e203a33c03 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 13 Jul 2022 02:09:00 +0300 Subject: [PATCH 17/39] moved vars in config, fixes in clip indexes & cal_tts in audio length --- .config.template.toml | 9 +++- TTS/engine_wrapper.py | 33 +++++++----- main.py | 2 - utils/settings.py | 2 +- video_creation/final_video.py | 67 ++++++++++++++----------- video_creation/screenshot_downloader.py | 3 +- 6 files changed, 70 insertions(+), 46 deletions(-) diff --git a/.config.template.toml b/.config.template.toml index 08d654e..6fcb0b0 100644 --- a/.config.template.toml +++ b/.config.template.toml @@ -25,10 +25,17 @@ theme = { optional = false, default = "dark", example = "light", options = ["dar "light", ], explanation = "sets the Reddit theme, either LIGHT or DARK" } times_to_run = { optional = false, default = 1, example = 2, explanation = "used if you want to run multiple times. set to an int e.g. 4 or 29 or 1", type = "int", nmin = 1, oob_error = "It's very hard to run something less than once." } -opacity = { optional = false, default = 0.9, example = 0.8, explanation = "Sets the opacity of the comments when overlayed over the background", type = "float", nmin = 0, nmax = 1, oob_error = "The opacity HAS to be between 0 and 1", input_error = "The opacity HAS to be a decimal number between 0 and 1" } +opacity = { optional = false, default = 90, example = 80, explanation = "Sets the opacity (in percents) of the comments when overlayed over the background", type = "int", nmin = 10, nmax = 100, oob_error = "The opacity HAS to be between 10 and 100 percents", input_error = "The opacity HAS to be a number between 10 and 100" } storymode = { optional = true, type = "bool", default = false, example = false, options = [true, false, ], explanation = "not yet implemented" } +video_length = { optional = false, default = 50, example = 60, explanation = "Approximated final video length", type = "int", nmin = 15, oob_error = "15 seconds is short enought" } +time_before_first_picture = { optional = false, default = 0.5, example = 1.0, explanation = "Deley before first screenshot apears", type = "float", nmin = 0, oob_error = "Choose at least 0 second" } +time_before_tts = { optional = false, default = 0.5, example = 1.0, explanation = "Deley between screenshot and TTS", type = "float", nmin = 0, oob_error = "Choose at least 0 second" } +time_between_pictures = { optional = false, default = 0.5, example = 1.0, explanation = "Time between every screenshot", type = "float", nmin = 0, oob_error = "Choose at least 0 second" } +delay_before_end = { optional = false, default = 0.5, example = 1.0, explanation = "Deley before video ends", type = "float", nmin = 0, oob_error = "Choose at least 0 second" } +video_width = { optional = true, default = 1080, example = 1080, explanation = "Final video width", type = "int", nmin = 600, oob_error = "Choose at least 600 pixels wide" } +video_height = { optional = true, default = 1920, example = 1920, explanation = "Final video height", type = "int", nmin = 800, oob_error = "Choose at least 800 pixels long" } [settings.background] background_choice = { optional = true, default = "minecraft", example = "minecraft", options = ["minecraft", "gta", "rocket-league", "motor-gta", ""], explanation = "Sets the background for the video" } diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index b968015..64e439a 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -1,22 +1,21 @@ #!/usr/bin/env python3 from pathlib import Path -from typing import Tuple -import re +from typing import Union -# import sox -# from mutagen import MutagenError -# from mutagen.mp3 import MP3, HeaderNotFoundError import translators as ts from rich.progress import track from attr import attrs, attrib -from moviepy.editor import AudioFileClip, CompositeAudioClip, concatenate_audioclips - from utils.console import print_step, print_substep from utils.voice import sanitize_text from utils import settings from TTS.common import audio_length +from TTS.GTTS import GTTS +from TTS.streamlabs_polly import StreamlabsPolly +from TTS.TikTok import TikTok +from TTS.aws_polly import AWSPolly + @attrs(auto_attribs=True) class TTSEngine: @@ -31,15 +30,24 @@ class TTSEngine: Notes: tts_module must take the arguments text and filepath. """ - tts_module: object + tts_module: Union[GTTS, StreamlabsPolly, TikTok, AWSPolly] reddit_object: dict path: str = 'assets/temp/mp3' - max_length: int = 50 # TODO move to config __total_length: int = attrib( default=0, kw_only=True ) + def __attrs_post_init__(self): + self.tts_module = self.tts_module() + self.max_length: int = settings.config['settings']['video_length'] + self.time_before_tts: float = settings.config['settings']['time_before_tts'] + self.time_between_pictures: float = settings.config['settings']['time_between_pictures'] + self.__total_length = ( + settings.config['settings']['time_before_first_picture'] + + settings.config['settings']['delay_before_end'] + ) + def run( self ) -> list: @@ -82,15 +90,16 @@ class TTSEngine: if not text: return False - self.tts_module().run( + self.tts_module.run( text=self.process_text(text), filepath=f'{self.path}/{filename}.mp3' ) clip_length = audio_length(f'{self.path}/{filename}.mp3') + clip_offset = self.time_between_pictures + self.time_before_tts * 2 - if clip_length and self.__total_length + clip_length <= self.max_length: - self.__total_length += clip_length + if clip_length and self.__total_length + clip_length + clip_offset <= self.max_length: + self.__total_length += clip_length + clip_offset return True return False diff --git a/main.py b/main.py index 82d459b..2554514 100755 --- a/main.py +++ b/main.py @@ -9,8 +9,6 @@ from utils import settings # from utils.checker import envUpdate from video_creation.background import ( - download_background, - chop_background_video, get_background_config, ) from video_creation.final_video import make_final_video diff --git a/utils/settings.py b/utils/settings.py index a36f63e..1c77eba 100755 --- a/utils/settings.py +++ b/utils/settings.py @@ -9,7 +9,7 @@ from utils.console import handle_input console = Console() -config = dict # autocomplete +config = dict() # calling instance of a dict to calm lint down def crawl(obj: dict, func=lambda x, y: print(x, y, end="\n"), path=None): diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 1145e9f..7804194 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -10,7 +10,7 @@ from moviepy.editor import ( AudioFileClip, ImageClip, concatenate_videoclips, - concatenate_audioclips, + CompositeAudioClip, CompositeVideoClip, ) from moviepy.video.io.ffmpeg_tools import ffmpeg_merge_video_audio, ffmpeg_extract_subclip @@ -25,14 +25,6 @@ from video_creation.background import download_background, chop_background_video console = Console() -W, H = 1080, 1920 # TODO move to config - -max_length: int = 50 # TODO move to config -time_before_first_picture: float = 1 # TODO move to config -time_before_tts: float = 0.5 # TODO move to config -time_between_pictures: float = 1 # TODO move to config -delay_before_end: int = 1 # TODO move to config - def name_normalize( name: str @@ -69,10 +61,22 @@ def make_final_video( reddit_obj (dict): The reddit object that contains the posts to read. background_config (Tuple[str, str, str, Any]): The background config to use. """ + W: int = int(settings.config['settings']['video_width']) + H: int = int(settings.config['settings']['video_height']) + + if not W or not H: + W, H = 1080, 1920 + + max_length: int = int(settings.config['settings']['video_length']) + time_before_first_picture: float = settings.config['settings']['time_before_first_picture'] + time_before_tts: float = settings.config['settings']['time_before_tts'] + time_between_pictures: float = settings.config['settings']['time_between_pictures'] + delay_before_end: float = settings.config['settings']['delay_before_end'] + print_step('Creating the final video 🎥') VideoFileClip.reW = lambda clip: clip.resize(width=W) VideoFileClip.reH = lambda clip: clip.resize(width=H) - opacity = settings.config['settings']['opacity'] + opacity = settings.config['settings']['opacity'] / 100 def create_audio_clip( clip_title: str | int, @@ -97,39 +101,38 @@ def make_final_video( audio_clips.append(audio_title) indexes_for_videos = list() - for idx, audio in track( - enumerate(indexes_of_clips, start=1), + for audio_title in track( + indexes_of_clips, description='Gathering audio clips...', ): temp_audio_clip = create_audio_clip( - audio, + audio_title, correct_audio_offset + video_duration, ) if video_duration + temp_audio_clip.duration + correct_audio_offset + delay_before_end <= max_length: video_duration += temp_audio_clip.duration + correct_audio_offset audio_clips.append(temp_audio_clip) - indexes_for_videos.append(idx) + indexes_for_videos.append(audio_title) - video_duration += delay_before_end + video_duration += delay_before_end + time_before_tts - audio_composite = concatenate_audioclips(audio_clips) + # Can't use concatenate_audioclips here, it resets clips' start point + audio_composite = CompositeAudioClip(audio_clips) console.log('[bold green] Video Will Be: %.2f Seconds Long' % video_duration) # Gather all images - new_opacity = 1 if opacity is None or float(opacity) >= 1 else float(opacity) # TODO move to pydentic and percents + new_opacity = 1 if opacity is None or opacity >= 1 else opacity def create_image_clip( image_title: str | int, audio_start: float, - audio_end: float, audio_duration: float, ) -> 'ImageClip': return ( ImageClip(f'assets/temp/png/{image_title}.png') .set_start(audio_start - time_before_tts) - .set_end(audio_end + time_before_tts) - .set_duration(time_before_tts * 2 + audio_duration, change_end=False) + .set_duration(time_before_tts * 2 + audio_duration) .set_opacity(new_opacity) .resize(width=W - 100) ) @@ -137,22 +140,26 @@ def make_final_video( # add title to video image_clips = list() + # Accounting for title and other stuff if audio_clips + index_offset = 1 + image_clips.append( create_image_clip( 'title', audio_clips[0].start, - audio_clips[0].end, audio_clips[0].duration ) ) - for photo_idx in indexes_for_videos: + for idx, photo_idx in enumerate( + indexes_for_videos, + start=index_offset, + ): image_clips.append( create_image_clip( f'comment_{indexes_of_clips[photo_idx]}', - audio_clips[photo_idx].start, - audio_clips[photo_idx].end, - audio_clips[photo_idx].duration + audio_clips[idx].start, + audio_clips[idx].duration ) ) @@ -166,16 +173,14 @@ def make_final_video( # .set_opacity(float(opacity)), # ) # else: story mode stuff - img_clip_pos = background_config[3] - image_concat = concatenate_videoclips(image_clips).set_position(img_clip_pos) - image_concat.audio = audio_composite + image_concat = concatenate_videoclips(image_clips).set_position(background_config[3]) download_background(background_config) chop_background_video(background_config, video_duration) background_clip = ( VideoFileClip('assets/temp/background.mp4') .set_start(0) - .set_end(video_duration + delay_before_end) + .set_end(video_duration) .without_audio() .resize(height=H) ) @@ -203,7 +208,11 @@ def make_final_video( y2=back_video_height ) + [print(image.start, audio.start, '|', audio.end, image.end, end=f'\n{"-" * 10}\n') for + audio, image in zip(audio_clips, image_clips)] final = CompositeVideoClip([background_clip, image_concat]) + final.audio = audio_composite + title = re.sub(r'[^\w\s-]', '', reddit_obj['thread_title']) idx = re.sub(r'[^\w\s-]', '', reddit_obj['thread_id']) diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 6fba183..60aaa4f 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -375,9 +375,10 @@ class RedditScreenshot(Browser, Wait): [chunk for chunk in chunks(async_tasks_primary, 10)], start=1, ): + chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0) for task in track( as_completed(chunked_tasks), - description=f'Downloading comments: Chunk {idx}/{chunked_tasks.__len__()}', + description=f'Downloading comments: Chunk {idx}/{chunk_list}', ): await task From 0e35c5a534ce1585e6aa13e265332dad0f9f0e9e Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Thu, 14 Jul 2022 22:54:13 +0300 Subject: [PATCH 18/39] resolved Jason`s comments --- .config.template.toml | 2 +- main.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.config.template.toml b/.config.template.toml index 6fcb0b0..90096b6 100644 --- a/.config.template.toml +++ b/.config.template.toml @@ -16,7 +16,7 @@ subreddit = { optional = false, regex = "[_0-9a-zA-Z]+$", nmin = 3, explanation post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z])*$", explanation = "Used if you want to use a specific post.", example = "urdtfx" } max_comment_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "max number of characters a comment can have. default is 500", example = 500, oob_error = "the max comment length should be between 10 and 10000" } post_lang = { default = "", optional = true, explanation = "The language you would like to translate to.", example = "es-cr" } -min_comments = { default = 20, optional = false, nmin = 0, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 15 and 999999" } +min_comments = { default = 20, optional = false, nmin = 1, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 1 and 999999" } [settings] allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, diff --git a/main.py b/main.py index 2554514..a72246c 100755 --- a/main.py +++ b/main.py @@ -15,7 +15,9 @@ from video_creation.final_video import make_final_video from video_creation.screenshot_downloader import RedditScreenshot from video_creation.voices import save_text_to_mp3 -VERSION = "2.2.9" +__VERSION__ = "2.3" +__BRANCH__ = "master" + print( """ ██████╗ ███████╗██████╗ ██████╗ ██╗████████╗ ██╗ ██╗██╗██████╗ ███████╗ ██████╗ ███╗ ███╗ █████╗ ██╗ ██╗███████╗██████╗ @@ -30,7 +32,7 @@ print( print_markdown( "### Thanks for using this tool! [Feel free to contribute to this project on GitHub!](https://lewismenelaws.com) If you have any questions, feel free to reach out to me on Twitter or submit a GitHub issue. You can find solutions to many common problems in the [Documentation](https://luka-hietala.gitbook.io/documentation-for-the-reddit-bot/)" ) -print_step(f"You are using V{VERSION} of the bot") +print_step(f"You are using v{__VERSION__} of the bot") async def main( From acf679bfb6838733dc25a26204454aad4abe4fd6 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Fri, 15 Jul 2022 00:01:37 +0300 Subject: [PATCH 19/39] self review: improved typing & logging, removed unused imports, fixes in README --- README.md | 1 - TTS/GTTS.py | 8 ++++ TTS/TikTok.py | 27 +++++++----- TTS/aws_polly.py | 9 +++- TTS/common.py | 57 ++++++++++++++++++++++++- TTS/engine_wrapper.py | 26 +++++++++++ TTS/streamlabs_polly.py | 9 ++++ main.py | 1 - utils/settings.py | 2 +- utils/voice.py | 2 +- video_creation/data/videos.json | 2 +- video_creation/final_video.py | 4 +- video_creation/screenshot_downloader.py | 12 +++--- video_creation/voices.py | 3 +- 14 files changed, 136 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index cb82738..8aaf3d1 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,6 @@ The only original thing being done is the editing and gathering of all materials ## Requirements - Python 3.9+ -- Playwright (this should install automatically in installation) ## Installation 👩‍💻 diff --git a/TTS/GTTS.py b/TTS/GTTS.py index c8d6ae8..8974ddc 100644 --- a/TTS/GTTS.py +++ b/TTS/GTTS.py @@ -5,12 +5,20 @@ from gtts import gTTS class GTTS: max_chars = 0 + # voices = [] @staticmethod async def run( text, filepath ) -> None: + """ + Calls for TTS api + + Args: + text: text to be voiced over + filepath: name of the audio file + """ tts = gTTS( text=text, lang=settings.config["reddit"]["thread"]["post_lang"] or "en", diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 6a23bb8..83521b3 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -1,10 +1,8 @@ -import base64 from utils import settings import requests from requests.adapters import HTTPAdapter, Retry from attr import attrs, attrib -from attr.validators import instance_of from TTS.common import BaseApiTTS, get_random_voice @@ -74,8 +72,20 @@ class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper max_chars = 300 decode_base64 = True - def __attrs_post_init__(self): - self.voice = ( + def make_request( + self, + text: str, + ): + """ + Makes a requests to remote TTS service + + Args: + text: text to be voice over + + Returns: + Request's response + """ + voice = ( get_random_voice(voices, 'human') if self.random_voice else str(settings.config['settings']['tts']['tiktok_voice']).lower() @@ -83,16 +93,11 @@ class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper voice.lower() for dict_title in voices for voice in voices[dict_title]] else get_random_voice(voices, 'human') ) - - def make_request( - self, - text: str, - ): try: r = requests.post( self.uri_base, params={ - 'text_speaker': self.voice, + 'text_speaker': voice, 'req_text': text, 'speaker_map_type': 0, }) @@ -103,6 +108,6 @@ class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper adapter = HTTPAdapter(max_retries=retry) session.mount('http://', adapter) session.mount('https://', adapter) - r = session.post(f'{self.uri_base}{self.voice}&req_text={text}&speaker_map_type=0') + r = session.post(f'{self.uri_base}{voice}&req_text={text}&speaker_map_type=0') # print(r.text) return r.json()['data']['v_str'] diff --git a/TTS/aws_polly.py b/TTS/aws_polly.py index 9d52f6f..f8c28cd 100644 --- a/TTS/aws_polly.py +++ b/TTS/aws_polly.py @@ -37,7 +37,14 @@ class AWSPolly: self, text, filepath, - ): + ) -> None: + """ + Calls for TTS api + + Args: + text: text to be voiced over + filepath: name of the audio file + """ try: session = Session(profile_name='polly') polly = session.client('polly') diff --git a/TTS/common.py b/TTS/common.py index 73884f4..d4d0200 100644 --- a/TTS/common.py +++ b/TTS/common.py @@ -12,6 +12,16 @@ class BaseApiTTS: text: str, max_length: int, ) -> list: + """ + Splits text if it's too long to be a query + + Args: + text: text to be sanitized + max_length: maximum length of the query + + Returns: + Split text as a list + """ # Split by comma or dot (else you can lose intonations), if there is non, split by groups of 299 chars if '.' in text and all([split_text.__len__() < max_length for split_text in text.split('.')]): return text.split('.') @@ -26,6 +36,13 @@ class BaseApiTTS: output_text: str, filepath: str, ) -> None: + """ + Writes and decodes TTS responses in files + + Args: + output_text: text to be written + filepath: path/name of the file + """ decoded_text = base64.b64decode(output_text) if self.decode_base64 else output_text with open(filepath, 'wb') as out: @@ -36,6 +53,16 @@ class BaseApiTTS: text: str, filepath: str, ) -> None: + """ + Calls for TTS api and writes audio file + + Args: + text: text to be voice over + filepath: path/name of the file + + Returns: + + """ output_text = '' if len(text) > self.max_chars: for part in self.text_len_sanitize(text, self.max_chars): @@ -50,19 +77,45 @@ def get_random_voice( voices: Union[list, dict], key: Optional[str] = None, ) -> str: + """ + Return random voice from list or dict + + Args: + voices: list or dict of voices + key: key of a dict if you are using one + + Returns: + random voice as a str + """ if isinstance(voices, list): return choice(voices) else: - return choice(voices[key]) + return choice(voices[key] if key else list(voices.values())[0]) def audio_length( path: str, ) -> float | int: + """ + Gets the length of the audio file + + Args: + path: audio file path + + Returns: + length in seconds as an int + """ from mutagen.mp3 import MP3 try: audio = MP3(path) return audio.info.length - except Exception as e: # TODO add logging + except Exception as e: + import logging + + logger = logging.getLogger('spam_application') + logger.setLevel(logging.DEBUG) + handler = logging.FileHandler('tts_log', mode='a+', encoding='utf-8') + logger.addHandler(handler) + logger.error('Error occurred in audio_length:', e) return 0 diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 64e439a..0733198 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -39,7 +39,9 @@ class TTSEngine: ) def __attrs_post_init__(self): + # Calls an instance of the tts_module class self.tts_module = self.tts_module() + # Loading settings from the config self.max_length: int = settings.config['settings']['video_length'] self.time_before_tts: float = settings.config['settings']['time_before_tts'] self.time_between_pictures: float = settings.config['settings']['time_between_pictures'] @@ -51,7 +53,12 @@ class TTSEngine: def run( self ) -> list: + """ + Voices over comments & title of the submission + Returns: + Indexes of comments to be used in the final video + """ Path(self.path).mkdir(parents=True, exist_ok=True) # This file needs to be removed in case this post does not use post text @@ -87,6 +94,16 @@ class TTSEngine: filename: str, text: str ) -> bool: + """ + Calls for TTS api from the factory + + Args: + filename: name of audio file w/o .mp3 + text: text to be voiced over + + Returns: + True if audio files not exceeding the maximum length else false + """ if not text: return False @@ -107,6 +124,15 @@ class TTSEngine: def process_text( text: str, ) -> str: + """ + Sanitizes text for illegal characters and translates text + + Args: + text: text to be sanitized & translated + + Returns: + Processed text as a str + """ lang = settings.config['reddit']['thread']['post_lang'] new_text = sanitize_text(text) if lang: diff --git a/TTS/streamlabs_polly.py b/TTS/streamlabs_polly.py index d2b765a..ca6102b 100644 --- a/TTS/streamlabs_polly.py +++ b/TTS/streamlabs_polly.py @@ -42,6 +42,15 @@ class StreamlabsPolly(BaseApiTTS): self, text, ): + """ + Makes a requests to remote TTS service + + Args: + text: text to be voice over + + Returns: + Request's response + """ voice = ( get_random_voice(voices) if self.random_voice diff --git a/main.py b/main.py index a72246c..6d6f04a 100755 --- a/main.py +++ b/main.py @@ -7,7 +7,6 @@ from utils.cleanup import cleanup from utils.console import print_markdown, print_step from utils import settings -# from utils.checker import envUpdate from video_creation.background import ( get_background_config, ) diff --git a/utils/settings.py b/utils/settings.py index 1c77eba..8acae2a 100755 --- a/utils/settings.py +++ b/utils/settings.py @@ -9,7 +9,7 @@ from utils.console import handle_input console = Console() -config = dict() # calling instance of a dict to calm lint down +config = dict() # calling instance of a dict to calm lint down (dict[any] will work as well) def crawl(obj: dict, func=lambda x, y: print(x, y, end="\n"), path=None): diff --git a/utils/voice.py b/utils/voice.py index 7d20b1b..3113227 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -11,7 +11,7 @@ if sys.version_info[0] >= 3: def check_ratelimit( - response: Response + response: Response, ): """ Checks if the response is a ratelimit response. diff --git a/video_creation/data/videos.json b/video_creation/data/videos.json index 0637a08..fe51488 100644 --- a/video_creation/data/videos.json +++ b/video_creation/data/videos.json @@ -1 +1 @@ -[] \ No newline at end of file +[] diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 454b675..578eaab 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -13,7 +13,7 @@ from moviepy.editor import ( CompositeAudioClip, CompositeVideoClip, ) -from moviepy.video.io.ffmpeg_tools import ffmpeg_merge_video_audio, ffmpeg_extract_subclip +from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip from rich.console import Console from rich.progress import track @@ -35,7 +35,7 @@ def name_normalize( name = re.sub(r'(\d+)\s?\/\s?(\d+)', r'\1 of \2', name) name = re.sub(r'(\w+)\s?\/\s?(\w+)', r'\1 or \2', name) name = re.sub(r'\/', '', name) - name[:30] + name[:30] # the hell this little guy does? lang = settings.config['reddit']['thread']['post_lang'] if lang: diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 60aaa4f..a779f6f 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -25,7 +25,7 @@ _exceptions = TypeVar('_exceptions', bound=Optional[Union[type, tuple, list]]) @attrs class ExceptionDecorator: """ - Factory for decorating functions + Decorator factory for catching exceptions and writing logs """ exception: Optional[_exceptions] = attrib(default=None) __default_exception: _exceptions = attrib(default=BrowserTimeoutError) @@ -45,15 +45,17 @@ class ExceptionDecorator: except Exception as caughtException: import logging - logging.basicConfig(filename='.webdriver.log', filemode='a+', - encoding='utf-8', level=logging.ERROR) + logger = logging.getLogger('webdriver_log') + logger.setLevel(logging.DEBUG) + handler = logging.FileHandler('.webdriver.log', mode='a+', encoding='utf-8') + logger.addHandler(handler) if isinstance(self.exception, type): if not type(caughtException) == self.exception: - logging.error(f'unexpected error - {caughtException}') + logger.error(f'unexpected error - {caughtException}') else: if not type(caughtException) in self.exception: - logging.error(f'unexpected error - {caughtException}') + logger.error(f'unexpected error - {caughtException}') return wrapper diff --git a/video_creation/voices.py b/video_creation/voices.py index 95f0b2b..7d78e5f 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -29,7 +29,7 @@ def save_text_to_mp3( """ voice = settings.config['settings']['tts']['choice'] - if str(voice).casefold() not in map(lambda _: _.casefold(), TTSProviders): + if voice.casefold() not in map(lambda _: _.casefold(), TTSProviders): while True: print_step('Please choose one of the following TTS providers: ') print_table(TTSProviders) @@ -45,6 +45,7 @@ def get_case_insensitive_key_value( input_dict, key, ) -> object: + # TODO add a factory later return next( (value for dict_key, value in input_dict.items() if dict_key.lower() == key.lower()), None, From c921a72a571b9178a0636e5eee20d3383cdac2f8 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Fri, 15 Jul 2022 01:06:13 +0300 Subject: [PATCH 20/39] fixes in text_len_sanitize --- TTS/common.py | 33 ++++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/TTS/common.py b/TTS/common.py index d4d0200..b0d8898 100644 --- a/TTS/common.py +++ b/TTS/common.py @@ -23,13 +23,32 @@ class BaseApiTTS: Split text as a list """ # Split by comma or dot (else you can lose intonations), if there is non, split by groups of 299 chars - if '.' in text and all([split_text.__len__() < max_length for split_text in text.split('.')]): - return text.split('.') - - if ',' in text and all([split_text.__len__() < max_length for split_text in text.split(',')]): - return text.split(',') - - return [text[i:i + max_length] for i in range(0, len(text), max_length)] + split_text = '' + + split_text = list( + map(lambda x: x.strip() if x.strip()[-1] != '.' else x.strip()[:-1], + filter(lambda x: True if x else False, text.split('.'))) + ) + if split_text and all([chunk.__len__() < max_length for chunk in split_text]): + return split_text + + split_text = list( + map(lambda x: x.strip() if x.strip()[-1] != ',' else x.strip()[:-1], + filter(lambda x: True if x else False, text.split(',')) + ) + ) + if split_text and all([chunk.__len__() < max_length for chunk in split_text]): + return split_text + + return list( + map( + lambda x: x.strip() if x.strip()[-1] != '.' or x.strip()[-1] != ',' else x.strip()[:-1], + filter( + lambda x: True if x else False, + [text[i:i + max_length] for i in range(0, len(text), max_length)] + ) + ) + ) def write_file( self, From 38a9354f668b271019b0e2c9e3388d4c60bd69f9 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Fri, 15 Jul 2022 02:37:46 +0300 Subject: [PATCH 21/39] hotfix in indexes_for_videos --- video_creation/final_video.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 578eaab..ff12862 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -158,7 +158,7 @@ def make_final_video( ): image_clips.append( create_image_clip( - f'comment_{indexes_of_clips[photo_idx]}', + f'comment_{photo_idx}', audio_clips[idx].start, audio_clips[idx].duration ) From 6a2b448c73cf0868da991b37006d54c5467e0476 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Fri, 15 Jul 2022 03:52:27 +0300 Subject: [PATCH 22/39] more fixes --- TTS/common.py | 8 ++++---- video_creation/data/cookie-dark-mode.json | 14 -------------- video_creation/data/cookie-light-mode.json | 8 -------- video_creation/final_video.py | 10 ++++------ video_creation/screenshot_downloader.py | 4 ++-- 5 files changed, 10 insertions(+), 34 deletions(-) delete mode 100644 video_creation/data/cookie-dark-mode.json delete mode 100644 video_creation/data/cookie-light-mode.json diff --git a/TTS/common.py b/TTS/common.py index b0d8898..9f71090 100644 --- a/TTS/common.py +++ b/TTS/common.py @@ -114,7 +114,7 @@ def get_random_voice( def audio_length( path: str, -) -> float | int: +) -> Union[float, int]: """ Gets the length of the audio file @@ -132,9 +132,9 @@ def audio_length( except Exception as e: import logging - logger = logging.getLogger('spam_application') - logger.setLevel(logging.DEBUG) - handler = logging.FileHandler('tts_log', mode='a+', encoding='utf-8') + logger = logging.getLogger('tts_logger') + logger.setLevel(logging.ERROR) + handler = logging.FileHandler('.tts.log', mode='a+', encoding='utf-8') logger.addHandler(handler) logger.error('Error occurred in audio_length:', e) return 0 diff --git a/video_creation/data/cookie-dark-mode.json b/video_creation/data/cookie-dark-mode.json deleted file mode 100644 index 774f4cc..0000000 --- a/video_creation/data/cookie-dark-mode.json +++ /dev/null @@ -1,14 +0,0 @@ -[ - { - "name": "USER", - "value": "eyJwcmVmcyI6eyJ0b3BDb250ZW50RGlzbWlzc2FsVGltZSI6MCwiZ2xvYmFsVGhlbWUiOiJSRURESVQiLCJuaWdodG1vZGUiOnRydWUsImNvbGxhcHNlZFRyYXlTZWN0aW9ucyI6eyJmYXZvcml0ZXMiOmZhbHNlLCJtdWx0aXMiOmZhbHNlLCJtb2RlcmF0aW5nIjpmYWxzZSwic3Vic2NyaXB0aW9ucyI6ZmFsc2UsInByb2ZpbGVzIjpmYWxzZX0sInRvcENvbnRlbnRUaW1lc0Rpc21pc3NlZCI6MH19", - "domain": ".reddit.com", - "path": "/" - }, - { - "name": "eu_cookie", - "value": "{%22opted%22:true%2C%22nonessential%22:false}", - "domain": ".reddit.com", - "path": "/" - } -] diff --git a/video_creation/data/cookie-light-mode.json b/video_creation/data/cookie-light-mode.json deleted file mode 100644 index 048a3e3..0000000 --- a/video_creation/data/cookie-light-mode.json +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "name": "eu_cookie", - "value": "{%22opted%22:true%2C%22nonessential%22:false}", - "domain": ".reddit.com", - "path": "/" - } -] diff --git a/video_creation/final_video.py b/video_creation/final_video.py index ff12862..b92acb5 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -3,7 +3,7 @@ import multiprocessing import os import re from os.path import exists -from typing import Tuple, Any +from typing import Tuple, Any, Union from moviepy.editor import ( VideoFileClip, @@ -80,7 +80,7 @@ def make_final_video( opacity = settings.config['settings']['opacity'] / 100 def create_audio_clip( - clip_title: str | int, + clip_title: Union[str, int], clip_start: float, ) -> 'AudioFileClip': return ( @@ -126,7 +126,7 @@ def make_final_video( new_opacity = 1 if opacity is None or opacity >= 1 else opacity def create_image_clip( - image_title: str | int, + image_title: Union[str, int], audio_start: float, audio_duration: float, ) -> 'ImageClip': @@ -174,7 +174,7 @@ def make_final_video( # .set_opacity(float(opacity)), # ) # else: story mode stuff - image_concat = concatenate_videoclips(image_clips).set_position(background_config[3]) + image_concat = CompositeVideoClip(image_clips).set_position(background_config[3]) download_background(background_config) chop_background_video(background_config, video_duration) @@ -209,8 +209,6 @@ def make_final_video( y2=back_video_height ) - [print(image.start, audio.start, '|', audio.end, image.end, end=f'\n{"-" * 10}\n') for - audio, image in zip(audio_clips, image_clips)] final = CompositeVideoClip([background_clip, image_concat]) final.audio = audio_composite diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index a779f6f..19d958f 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -63,7 +63,7 @@ class ExceptionDecorator: def catch_exception( func: Optional[_function], exception: Optional[_exceptions] = None, -) -> ExceptionDecorator | _function: +) -> Union[ExceptionDecorator, _function]: """ Decorator for catching exceptions and writing logs @@ -91,7 +91,7 @@ class Browser: default={ 'defaultViewport': { 'width': 500, - 'height': 900, + 'height': 1200, }, }, kw_only=True, From 720511168cab41c6894d665a94f28a0c5620e7db Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 00:10:26 +0300 Subject: [PATCH 23/39] fixes with attrs.attrib mostly --- TTS/TikTok.py | 15 ++++++++------- TTS/aws_polly.py | 10 +++++++--- TTS/engine_wrapper.py | 23 +++++++++-------------- TTS/streamlabs_polly.py | 14 +++++++------- video_creation/final_video.py | 7 ++----- video_creation/screenshot_downloader.py | 2 +- 6 files changed, 34 insertions(+), 37 deletions(-) diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 83521b3..5561ac4 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -3,6 +3,7 @@ import requests from requests.adapters import HTTPAdapter, Retry from attr import attrs, attrib +from attr.validators import instance_of from TTS.common import BaseApiTTS, get_random_voice @@ -62,15 +63,15 @@ voices['non_eng'] = [ # more or less: en_us_rocket, en_us_ghostface -@attrs(auto_attribs=True) +@attrs class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper - random_voice: bool = False - uri_base: str = attrib( - default='https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/', - kw_only=True, + random_voice: bool = attrib( + validator=instance_of(bool), + default=False ) - max_chars = 300 - decode_base64 = True + uri_base: str = 'https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/' + max_chars: int = 300 + decode_base64: bool = True def make_request( self, diff --git a/TTS/aws_polly.py b/TTS/aws_polly.py index f8c28cd..f63ce61 100644 --- a/TTS/aws_polly.py +++ b/TTS/aws_polly.py @@ -4,7 +4,8 @@ from botocore.exceptions import BotoCoreError, ClientError, ProfileNotFound import sys from utils import settings -from attr import attrs +from attr import attrs, attrib +from attr.validators import instance_of from TTS.common import get_random_voice @@ -28,9 +29,12 @@ voices = [ ] -@attrs(auto_attribs=True) +@attrs class AWSPolly: - random_voice: bool = False + random_voice: bool = attrib( + validator=instance_of(bool), + default=False + ) max_chars: int = 0 def run( diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 0733198..5534269 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -17,26 +17,21 @@ from TTS.TikTok import TikTok from TTS.aws_polly import AWSPolly -@attrs(auto_attribs=True) +@attrs class TTSEngine: """Calls the given TTS engine to reduce code duplication and allow multiple TTS engines. Args: tts_module : The TTS module. Your module should handle the TTS itself and saving to the given path under the run method. reddit_object : The reddit object that contains the posts to read. - path (Optional) : The unix style path to save the mp3 files to. This must not have leading or trailing slashes. - max_length (Optional) : The maximum length of the mp3 files in total. Notes: tts_module must take the arguments text and filepath. """ - tts_module: Union[GTTS, StreamlabsPolly, TikTok, AWSPolly] - reddit_object: dict - path: str = 'assets/temp/mp3' - __total_length: int = attrib( - default=0, - kw_only=True - ) + tts_module: Union[GTTS, StreamlabsPolly, TikTok, AWSPolly] = attrib() + reddit_object: dict = attrib() + __path: str = 'assets/temp/mp3' + __total_length: int = 0 def __attrs_post_init__(self): # Calls an instance of the tts_module class @@ -59,12 +54,12 @@ class TTSEngine: Returns: Indexes of comments to be used in the final video """ - Path(self.path).mkdir(parents=True, exist_ok=True) + Path(self.__path).mkdir(parents=True, exist_ok=True) # This file needs to be removed in case this post does not use post text # so that it won't appear in the final video try: - Path(f'{self.path}/posttext.mp3').unlink() + Path(f'{self.__path}/posttext.mp3').unlink() except OSError: pass @@ -109,10 +104,10 @@ class TTSEngine: self.tts_module.run( text=self.process_text(text), - filepath=f'{self.path}/{filename}.mp3' + filepath=f'{self.__path}/{filename}.mp3' ) - clip_length = audio_length(f'{self.path}/{filename}.mp3') + clip_length = audio_length(f'{self.__path}/{filename}.mp3') clip_offset = self.time_between_pictures + self.time_before_tts * 2 if clip_length and self.__total_length + clip_length + clip_offset <= self.max_length: diff --git a/TTS/streamlabs_polly.py b/TTS/streamlabs_polly.py index ca6102b..7d2ca80 100644 --- a/TTS/streamlabs_polly.py +++ b/TTS/streamlabs_polly.py @@ -2,6 +2,7 @@ import requests from requests.exceptions import JSONDecodeError from utils import settings from attr import attrs, attrib +from attr.validators import instance_of from TTS.common import BaseApiTTS, get_random_voice from utils.voice import check_ratelimit @@ -28,15 +29,14 @@ voices = [ # valid voices https://lazypy.ro/tts/ -@attrs(auto_attribs=True) +@attrs class StreamlabsPolly(BaseApiTTS): - random_voice: bool = False - url: str = attrib( - default='https://streamlabs.com/polly/speak', - kw_only=True, + random_voice: bool = attrib( + validator=instance_of(bool), + default=False ) - - max_chars = 550 + url: str = 'https://streamlabs.com/polly/speak', + max_chars: int = 550 def make_request( self, diff --git a/video_creation/final_video.py b/video_creation/final_video.py index b92acb5..8904f87 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -9,7 +9,6 @@ from moviepy.editor import ( VideoFileClip, AudioFileClip, ImageClip, - concatenate_videoclips, CompositeAudioClip, CompositeVideoClip, ) @@ -35,7 +34,7 @@ def name_normalize( name = re.sub(r'(\d+)\s?\/\s?(\d+)', r'\1 of \2', name) name = re.sub(r'(\w+)\s?\/\s?(\w+)', r'\1 or \2', name) name = re.sub(r'\/', '', name) - name[:30] # the hell this little guy does? + # name[:30] # the hell this little guy does? commented until explained lang = settings.config['reddit']['thread']['post_lang'] if lang: @@ -44,9 +43,7 @@ def name_normalize( print_substep('Translating filename...') translated_name = ts.google(name, to_language=lang) return translated_name - - else: - return name + return name def make_final_video( diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 19d958f..de7a43d 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -46,7 +46,7 @@ class ExceptionDecorator: import logging logger = logging.getLogger('webdriver_log') - logger.setLevel(logging.DEBUG) + logger.setLevel(logging.ERROR) handler = logging.FileHandler('.webdriver.log', mode='a+', encoding='utf-8') logger.addHandler(handler) From c69fb1b55ee1e6afde36462528be7c5b911f4153 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 00:25:24 +0300 Subject: [PATCH 24/39] review fixes --- .config.template.toml | 4 ++-- TTS/common.py | 9 ++++++--- TTS/engine_wrapper.py | 6 ++++-- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/.config.template.toml b/.config.template.toml index 90096b6..877e45d 100644 --- a/.config.template.toml +++ b/.config.template.toml @@ -16,7 +16,7 @@ subreddit = { optional = false, regex = "[_0-9a-zA-Z]+$", nmin = 3, explanation post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z])*$", explanation = "Used if you want to use a specific post.", example = "urdtfx" } max_comment_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "max number of characters a comment can have. default is 500", example = 500, oob_error = "the max comment length should be between 10 and 10000" } post_lang = { default = "", optional = true, explanation = "The language you would like to translate to.", example = "es-cr" } -min_comments = { default = 20, optional = false, nmin = 1, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 1 and 999999" } +min_comments = { default = 20, optional = false, nmin = 15, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 1 and 999999" } [settings] allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, @@ -25,7 +25,7 @@ theme = { optional = false, default = "dark", example = "light", options = ["dar "light", ], explanation = "sets the Reddit theme, either LIGHT or DARK" } times_to_run = { optional = false, default = 1, example = 2, explanation = "used if you want to run multiple times. set to an int e.g. 4 or 29 or 1", type = "int", nmin = 1, oob_error = "It's very hard to run something less than once." } -opacity = { optional = false, default = 90, example = 80, explanation = "Sets the opacity (in percents) of the comments when overlayed over the background", type = "int", nmin = 10, nmax = 100, oob_error = "The opacity HAS to be between 10 and 100 percents", input_error = "The opacity HAS to be a number between 10 and 100" } +opacity = { optional = false, default = 0.9, example = 0.8, explanation = "Sets the opacity of the comments when overlayed over the background", type = "float", nmin = 0, nmax = 1, oob_error = "The opacity HAS to be between 0 and 1", input_error = "The opacity HAS to be a decimal number between 0 and 1" } storymode = { optional = true, type = "bool", default = false, example = false, options = [true, false, ], explanation = "not yet implemented" } diff --git a/TTS/common.py b/TTS/common.py index 9f71090..5e6f629 100644 --- a/TTS/common.py +++ b/TTS/common.py @@ -124,11 +124,14 @@ def audio_length( Returns: length in seconds as an int """ - from mutagen.mp3 import MP3 + from moviepy.editor import AudioFileClip try: - audio = MP3(path) - return audio.info.length + # please use something else here in the future + audio_clip = AudioFileClip(path) + audio_duration = audio_clip.duration + audio_clip.close() + return audio_duration except Exception as e: import logging diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 5534269..2a0986b 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -72,8 +72,10 @@ class TTSEngine: sync_tasks_primary = [ self.call_tts(str(idx), comment['comment_body']) - for idx, comment in track(enumerate(self.reddit_object['comments']), description='Saving...') - # Crunch, there will be fix in async TTS api + for idx, comment in track( + enumerate(self.reddit_object['comments']), + description='Saving...') + # Crunch, there will be fix in async TTS api, maybe if self.__total_length + self.__total_length * 0.05 < self.max_length ] From 1d968ee7b7d62550bad6549b0674222fc0a1014d Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 01:33:35 +0300 Subject: [PATCH 25/39] more fixes after review --- TTS/TikTok.py | 98 ++++++++++++------------- TTS/aws_polly.py | 48 ++++++------ TTS/common.py | 22 +++--- TTS/engine_wrapper.py | 41 ++++++----- TTS/streamlabs_polly.py | 50 ++++++------- reddit/subreddit.py | 2 +- utils/subreddit.py | 2 +- video_creation/final_video.py | 90 ++++++++++++----------- video_creation/screenshot_downloader.py | 79 ++++++++++---------- video_creation/voices.py | 16 ++-- 10 files changed, 227 insertions(+), 221 deletions(-) diff --git a/TTS/TikTok.py b/TTS/TikTok.py index 5561ac4..c321b89 100644 --- a/TTS/TikTok.py +++ b/TTS/TikTok.py @@ -11,49 +11,49 @@ from TTS.common import BaseApiTTS, get_random_voice voices = dict() -voices['nonhuman'] = [ # DISNEY VOICES - 'en_us_ghostface', # Ghost Face - 'en_us_chewbacca', # Chewbacca - 'en_us_c3po', # C3PO - 'en_us_stitch', # Stitch - 'en_us_stormtrooper', # Stormtrooper - 'en_us_rocket', # Rocket +voices["nonhuman"] = [ # DISNEY VOICES + "en_us_ghostface", # Ghost Face + "en_us_chewbacca", # Chewbacca + "en_us_c3po", # C3PO + "en_us_stitch", # Stitch + "en_us_stormtrooper", # Stormtrooper + "en_us_rocket", # Rocket # ENGLISH VOICES ] -voices['human'] = [ - 'en_au_001', # English AU - Female - 'en_au_002', # English AU - Male - 'en_uk_001', # English UK - Male 1 - 'en_uk_003', # English UK - Male 2 - 'en_us_001', # English US - Female (Int. 1) - 'en_us_002', # English US - Female (Int. 2) - 'en_us_006', # English US - Male 1 - 'en_us_007', # English US - Male 2 - 'en_us_009', # English US - Male 3 - 'en_us_010', +voices["human"] = [ + "en_au_001", # English AU - Female + "en_au_002", # English AU - Male + "en_uk_001", # English UK - Male 1 + "en_uk_003", # English UK - Male 2 + "en_us_001", # English US - Female (Int. 1) + "en_us_002", # English US - Female (Int. 2) + "en_us_006", # English US - Male 1 + "en_us_007", # English US - Male 2 + "en_us_009", # English US - Male 3 + "en_us_010", ] -voices['non_eng'] = [ - 'fr_001', # French - Male 1 - 'fr_002', # French - Male 2 - 'de_001', # German - Female - 'de_002', # German - Male - 'es_002', # Spanish - Male +voices["non_eng"] = [ + "fr_001", # French - Male 1 + "fr_002", # French - Male 2 + "de_001", # German - Female + "de_002", # German - Male + "es_002", # Spanish - Male # AMERICA VOICES - 'es_mx_002', # Spanish MX - Male - 'br_001', # Portuguese BR - Female 1 - 'br_003', # Portuguese BR - Female 2 - 'br_004', # Portuguese BR - Female 3 - 'br_005', # Portuguese BR - Male + "es_mx_002", # Spanish MX - Male + "br_001", # Portuguese BR - Female 1 + "br_003", # Portuguese BR - Female 2 + "br_004", # Portuguese BR - Female 3 + "br_005", # Portuguese BR - Male # ASIA VOICES - 'id_001', # Indonesian - Female - 'jp_001', # Japanese - Female 1 - 'jp_003', # Japanese - Female 2 - 'jp_005', # Japanese - Female 3 - 'jp_006', # Japanese - Male - 'kr_002', # Korean - Male 1 - 'kr_003', # Korean - Female - 'kr_004', # Korean - Male 2 + "id_001", # Indonesian - Female + "jp_001", # Japanese - Female 1 + "jp_003", # Japanese - Female 2 + "jp_005", # Japanese - Female 3 + "jp_006", # Japanese - Male + "kr_002", # Korean - Male 1 + "kr_003", # Korean - Female + "kr_004", # Korean - Male 2 ] @@ -69,7 +69,7 @@ class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper validator=instance_of(bool), default=False ) - uri_base: str = 'https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/' + uri_base: str = "https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/" max_chars: int = 300 decode_base64: bool = True @@ -87,28 +87,28 @@ class TikTok(BaseApiTTS): # TikTok Text-to-Speech Wrapper Request's response """ voice = ( - get_random_voice(voices, 'human') + get_random_voice(voices, "human") if self.random_voice - else str(settings.config['settings']['tts']['tiktok_voice']).lower() - if str(settings.config['settings']['tts']['tiktok_voice']).lower() in [ + else str(settings.config["settings"]["tts"]["tiktok_voice"]).lower() + if str(settings.config["settings"]["tts"]["tiktok_voice"]).lower() in [ voice.lower() for dict_title in voices for voice in voices[dict_title]] - else get_random_voice(voices, 'human') + else get_random_voice(voices, "human") ) try: r = requests.post( self.uri_base, params={ - 'text_speaker': voice, - 'req_text': text, - 'speaker_map_type': 0, + "text_speaker": voice, + "req_text": text, + "speaker_map_type": 0, }) except requests.exceptions.SSLError: # https://stackoverflow.com/a/47475019/18516611 session = requests.Session() retry = Retry(connect=3, backoff_factor=0.5) adapter = HTTPAdapter(max_retries=retry) - session.mount('http://', adapter) - session.mount('https://', adapter) - r = session.post(f'{self.uri_base}{voice}&req_text={text}&speaker_map_type=0') + session.mount("http://", adapter) + session.mount("https://", adapter) + r = session.post(f"{self.uri_base}{voice}&req_text={text}&speaker_map_type=0") # print(r.text) - return r.json()['data']['v_str'] + return r.json()["data"]["v_str"] diff --git a/TTS/aws_polly.py b/TTS/aws_polly.py index f63ce61..1a9c87b 100644 --- a/TTS/aws_polly.py +++ b/TTS/aws_polly.py @@ -11,21 +11,21 @@ from TTS.common import get_random_voice voices = [ - 'Brian', - 'Emma', - 'Russell', - 'Joey', - 'Matthew', - 'Joanna', - 'Kimberly', - 'Amy', - 'Geraint', - 'Nicole', - 'Justin', - 'Ivy', - 'Kendra', - 'Salli', - 'Raveena', + "Brian", + "Emma", + "Russell", + "Joey", + "Matthew", + "Joanna", + "Kimberly", + "Amy", + "Geraint", + "Nicole", + "Justin", + "Ivy", + "Kendra", + "Salli", + "Raveena", ] @@ -50,20 +50,20 @@ class AWSPolly: filepath: name of the audio file """ try: - session = Session(profile_name='polly') - polly = session.client('polly') + session = Session(profile_name="polly") + polly = session.client("polly") voice = ( get_random_voice(voices) if self.random_voice - else str(settings.config['settings']['tts']['aws_polly_voice']).capitalize() - if str(settings.config['settings']['tts']['aws_polly_voice']).lower() in [voice.lower() for voice in + else str(settings.config["settings"]["tts"]["aws_polly_voice"]).capitalize() + if str(settings.config["settings"]["tts"]["aws_polly_voice"]).lower() in [voice.lower() for voice in voices] else get_random_voice(voices) ) try: # Request speech synthesis response = polly.synthesize_speech( - Text=text, OutputFormat='mp3', VoiceId=voice, Engine='neural' + Text=text, OutputFormat="mp3", VoiceId=voice, Engine="neural" ) except (BotoCoreError, ClientError) as error: # The service returned an error, exit gracefully @@ -71,15 +71,15 @@ class AWSPolly: sys.exit(-1) # Access the audio stream from the response - if 'AudioStream' in response: - file = open(filepath, 'wb') - file.write(response['AudioStream'].read()) + if "AudioStream" in response: + file = open(filepath, "wb") + file.write(response["AudioStream"].read()) file.close() # print_substep(f"Saved Text {idx} to MP3 files successfully.", style="bold green") else: # The response didn't contain audio data, exit gracefully - print('Could not stream audio') + print("Could not stream audio") sys.exit(-1) except ProfileNotFound: print("You need to install the AWS CLI and configure your profile") diff --git a/TTS/common.py b/TTS/common.py index 5e6f629..f355a89 100644 --- a/TTS/common.py +++ b/TTS/common.py @@ -23,18 +23,16 @@ class BaseApiTTS: Split text as a list """ # Split by comma or dot (else you can lose intonations), if there is non, split by groups of 299 chars - split_text = '' - split_text = list( - map(lambda x: x.strip() if x.strip()[-1] != '.' else x.strip()[:-1], - filter(lambda x: True if x else False, text.split('.'))) + map(lambda x: x.strip() if x.strip()[-1] != "." else x.strip()[:-1], + filter(lambda x: True if x else False, text.split("."))) ) if split_text and all([chunk.__len__() < max_length for chunk in split_text]): return split_text split_text = list( - map(lambda x: x.strip() if x.strip()[-1] != ',' else x.strip()[:-1], - filter(lambda x: True if x else False, text.split(',')) + map(lambda x: x.strip() if x.strip()[-1] != "," else x.strip()[:-1], + filter(lambda x: True if x else False, text.split(",")) ) ) if split_text and all([chunk.__len__() < max_length for chunk in split_text]): @@ -42,7 +40,7 @@ class BaseApiTTS: return list( map( - lambda x: x.strip() if x.strip()[-1] != '.' or x.strip()[-1] != ',' else x.strip()[:-1], + lambda x: x.strip() if x.strip()[-1] != "." or x.strip()[-1] != "," else x.strip()[:-1], filter( lambda x: True if x else False, [text[i:i + max_length] for i in range(0, len(text), max_length)] @@ -64,7 +62,7 @@ class BaseApiTTS: """ decoded_text = base64.b64decode(output_text) if self.decode_base64 else output_text - with open(filepath, 'wb') as out: + with open(filepath, "wb") as out: out.write(decoded_text) def run( @@ -82,7 +80,7 @@ class BaseApiTTS: Returns: """ - output_text = '' + output_text = "" if len(text) > self.max_chars: for part in self.text_len_sanitize(text, self.max_chars): if part: @@ -135,9 +133,9 @@ def audio_length( except Exception as e: import logging - logger = logging.getLogger('tts_logger') + logger = logging.getLogger("tts_logger") logger.setLevel(logging.ERROR) - handler = logging.FileHandler('.tts.log', mode='a+', encoding='utf-8') + handler = logging.FileHandler(".tts.log", mode="a+", encoding="utf-8") logger.addHandler(handler) - logger.error('Error occurred in audio_length:', e) + logger.error("Error occurred in audio_length:", e) return 0 diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 2a0986b..af45d38 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -30,19 +30,19 @@ class TTSEngine: """ tts_module: Union[GTTS, StreamlabsPolly, TikTok, AWSPolly] = attrib() reddit_object: dict = attrib() - __path: str = 'assets/temp/mp3' + __path: str = "assets/temp/mp3" __total_length: int = 0 def __attrs_post_init__(self): # Calls an instance of the tts_module class self.tts_module = self.tts_module() # Loading settings from the config - self.max_length: int = settings.config['settings']['video_length'] - self.time_before_tts: float = settings.config['settings']['time_before_tts'] - self.time_between_pictures: float = settings.config['settings']['time_between_pictures'] + self.max_length: int = settings.config["settings"]["video_length"] + self.time_before_tts: float = settings.config["settings"]["time_before_tts"] + self.time_between_pictures: float = settings.config["settings"]["time_between_pictures"] self.__total_length = ( - settings.config['settings']['time_before_first_picture'] + - settings.config['settings']['delay_before_end'] + settings.config["settings"]["time_before_first_picture"] + + settings.config["settings"]["delay_before_end"] ) def run( @@ -59,30 +59,31 @@ class TTSEngine: # This file needs to be removed in case this post does not use post text # so that it won't appear in the final video try: - Path(f'{self.__path}/posttext.mp3').unlink() + Path(f"{self.__path}/posttext.mp3").unlink() except OSError: pass - print_step('Saving Text to MP3 files...') + print_step("Saving Text to MP3 files...") - self.call_tts('title', self.reddit_object['thread_title']) + self.call_tts("title", self.reddit_object["thread_title"]) - if self.reddit_object['thread_post'] and settings.config['settings']['storymode']: - self.call_tts('posttext', self.reddit_object['thread_post']) + if self.reddit_object["thread_post"] and settings.config["settings"]["storymode"]: + self.call_tts("posttext", self.reddit_object["thread_post"]) sync_tasks_primary = [ - self.call_tts(str(idx), comment['comment_body']) + self.call_tts(str(idx), comment["comment_body"]) for idx, comment in track( - enumerate(self.reddit_object['comments']), - description='Saving...') + enumerate(self.reddit_object["comments"]), + description="Saving...", + total=self.reddit_object["comments"].__len__()) # Crunch, there will be fix in async TTS api, maybe if self.__total_length + self.__total_length * 0.05 < self.max_length ] - print_substep('Saved Text to MP3 files successfully.', style='bold green') + print_substep("Saved Text to MP3 files successfully.", style="bold green") return [ comments for comments, condition in - zip(range(self.reddit_object['comments'].__len__()), sync_tasks_primary) + zip(range(self.reddit_object["comments"].__len__()), sync_tasks_primary) if condition ] @@ -106,10 +107,10 @@ class TTSEngine: self.tts_module.run( text=self.process_text(text), - filepath=f'{self.__path}/{filename}.mp3' + filepath=f"{self.__path}/{filename}.mp3" ) - clip_length = audio_length(f'{self.__path}/{filename}.mp3') + clip_length = audio_length(f"{self.__path}/{filename}.mp3") clip_offset = self.time_between_pictures + self.time_before_tts * 2 if clip_length and self.__total_length + clip_length + clip_offset <= self.max_length: @@ -130,10 +131,10 @@ class TTSEngine: Returns: Processed text as a str """ - lang = settings.config['reddit']['thread']['post_lang'] + lang = settings.config["reddit"]["thread"]["post_lang"] new_text = sanitize_text(text) if lang: - print_substep('Translating Text...') + print_substep("Translating Text...") translated_text = ts.google(text, to_language=lang) new_text = sanitize_text(translated_text) return new_text diff --git a/TTS/streamlabs_polly.py b/TTS/streamlabs_polly.py index 7d2ca80..a0b7e19 100644 --- a/TTS/streamlabs_polly.py +++ b/TTS/streamlabs_polly.py @@ -8,21 +8,21 @@ from TTS.common import BaseApiTTS, get_random_voice from utils.voice import check_ratelimit voices = [ - 'Brian', - 'Emma', - 'Russell', - 'Joey', - 'Matthew', - 'Joanna', - 'Kimberly', - 'Amy', - 'Geraint', - 'Nicole', - 'Justin', - 'Ivy', - 'Kendra', - 'Salli', - 'Raveena', + "Brian", + "Emma", + "Russell", + "Joey", + "Matthew", + "Joanna", + "Kimberly", + "Amy", + "Geraint", + "Nicole", + "Justin", + "Ivy", + "Kendra", + "Salli", + "Raveena", ] @@ -35,7 +35,7 @@ class StreamlabsPolly(BaseApiTTS): validator=instance_of(bool), default=False ) - url: str = 'https://streamlabs.com/polly/speak', + url: str = "https://streamlabs.com/polly/speak" max_chars: int = 550 def make_request( @@ -54,27 +54,27 @@ class StreamlabsPolly(BaseApiTTS): voice = ( get_random_voice(voices) if self.random_voice - else str(settings.config['settings']['tts']['streamlabs_polly_voice']).capitalize() - if str(settings.config['settings']['tts']['streamlabs_polly_voice']).lower() in [ + else str(settings.config["settings"]["tts"]["streamlabs_polly_voice"]).capitalize() + if str(settings.config["settings"]["tts"]["streamlabs_polly_voice"]).lower() in [ voice.lower() for voice in voices] else get_random_voice(voices) ) response = requests.post( self.url, data={ - 'voice': voice, - 'text': text, - 'service': 'polly', + "voice": voice, + "text": text, + "service": "polly", }) if not check_ratelimit(response): return self.make_request(text) else: try: - results = requests.get(response.json()['speak_url']) + results = requests.get(response.json()["speak_url"]) return results except (KeyError, JSONDecodeError): try: - if response.json()['error'] == 'No text specified!': - raise ValueError('Please specify a text to convert to speech.') + if response.json()["error"] == "No text specified!": + raise ValueError("Please specify a text to convert to speech.") except (KeyError, JSONDecodeError): - print('Error occurred calling Streamlabs Polly') + print("Error occurred calling Streamlabs Polly") diff --git a/reddit/subreddit.py b/reddit/subreddit.py index 651e9a1..50c1fb9 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -89,7 +89,7 @@ def get_subreddit_threads( content["thread_title"] = submission.title content["thread_post"] = submission.selftext content["thread_id"] = submission.id - content["is_nsfw"] = 'nsfw' in submission.whitelist_status + content["is_nsfw"] = "nsfw" in submission.whitelist_status content["comments"] = [] for top_level_comment in submission.comments: diff --git a/utils/subreddit.py b/utils/subreddit.py index 24f3956..0a6b1e6 100644 --- a/utils/subreddit.py +++ b/utils/subreddit.py @@ -37,7 +37,7 @@ def get_subreddit_undone(submissions: list, subreddit, times_checked=0): continue if submission.num_comments < int(settings.config["reddit"]["thread"]["min_comments"]): print_substep( - 'This post has under the specified minimum of comments' + "This post has under the specified minimum of comments" f'({settings.config["reddit"]["thread"]["min_comments"]}). Skipping...' ) continue diff --git a/video_creation/final_video.py b/video_creation/final_video.py index f6e21c3..80f4282 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -30,18 +30,18 @@ def name_normalize( name: str ) -> str: name = re.sub(r'[?\\"%*:|<>]', "", name) - name = re.sub(r'( [w,W]\s?\/\s?[o,O,0])', r' without', name) - name = re.sub(r'( [w,W]\s?\/)', r' with', name) - name = re.sub(r'(\d+)\s?\/\s?(\d+)', r'\1 of \2', name) - name = re.sub(r'(\w+)\s?\/\s?(\w+)', r'\1 or \2', name) - name = re.sub(r'\/', '', name) + name = re.sub(r"( [w,W]\s?\/\s?[o,O,0])", r" without", name) + name = re.sub(r"( [w,W]\s?\/)", r" with", name) + name = re.sub(r"(\d+)\s?\/\s?(\d+)", r"\1 of \2", name) + name = re.sub(r"(\w+)\s?\/\s?(\w+)", r"\1 or \2", name) + name = re.sub(r"\/", "", name) # name[:30] # the hell this little guy does? commented until explained - lang = settings.config['reddit']['thread']['post_lang'] + lang = settings.config["reddit"]["thread"]["post_lang"] if lang: import translators as ts - print_substep('Translating filename...') + print_substep("Translating filename...") translated_name = ts.google(name, to_language=lang) return translated_name return name @@ -60,29 +60,29 @@ def make_final_video( reddit_obj (dict): The reddit object that contains the posts to read. background_config (Tuple[str, str, str, Any]): The background config to use. """ - W: int = int(settings.config['settings']['video_width']) - H: int = int(settings.config['settings']['video_height']) + W: int = int(settings.config["settings"]["video_width"]) + H: int = int(settings.config["settings"]["video_height"]) if not W or not H: W, H = 1080, 1920 - max_length: int = int(settings.config['settings']['video_length']) - time_before_first_picture: float = settings.config['settings']['time_before_first_picture'] - time_before_tts: float = settings.config['settings']['time_before_tts'] - time_between_pictures: float = settings.config['settings']['time_between_pictures'] - delay_before_end: float = settings.config['settings']['delay_before_end'] + max_length: int = int(settings.config["settings"]["video_length"]) + time_before_first_picture: float = settings.config["settings"]["time_before_first_picture"] + time_before_tts: float = settings.config["settings"]["time_before_tts"] + time_between_pictures: float = settings.config["settings"]["time_between_pictures"] + delay_before_end: float = settings.config["settings"]["delay_before_end"] - print_step('Creating the final video 🎥') + print_step("Creating the final video 🎥") VideoFileClip.reW = lambda clip: clip.resize(width=W) VideoFileClip.reH = lambda clip: clip.resize(width=H) - opacity = settings.config['settings']['opacity'] / 100 + opacity = settings.config["settings"]["opacity"] / 100 def create_audio_clip( clip_title: Union[str, int], clip_start: float, ) -> 'AudioFileClip': return ( - AudioFileClip(f'assets/temp/mp3/{clip_title}.mp3') + AudioFileClip(f"assets/temp/mp3/{clip_title}.mp3") .set_start(clip_start) ) @@ -93,7 +93,7 @@ def make_final_video( correct_audio_offset = time_before_tts * 2 + time_between_pictures audio_title = create_audio_clip( - 'title', + "title", time_before_first_picture + time_before_tts, ) video_duration += audio_title.duration + time_before_first_picture + time_before_tts @@ -102,7 +102,8 @@ def make_final_video( for audio_title in track( indexes_of_clips, - description='Gathering audio clips...', + description="Gathering audio clips...", + total=indexes_of_clips.__len__() ): temp_audio_clip = create_audio_clip( audio_title, @@ -118,7 +119,7 @@ def make_final_video( # Can't use concatenate_audioclips here, it resets clips' start point audio_composite = CompositeAudioClip(audio_clips) - console.log('[bold green] Video Will Be: %.2f Seconds Long' % video_duration) + console.log("[bold green] Video Will Be: %.2f Seconds Long" % video_duration) # Gather all images new_opacity = 1 if opacity is None or opacity >= 1 else opacity @@ -129,7 +130,7 @@ def make_final_video( audio_duration: float, ) -> 'ImageClip': return ( - ImageClip(f'assets/temp/png/{image_title}.png') + ImageClip(f"assets/temp/png/{image_title}.png") .set_start(audio_start - time_before_tts) .set_duration(time_before_tts * 2 + audio_duration) .set_opacity(new_opacity) @@ -144,19 +145,23 @@ def make_final_video( image_clips.append( create_image_clip( - 'title', + "title", audio_clips[0].start, audio_clips[0].duration ) ) - for idx, photo_idx in enumerate( - indexes_for_videos, - start=index_offset, + for idx, photo_idx in track( + enumerate( + indexes_for_videos, + start=index_offset, + ), + description="Gathering audio clips...", + total=indexes_for_videos[index_offset:].__len__() ): image_clips.append( create_image_clip( - f'comment_{photo_idx}', + f"comment_{photo_idx}", audio_clips[idx].start, audio_clips[idx].duration ) @@ -174,12 +179,13 @@ def make_final_video( # else: story mode stuff # Can't use concatenate_videoclips here, it resets clips' start point - image_concat = CompositeVideoClip(image_clips).set_position(background_config[3]) + image_concat = CompositeVideoClip(image_clips) + image_concat.set_position(background_config[3]) download_background(background_config) chop_background_video(background_config, video_duration) background_clip = ( - VideoFileClip('assets/temp/background.mp4') + VideoFileClip("assets/temp/background.mp4") .set_start(0) .set_end(video_duration) .without_audio() @@ -212,15 +218,15 @@ def make_final_video( final = CompositeVideoClip([background_clip, image_concat]) final.audio = audio_composite - title = re.sub(r'[^\w\s-]', '', reddit_obj['thread_title']) - idx = re.sub(r'[^\w\s-]', '', reddit_obj['thread_id']) + title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"]) + idx = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) - filename = f'{name_normalize(title)}.mp4' - subreddit = str(settings.config['reddit']['thread']['subreddit']) + filename = f"{name_normalize(title)}.mp4" + subreddit = str(settings.config["reddit"]["thread"]["subreddit"]) - if not exists(f'./results/{subreddit}'): - print_substep('The results folder didn\'t exist so I made it') - os.makedirs(f'./results/{subreddit}') + if not exists(f"./results/{subreddit}"): + print_substep("The results folder didn't exist so I made it") + os.makedirs(f"./results/{subreddit}") # if settings.config["settings"]['background']["background_audio"] and exists(f"assets/backgrounds/background.mp3"): # audioclip = mpe.AudioFileClip(f"assets/backgrounds/background.mp3").set_duration(final.duration) @@ -235,10 +241,10 @@ def make_final_video( ) final.write_videofile( - 'assets/temp/temp.mp4', + "assets/temp/temp.mp4", fps=30, - audio_codec='aac', - audio_bitrate='192k', + audio_codec="aac", + audio_bitrate="192k", verbose=False, threads=multiprocessing.cpu_count(), ) @@ -246,13 +252,13 @@ def make_final_video( "assets/temp/temp.mp4", 0, video_duration, - targetname=f'results/{subreddit}/{filename}', + targetname=f"results/{subreddit}/{filename}", ) save_data(subreddit, filename, title, idx, background_config[2]) - print_step('Removing temporary files 🗑') + print_step("Removing temporary files 🗑") cleanups = cleanup() - print_substep(f'Removed {cleanups} temporary files 🗑') - print_substep('See result in the results folder!') + print_substep(f"Removed {cleanups} temporary files 🗑") + print_substep("See result in the results folder!") print_step( f'Reddit title: {reddit_obj["thread_title"]} \n Background Credit: {background_config[2]}' diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index de7a43d..0ec7e4e 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -18,8 +18,8 @@ from attr import attrs, attrib from attr.validators import instance_of, optional from typing import TypeVar, Optional, Callable, Union -_function = TypeVar('_function', bound=Callable[..., object]) -_exceptions = TypeVar('_exceptions', bound=Optional[Union[type, tuple, list]]) +_function = TypeVar("_function", bound=Callable[..., object]) +_exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]]) @attrs @@ -45,17 +45,17 @@ class ExceptionDecorator: except Exception as caughtException: import logging - logger = logging.getLogger('webdriver_log') + logger = logging.getLogger("webdriver_log") logger.setLevel(logging.ERROR) - handler = logging.FileHandler('.webdriver.log', mode='a+', encoding='utf-8') + handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8") logger.addHandler(handler) if isinstance(self.exception, type): if not type(caughtException) == self.exception: - logger.error(f'unexpected error - {caughtException}') + logger.error(f"unexpected error - {caughtException}") else: if not type(caughtException) in self.exception: - logger.error(f'unexpected error - {caughtException}') + logger.error(f"unexpected error - {caughtException}") return wrapper @@ -89,9 +89,9 @@ class Browser: default_Viewport: dict = attrib( validator=instance_of(dict), default={ - 'defaultViewport': { - 'width': 500, - 'height': 1200, + "defaultViewport": { + "width": 500, + "height": 1200, }, }, kw_only=True, @@ -230,28 +230,28 @@ class RedditScreenshot(Browser, Wait): await self.click( page_instance, - '//*[contains(@class, \'header-user-dropdown\')]', - {'timeout': 5000}, + "//*[contains(@class, 'header-user-dropdown')]", + {"timeout": 5000}, ) # It's normal not to find it, sometimes there is none :shrug: await self.click( page_instance, - '//*[contains(text(), \'Settings\')]/ancestor::button[1]', - {'timeout': 5000}, + "//*[contains(text(), 'Settings')]/ancestor::button[1]", + {"timeout": 5000}, ) await self.click( page_instance, - '//*[contains(text(), \'Dark Mode\')]/ancestor::button[1]', - {'timeout': 5000}, + "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]", + {"timeout": 5000}, ) # Closes settings await self.click( page_instance, - '//*[contains(@class, \'header-user-dropdown\')]', - {'timeout': 5000}, + "//*[contains(@class, 'header-user-dropdown')]", + {"timeout": 5000}, ) async def __close_nsfw( @@ -260,7 +260,7 @@ class RedditScreenshot(Browser, Wait): ) -> None: from asyncio import ensure_future - print_substep('Post is NSFW. You are spicy...') + print_substep("Post is NSFW. You are spicy...") # To await indirectly reload navigation = ensure_future(page_instance.waitForNavigation()) @@ -268,7 +268,7 @@ class RedditScreenshot(Browser, Wait): await self.click( page_instance, '//button[text()="Yes"]', - {'timeout': 5000}, + {"timeout": 5000}, ) # Await reload @@ -277,7 +277,7 @@ class RedditScreenshot(Browser, Wait): await (await self.find_xpath( page_instance, '//button[text()="Click to see nsfw"]', - {'timeout': 5000}, + {"timeout": 5000}, )).click() async def __collect_comment( @@ -296,10 +296,10 @@ class RedditScreenshot(Browser, Wait): await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') # Translates submission' comment - if settings.config['reddit']['thread']['post_lang']: + if settings.config["reddit"]["thread"]["post_lang"]: comment_tl = ts.google( - comment_obj['comment_body'], - to_language=settings.config['reddit']['thread']['post_lang'], + comment_obj["comment_body"], + to_language=settings.config["reddit"]["thread"]["post_lang"], ) await comment_page.evaluate( f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) ' @@ -309,7 +309,7 @@ class RedditScreenshot(Browser, Wait): await self.screenshot( comment_page, f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]', - {'path': f'assets/temp/png/comment_{filename_idx}.png'}, + {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) async def download( @@ -318,31 +318,31 @@ class RedditScreenshot(Browser, Wait): """ Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png """ - print_step('Downloading screenshots of reddit posts...') + print_step("Downloading screenshots of reddit posts...") - print_substep('Launching Headless Browser...') + print_substep("Launching Headless Browser...") await self.get_browser() # ! Make sure the reddit screenshots folder exists - Path('assets/temp/png').mkdir(parents=True, exist_ok=True) + Path("assets/temp/png").mkdir(parents=True, exist_ok=True) # Get the thread screenshot reddit_main = await self.browser.newPage() - await reddit_main.goto(self.reddit_object['thread_url']) + await reddit_main.goto(self.reddit_object["thread_url"]) - if settings.config['settings']['theme'] == 'dark': + if settings.config["settings"]["theme"] == "dark": await self.__dark_theme(reddit_main) - if self.reddit_object['is_nsfw']: + if self.reddit_object["is_nsfw"]: # This means the post is NSFW and requires to click the proceed button. await self.__close_nsfw(reddit_main) # Translates submission title - if settings.config['reddit']['thread']['post_lang']: - print_substep('Translating post...') + if settings.config["reddit"]["thread"]["post_lang"]: + print_substep("Translating post...") texts_in_tl = ts.google( - self.reddit_object['thread_title'], - to_language=settings.config['reddit']['thread']['post_lang'], + self.reddit_object["thread_title"], + to_language=settings.config["reddit"]["thread"]["post_lang"], ) await reddit_main.evaluate( @@ -351,10 +351,10 @@ class RedditScreenshot(Browser, Wait): texts_in_tl, ) else: - print_substep('Skipping translation...') + print_substep("Skipping translation...") async_tasks_primary = [ - self.__collect_comment(self.reddit_object['comments'][idx], idx) for idx in + self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in self.screenshot_idx ] @@ -362,7 +362,7 @@ class RedditScreenshot(Browser, Wait): self.screenshot( reddit_main, f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]', - {'path': f'assets/temp/png/title.png'}, + {"path": "assets/temp/png/title.png"}, ) ) @@ -380,9 +380,10 @@ class RedditScreenshot(Browser, Wait): chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0) for task in track( as_completed(chunked_tasks), - description=f'Downloading comments: Chunk {idx}/{chunk_list}', + description=f"Downloading comments: Chunk {idx}/{chunk_list}", + total=chunked_tasks.__len__(), ): await task - print_substep('Comments downloaded Successfully.', style='bold green') + print_substep("Comments downloaded Successfully.", style="bold green") await self.close_browser() diff --git a/video_creation/voices.py b/video_creation/voices.py index 7d78e5f..b372042 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -9,10 +9,10 @@ from utils.console import print_table, print_step TTSProviders = { - 'GoogleTranslate': GTTS, - 'AWSPolly': AWSPolly, - 'StreamlabsPolly': StreamlabsPolly, - 'TikTok': TikTok, + "GoogleTranslate": GTTS, + "AWSPolly": AWSPolly, + "StreamlabsPolly": StreamlabsPolly, + "TikTok": TikTok, } @@ -28,15 +28,15 @@ def save_text_to_mp3( The number of comments audio was generated for """ - voice = settings.config['settings']['tts']['choice'] + voice = settings.config["settings"]["tts"]["choice"] if voice.casefold() not in map(lambda _: _.casefold(), TTSProviders): while True: - print_step('Please choose one of the following TTS providers: ') + print_step("Please choose one of the following TTS providers: ") print_table(TTSProviders) - voice = input('\n') + voice = input("\n") if voice.casefold() in map(lambda _: _.casefold(), TTSProviders): break - print('Unknown Choice') + print("Unknown Choice") engine_instance = TTSEngine(get_case_insensitive_key_value(TTSProviders, voice), reddit_obj) return engine_instance.run() From a01da8a8bf13f3243973cefbb1af8eb3436d8ad7 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 23:26:02 +0300 Subject: [PATCH 26/39] refactor in final_video + fixes --- main.py | 4 +- video_creation/final_video.py | 443 ++++++++++++++++++---------------- 2 files changed, 233 insertions(+), 214 deletions(-) diff --git a/main.py b/main.py index eae5cb2..d0d21b9 100755 --- a/main.py +++ b/main.py @@ -13,7 +13,7 @@ from utils import settings from video_creation.background import ( get_background_config, ) -from video_creation.final_video import make_final_video +from video_creation.final_video import FinalVideo from video_creation.screenshot_downloader import RedditScreenshot from video_creation.voices import save_text_to_mp3 @@ -45,7 +45,7 @@ async def main( comments_created = save_text_to_mp3(reddit_object) await RedditScreenshot(reddit_object, comments_created).download() bg_config = get_background_config() - make_final_video(comments_created, reddit_object, bg_config) + FinalVideo().make(comments_created, reddit_object, bg_config) async def run_many(times): diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 80f4282..6621e5b 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -15,6 +15,7 @@ from moviepy.editor import ( from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip from rich.console import Console from rich.progress import track +from attr import attrs from utils.cleanup import cleanup from utils.console import print_step, print_substep @@ -23,243 +24,261 @@ from utils.videos import save_data from utils import settings from video_creation.background import download_background, chop_background_video -console = Console() - - -def name_normalize( - name: str -) -> str: - name = re.sub(r'[?\\"%*:|<>]', "", name) - name = re.sub(r"( [w,W]\s?\/\s?[o,O,0])", r" without", name) - name = re.sub(r"( [w,W]\s?\/)", r" with", name) - name = re.sub(r"(\d+)\s?\/\s?(\d+)", r"\1 of \2", name) - name = re.sub(r"(\w+)\s?\/\s?(\w+)", r"\1 or \2", name) - name = re.sub(r"\/", "", name) - # name[:30] # the hell this little guy does? commented until explained - - lang = settings.config["reddit"]["thread"]["post_lang"] - if lang: - import translators as ts - - print_substep("Translating filename...") - translated_name = ts.google(name, to_language=lang) - return translated_name - return name - - -def make_final_video( - indexes_of_clips: list, - reddit_obj: dict, - background_config: Tuple[str, str, str, Any], -) -> None: - """ - Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp - - Args: - indexes_of_clips (list): Indexes of voiced comments - reddit_obj (dict): The reddit object that contains the posts to read. - background_config (Tuple[str, str, str, Any]): The background config to use. - """ - W: int = int(settings.config["settings"]["video_width"]) - H: int = int(settings.config["settings"]["video_height"]) - - if not W or not H: - W, H = 1080, 1920 - - max_length: int = int(settings.config["settings"]["video_length"]) - time_before_first_picture: float = settings.config["settings"]["time_before_first_picture"] - time_before_tts: float = settings.config["settings"]["time_before_tts"] - time_between_pictures: float = settings.config["settings"]["time_between_pictures"] - delay_before_end: float = settings.config["settings"]["delay_before_end"] - - print_step("Creating the final video 🎥") - VideoFileClip.reW = lambda clip: clip.resize(width=W) - VideoFileClip.reH = lambda clip: clip.resize(width=H) - opacity = settings.config["settings"]["opacity"] / 100 +@attrs +class FinalVideo: + video_duration: int = 0 + console = Console() + + def __attrs_post_init__(self): + self.W: int = int(settings.config["settings"]["video_width"]) + self.H: int = int(settings.config["settings"]["video_height"]) + + if not self.W or not self.H: + self.W, self.H = 1080, 1920 + + self.vertical_video: bool = self.W < self.H + + self.max_length: int = int(settings.config["settings"]["video_length"]) + self.time_before_first_picture: float = settings.config["settings"]["time_before_first_picture"] + self.time_before_tts: float = settings.config["settings"]["time_before_tts"] + self.time_between_pictures: float = settings.config["settings"]["time_between_pictures"] + self.delay_before_end: float = settings.config["settings"]["delay_before_end"] + + self.opacity = settings.config["settings"]["opacity"] + self.opacity = 1 if self.opacity is None or self.opacity >= 1 else self.opacity + + @staticmethod + def name_normalize( + name: str + ) -> str: + name = re.sub(r'[?\\"%*:|<>]', "", name) + name = re.sub(r"( [w,W]\s?\/\s?[o,O,0])", r" without", name) + name = re.sub(r"( [w,W]\s?\/)", r" with", name) + name = re.sub(r"(\d+)\s?\/\s?(\d+)", r"\1 of \2", name) + name = re.sub(r"(\w+)\s?\/\s?(\w+)", r"\1 or \2", name) + name = re.sub(r"\/", "", name) + # name[:30] # the hell this little guy does? commented until explained + + lang = settings.config["reddit"]["thread"]["post_lang"] + if lang: + import translators as ts + + print_substep("Translating filename...") + translated_name = ts.google(name, to_language=lang) + return translated_name + return name + + @staticmethod def create_audio_clip( clip_title: Union[str, int], clip_start: float, - ) -> 'AudioFileClip': + ) -> AudioFileClip: return ( AudioFileClip(f"assets/temp/mp3/{clip_title}.mp3") .set_start(clip_start) ) - video_duration = 0 - - # Gather all audio clips - audio_clips = list() - correct_audio_offset = time_before_tts * 2 + time_between_pictures - - audio_title = create_audio_clip( - "title", - time_before_first_picture + time_before_tts, - ) - video_duration += audio_title.duration + time_before_first_picture + time_before_tts - audio_clips.append(audio_title) - indexes_for_videos = list() - - for audio_title in track( - indexes_of_clips, - description="Gathering audio clips...", - total=indexes_of_clips.__len__() - ): - temp_audio_clip = create_audio_clip( - audio_title, - correct_audio_offset + video_duration, - ) - if video_duration + temp_audio_clip.duration + correct_audio_offset + delay_before_end <= max_length: - video_duration += temp_audio_clip.duration + correct_audio_offset - audio_clips.append(temp_audio_clip) - indexes_for_videos.append(audio_title) - - video_duration += delay_before_end + time_before_tts - - # Can't use concatenate_audioclips here, it resets clips' start point - audio_composite = CompositeAudioClip(audio_clips) - - console.log("[bold green] Video Will Be: %.2f Seconds Long" % video_duration) - - # Gather all images - new_opacity = 1 if opacity is None or opacity >= 1 else opacity - def create_image_clip( + self, image_title: Union[str, int], audio_start: float, audio_duration: float, - ) -> 'ImageClip': + clip_position: str, + ) -> ImageClip: return ( ImageClip(f"assets/temp/png/{image_title}.png") - .set_start(audio_start - time_before_tts) - .set_duration(time_before_tts * 2 + audio_duration) - .set_opacity(new_opacity) - .resize(width=W - 100) + .set_start(audio_start - self.time_before_tts) + .set_duration(self.time_before_tts * 2 + audio_duration) + .set_opacity(self.opacity) + .set_position(clip_position) + .resize( + width=self.W - self.W / 20 if self.vertical_video else None, + height=self.H - self.H / 5 if not self.vertical_video else None, + ) ) - # add title to video - image_clips = list() - - # Accounting for title and other stuff if audio_clips - index_offset = 1 - - image_clips.append( - create_image_clip( + def make( + self, + indexes_of_clips: list, + reddit_obj: dict, + background_config: Tuple[str, str, str, Any], + ) -> None: + """ + Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp + + Args: + indexes_of_clips (list): Indexes of voiced comments + reddit_obj (dict): The reddit object that contains the posts to read. + background_config (Tuple[str, str, str, Any]): The background config to use. + """ + print_step("Creating the final video 🎥") + + # Gather all audio clips + audio_clips = list() + correct_audio_offset = self.time_before_tts * 2 + self.time_between_pictures + + audio_title = self.create_audio_clip( "title", - audio_clips[0].start, - audio_clips[0].duration + self.time_before_first_picture + self.time_before_tts, ) - ) - - for idx, photo_idx in track( - enumerate( - indexes_for_videos, - start=index_offset, - ), - description="Gathering audio clips...", - total=indexes_for_videos[index_offset:].__len__() - ): + self.video_duration += audio_title.duration + self.time_before_first_picture + self.time_before_tts + audio_clips.append(audio_title) + indexes_for_videos = list() + + for audio_title in track( + indexes_of_clips, + description="Gathering audio clips...", + total=indexes_of_clips.__len__() + ): + temp_audio_clip = self.create_audio_clip( + audio_title, + correct_audio_offset + self.video_duration, + ) + if self.video_duration + temp_audio_clip.duration + \ + correct_audio_offset + self.delay_before_end <= self.max_length: + self.video_duration += temp_audio_clip.duration + correct_audio_offset + audio_clips.append(temp_audio_clip) + indexes_for_videos.append(audio_title) + + self.video_duration += self.delay_before_end + self.time_before_tts + + # Can't use concatenate_audioclips here, it resets clips' start point + audio_composite = CompositeAudioClip(audio_clips) + + self.console.log("[bold green] Video Will Be: %.2f Seconds Long" % self.video_duration) + + # Gather all images + image_clips = list() + + # Accounting for title and other stuff if audio_clips + index_offset = 1 + image_clips.append( - create_image_clip( - f"comment_{photo_idx}", - audio_clips[idx].start, - audio_clips[idx].duration + self.create_image_clip( + "title", + audio_clips[0].start, + audio_clips[0].duration, + background_config[3], ) ) - # if os.path.exists("assets/mp3/posttext.mp3"): - # image_clips.insert( - # 0, - # ImageClip("assets/png/title.png") - # .set_duration(audio_clips[0].duration + audio_clips[1].duration) - # .set_position("center") - # .resize(width=W - 100) - # .set_opacity(float(opacity)), - # ) - # else: story mode stuff - - # Can't use concatenate_videoclips here, it resets clips' start point - image_concat = CompositeVideoClip(image_clips) - image_concat.set_position(background_config[3]) - - download_background(background_config) - chop_background_video(background_config, video_duration) - background_clip = ( - VideoFileClip("assets/temp/background.mp4") - .set_start(0) - .set_end(video_duration) - .without_audio() - .resize(height=H) - ) - - back_video_width, back_video_height = background_clip.size - - # Fix for crop with vertical videos - if back_video_width < H: + for idx, photo_idx in track( + enumerate( + indexes_for_videos, + start=index_offset, + ), + description="Gathering audio clips...", + total=indexes_for_videos.__len__() + ): + image_clips.append( + self.create_image_clip( + f"comment_{photo_idx}", + audio_clips[idx].start, + audio_clips[idx].duration, + background_config[3], + ) + ) + + # if os.path.exists("assets/mp3/posttext.mp3"): + # image_clips.insert( + # 0, + # ImageClip("assets/png/title.png") + # .set_duration(audio_clips[0].duration + audio_clips[1].duration) + # .set_position("center") + # .resize(width=W - 100) + # .set_opacity(float(opacity)), + # ) + # else: story mode stuff + + # Can't use concatenate_videoclips here, it resets clips' start point + image_concat = CompositeVideoClip(image_clips) + image_concat.set_position(background_config[3]) + + download_background(background_config) + chop_background_video(background_config, self.video_duration) background_clip = ( - background_clip - .resize(width=W) + VideoFileClip("assets/temp/background.mp4") + .set_start(0) + .set_end(self.video_duration) + .without_audio() + .resize(height=self.H) ) + back_video_width, back_video_height = background_clip.size - background_clip = background_clip.crop( - x1=0, - x2=back_video_width, - y1=back_video_height / 2 - H / 2, - y2=back_video_height / 2 + H / 2 - ) - else: - background_clip = background_clip.crop( - x1=back_video_width / 2 - W / 2, - x2=back_video_width / 2 + W / 2, - y1=0, - y2=back_video_height + + # Fix for crop with vertical videos + if back_video_width < self.H: + background_clip = ( + background_clip + .resize(width=self.W) + ) + back_video_width, back_video_height = background_clip.size + background_clip = background_clip.crop( + x1=0, + x2=back_video_width, + y1=back_video_height / 2 - self.H / 2, + y2=back_video_height / 2 + self.H / 2 + ) + else: + background_clip = background_clip.crop( + x1=back_video_width / 2 - self.W / 2, + x2=back_video_width / 2 + self.W / 2, + y1=0, + y2=back_video_height + ) + + final = CompositeVideoClip([background_clip, image_concat]) + final.audio = audio_composite + + title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"]) + idx = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) + + filename = f"{self.name_normalize(title)}.mp4" + subreddit = str(settings.config["reddit"]["thread"]["subreddit"]) + + if not exists(f"./results/{subreddit}"): + print_substep("The results folder didn't exist so I made it") + os.makedirs(f"./results/{subreddit}") + + # if ( + # settings.config["settings"]['background']["background_audio"] and + # exists(f"assets/backgrounds/background.mp3") + # ): + # audioclip = ( + # AudioFileClip(f"assets/backgrounds/background.mp3") + # .set_duration(final.duration) + # .volumex(0.2) + # ) + # final_audio = CompositeAudioClip([final.audio, audioclip]) + # # lowered_audio = audio_background.multiply_volume( # TODO get this to work + # # VOLUME_MULTIPLIER) # lower volume by background_audio_volume, use with fx + # final.set_audio(final_audio) + + final = Video(final).add_watermark( + text=f"Background credit: {background_config[2]}", opacity=0.4 ) - final = CompositeVideoClip([background_clip, image_concat]) - final.audio = audio_composite - - title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"]) - idx = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) - - filename = f"{name_normalize(title)}.mp4" - subreddit = str(settings.config["reddit"]["thread"]["subreddit"]) - - if not exists(f"./results/{subreddit}"): - print_substep("The results folder didn't exist so I made it") - os.makedirs(f"./results/{subreddit}") - - # if settings.config["settings"]['background']["background_audio"] and exists(f"assets/backgrounds/background.mp3"): - # audioclip = mpe.AudioFileClip(f"assets/backgrounds/background.mp3").set_duration(final.duration) - # audioclip = audioclip.fx( volumex, 0.2) - # final_audio = mpe.CompositeAudioClip([final.audio, audioclip]) - # # lowered_audio = audio_background.multiply_volume( # todo get this to work - # # VOLUME_MULTIPLIER) # lower volume by background_audio_volume, use with fx - # final.set_audio(final_audio) - - final = Video(final).add_watermark( - text=f"Background credit: {background_config[2]}", opacity=0.4 - ) - - final.write_videofile( - "assets/temp/temp.mp4", - fps=30, - audio_codec="aac", - audio_bitrate="192k", - verbose=False, - threads=multiprocessing.cpu_count(), - ) - ffmpeg_extract_subclip( - "assets/temp/temp.mp4", - 0, - video_duration, - targetname=f"results/{subreddit}/{filename}", - ) - save_data(subreddit, filename, title, idx, background_config[2]) - print_step("Removing temporary files 🗑") - cleanups = cleanup() - print_substep(f"Removed {cleanups} temporary files 🗑") - print_substep("See result in the results folder!") - - print_step( - f'Reddit title: {reddit_obj["thread_title"]} \n Background Credit: {background_config[2]}' - ) + final.write_videofile( + "assets/temp/temp.mp4", + fps=30, + audio_codec="aac", + audio_bitrate="192k", + verbose=False, + threads=multiprocessing.cpu_count(), + ) + ffmpeg_extract_subclip( + "assets/temp/temp.mp4", + 0, + self.video_duration, + targetname=f"results/{subreddit}/{filename}", + ) + save_data(subreddit, filename, title, idx, background_config[2]) + print_step("Removing temporary files 🗑") + cleanups = cleanup() + print_substep(f"Removed {cleanups} temporary files 🗑") + print_substep("See result in the results folder!") + + print_step( + f'Reddit title: {reddit_obj["thread_title"]} \n Background Credit: {background_config[2]}' + ) From 50eb5dbc8a2ae4ef498fca593f94b817a565d170 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 23:27:31 +0300 Subject: [PATCH 27/39] fixes in regex --- video_creation/final_video.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 6621e5b..7a8da83 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -53,11 +53,11 @@ class FinalVideo: name: str ) -> str: name = re.sub(r'[?\\"%*:|<>]', "", name) - name = re.sub(r"( [w,W]\s?\/\s?[o,O,0])", r" without", name) - name = re.sub(r"( [w,W]\s?\/)", r" with", name) - name = re.sub(r"(\d+)\s?\/\s?(\d+)", r"\1 of \2", name) - name = re.sub(r"(\w+)\s?\/\s?(\w+)", r"\1 or \2", name) - name = re.sub(r"\/", "", name) + name = re.sub(r"( [w,W]\s?/\s?[oO0])", r" without", name) + name = re.sub(r"( [w,W]\s?/)", r" with", name) + name = re.sub(r"(\d+)\s?/\s?(\d+)", r"\1 of \2", name) + name = re.sub(r"(\w+)\s?/\s?(\w+)", r"\1 or \2", name) + name = re.sub(r"/", "", name) # name[:30] # the hell this little guy does? commented until explained lang = settings.config["reddit"]["thread"]["post_lang"] From 7ff93a2f0edb6be88d911e51a2c16e70d628d736 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 23:28:57 +0300 Subject: [PATCH 28/39] fixes with screenshots width --- video_creation/final_video.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 7a8da83..fff83b9 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -192,8 +192,6 @@ class FinalVideo: # else: story mode stuff # Can't use concatenate_videoclips here, it resets clips' start point - image_concat = CompositeVideoClip(image_clips) - image_concat.set_position(background_config[3]) download_background(background_config) chop_background_video(background_config, self.video_duration) @@ -228,7 +226,7 @@ class FinalVideo: y2=back_video_height ) - final = CompositeVideoClip([background_clip, image_concat]) + final = CompositeVideoClip([background_clip, *image_clips]) final.audio = audio_composite title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"]) From 15aa7fa45bf58fdb3c66e9e707871021263a4e99 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 23:29:26 +0300 Subject: [PATCH 29/39] removed ffmpeg_extract_subclip --- video_creation/final_video.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/video_creation/final_video.py b/video_creation/final_video.py index fff83b9..2063bf0 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -12,7 +12,6 @@ from moviepy.editor import ( CompositeAudioClip, CompositeVideoClip, ) -from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip from rich.console import Console from rich.progress import track from attr import attrs @@ -265,11 +264,10 @@ class FinalVideo: verbose=False, threads=multiprocessing.cpu_count(), ) - ffmpeg_extract_subclip( + # Moves file in subreddit folder and renames it + os.rename( "assets/temp/temp.mp4", - 0, - self.video_duration, - targetname=f"results/{subreddit}/{filename}", + f"results/{subreddit}/{filename}", ) save_data(subreddit, filename, title, idx, background_config[2]) print_step("Removing temporary files 🗑") From ec2292bae3ea692fc43e5197e69e814db3f5d2c5 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Sun, 17 Jul 2022 23:54:46 +0300 Subject: [PATCH 30/39] reverted unnecessary changes --- main.py | 6 ++---- reddit/subreddit.py | 4 +--- utils/settings.py | 4 ++-- utils/subreddit.py | 2 +- utils/video.py | 8 ++++++-- utils/voice.py | 4 +--- video_creation/background.py | 2 +- video_creation/final_video.py | 2 +- video_creation/screenshot_downloader.py | 1 + video_creation/voices.py | 7 +------ 10 files changed, 17 insertions(+), 23 deletions(-) diff --git a/main.py b/main.py index d0d21b9..1e76442 100755 --- a/main.py +++ b/main.py @@ -37,9 +37,7 @@ print_markdown( print_step(f"You are using v{__VERSION__} of the bot") -async def main( - POST_ID=None -): +async def main(POST_ID=None): cleanup() reddit_object = get_subreddit_threads(POST_ID) comments_created = save_text_to_mp3(reddit_object) @@ -84,7 +82,7 @@ if __name__ == "__main__": Popen("cls" if name == "nt" else "clear", shell=True).wait() else: main() - except KeyboardInterrupt: + except KeyboardInterrupt: # TODO wont work with async code shutdown() except ResponseException: # error for invalid credentials diff --git a/reddit/subreddit.py b/reddit/subreddit.py index 50c1fb9..486447f 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -10,9 +10,7 @@ from utils.videos import check_done from utils.voice import sanitize_text -def get_subreddit_threads( - POST_ID: str -): +def get_subreddit_threads(POST_ID: str): """ Returns a list of threads from the AskReddit subreddit. """ diff --git a/utils/settings.py b/utils/settings.py index 53d83a4..43796bd 100755 --- a/utils/settings.py +++ b/utils/settings.py @@ -3,13 +3,13 @@ import toml from rich.console import Console import re -from typing import Tuple, Dict +from typing import Tuple, Dict, Optional from utils.console import handle_input console = Console() -config = dict() # calling instance of a dict to calm lint down (dict[any] will work as well) +config: Optional[dict] = None # autocomplete def crawl(obj: dict, func=lambda x, y: print(x, y, end="\n"), path=None): diff --git a/utils/subreddit.py b/utils/subreddit.py index 0a6b1e6..3253099 100644 --- a/utils/subreddit.py +++ b/utils/subreddit.py @@ -57,7 +57,7 @@ def get_subreddit_undone(submissions: list, subreddit, times_checked=0): return get_subreddit_undone( subreddit.top( - time_filter=VALID_TIME_FILTERS[index], limit=100 + time_filter=VALID_TIME_FILTERS[index], limit=(50 if int(index) == 0 else index + 1 * 50) ), subreddit, times_checked=index, diff --git a/utils/video.py b/utils/video.py index 0d65e68..556693e 100644 --- a/utils/video.py +++ b/utils/video.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from typing import Tuple from PIL import ImageFont, Image, ImageDraw, ImageEnhance @@ -26,7 +28,7 @@ class Video: draw = ImageDraw.Draw(wm) w, h = draw.textsize(text, font) draw.text(((width - w) / 2, (height - h) / 2), text, white, font) - en = ImageEnhance.Brightness(wm) # TODO allow it to use the fontsize + en = ImageEnhance.Brightness(wm) # todo allow it to use the fontsize mask = en.enhance(1 - opacity) im.paste(wm, (25, 25), mask) im.save(path) @@ -40,11 +42,13 @@ class Video: ndigits=2, ) position = (compensation, position[1]) + # print(f'{compensation=}') + # print(f'{position=}') img_clip = self._create_watermark(text, opacity=opacity, fontsize=fontsize) img_clip = img_clip.set_opacity(opacity).set_duration(duration) img_clip = img_clip.set_position( position, relative=True - ) # TODO get data from utils/CONSTANTS.py and adapt position accordingly + ) # todo get data from utils/CONSTANTS.py and adapt position accordingly # Overlay the img clip on the first video clip self.video = CompositeVideoClip([self.video, img_clip]) diff --git a/utils/voice.py b/utils/voice.py index 3113227..a0709fa 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -10,9 +10,7 @@ if sys.version_info[0] >= 3: from datetime import timezone -def check_ratelimit( - response: Response, -): +def check_ratelimit(response: Response): """ Checks if the response is a ratelimit response. If it is, it sleeps for the time specified in the response. diff --git a/video_creation/background.py b/video_creation/background.py index 73f4251..6e656fa 100644 --- a/video_creation/background.py +++ b/video_creation/background.py @@ -38,7 +38,7 @@ def get_background_config(): # Handle default / not supported background using default option. # Default : pick random from supported background. - if choice not in background_options: + if not choice or choice not in background_options: choice = random.choice(list(background_options.keys())) return background_options[choice] diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 2063bf0..88ea499 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -57,7 +57,7 @@ class FinalVideo: name = re.sub(r"(\d+)\s?/\s?(\d+)", r"\1 of \2", name) name = re.sub(r"(\w+)\s?/\s?(\w+)", r"\1 or \2", name) name = re.sub(r"/", "", name) - # name[:30] # the hell this little guy does? commented until explained + # name[:30] # does nothing lang = settings.config["reddit"]["thread"]["post_lang"] if lang: diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 0ec7e4e..62e4df1 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -89,6 +89,7 @@ class Browser: default_Viewport: dict = attrib( validator=instance_of(dict), default={ + # 9x21 to see long posts "defaultViewport": { "width": 500, "height": 1200, diff --git a/video_creation/voices.py b/video_creation/voices.py index b372042..1e5a1a5 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -3,7 +3,6 @@ from TTS.GTTS import GTTS from TTS.streamlabs_polly import StreamlabsPolly from TTS.aws_polly import AWSPolly from TTS.TikTok import TikTok - from utils import settings from utils.console import print_table, print_step @@ -41,11 +40,7 @@ def save_text_to_mp3( return engine_instance.run() -def get_case_insensitive_key_value( - input_dict, - key, -) -> object: - # TODO add a factory later +def get_case_insensitive_key_value(input_dict, key): return next( (value for dict_key, value in input_dict.items() if dict_key.lower() == key.lower()), None, From b1663f33816ad67fc672141cca6671a6502d850c Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Tue, 19 Jul 2022 20:11:32 +0300 Subject: [PATCH 31/39] fixes in config & name[:30] in final_video --- utils/.config.template.toml | 2 +- video_creation/final_video.py | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/utils/.config.template.toml b/utils/.config.template.toml index be2c76f..d703fb3 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -16,7 +16,7 @@ subreddit = { optional = false, regex = "[_0-9a-zA-Z]+$", nmin = 3, explanation post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z])*$", explanation = "Used if you want to use a specific post.", example = "urdtfx" } max_comment_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "max number of characters a comment can have. default is 500", example = 500, oob_error = "the max comment length should be between 10 and 10000" } post_lang = { default = "", optional = true, explanation = "The language you would like to translate to.", example = "es-cr" } -min_comments = { default = 20, optional = false, nmin = 15, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 1 and 999999" } +min_comments = { default = 20, optional = false, nmin = 15, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 15 and 999999" } [settings] allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 88ea499..14204e6 100755 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -57,16 +57,15 @@ class FinalVideo: name = re.sub(r"(\d+)\s?/\s?(\d+)", r"\1 of \2", name) name = re.sub(r"(\w+)\s?/\s?(\w+)", r"\1 or \2", name) name = re.sub(r"/", "", name) - # name[:30] # does nothing lang = settings.config["reddit"]["thread"]["post_lang"] + translated_name = None if lang: import translators as ts print_substep("Translating filename...") translated_name = ts.google(name, to_language=lang) - return translated_name - return name + return translated_name[:30] if translated_name else name[:30] @staticmethod def create_audio_clip( From 27577d0da6fd8b5a6a2b7833a462f0da4967890a Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Tue, 19 Jul 2022 22:46:13 +0300 Subject: [PATCH 32/39] async playwright in factory WIP, added collect_story in pyppetter --- README.md | 3 + install.sh | 25 ++- main.py | 5 +- requirements.txt | 1 + webdriver/__init__.py | 0 webdriver/common.py | 67 ++++++ webdriver/playwright.py | 202 ++++++++++++++++++ .../pyppeteer.py | 117 +++++----- webdriver/web_engine.py | 22 ++ 9 files changed, 369 insertions(+), 73 deletions(-) create mode 100644 webdriver/__init__.py create mode 100644 webdriver/common.py create mode 100644 webdriver/playwright.py rename video_creation/screenshot_downloader.py => webdriver/pyppeteer.py (81%) create mode 100644 webdriver/web_engine.py diff --git a/README.md b/README.md index d7e1816..d7227f9 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,15 @@ The only original thing being done is the editing and gathering of all materials ## Requirements - Python 3.9+ +- Playwright (this should install automatically in installation) ## Installation 👩‍💻 1. Clone this repository 2. Run `pip install -r requirements.txt` +3. Run `python -m playwright install` and `python -m playwright install-deps` + **EXPERIMENTAL!!!!** On MacOS and Linux (debian, arch, fedora and centos, and based on those), you can run an install script that will automatically install steps 1 to 3. (requires bash) diff --git a/install.sh b/install.sh index 254438f..fb8a431 100644 --- a/install.sh +++ b/install.sh @@ -12,7 +12,7 @@ function Help(){ echo "Options:" echo " -h: Show this help message and exit" echo " -d: Install only dependencies" - echo " -p: Install only python dependencies" + echo " -p: Install only python dependencies (including playwright)" echo " -b: Install just the bot" echo " -l: Install the bot and the python dependencies" } @@ -107,6 +107,23 @@ function install_python_dep(){ cd .. } +# install playwright function +function install_playwright(){ + # tell the user that the script is going to install playwright + echo "Installing playwright" + # cd into the directory where the script is downloaded + cd RedditVideoMakerBot + # run the install script + python3 -m playwright install + python3 -m playwright install-deps + # give a note + printf "Note, if these gave any errors, playwright may not be officially supported on your OS, check this issues page for support\nhttps://github.com/microsoft/playwright/issues" + if [ -x "$(command -v pacman)" ]; then + printf "It seems you are on and Arch based distro.\nTry installing these from the AUR for playwright to run:\nenchant1.6\nicu66\nlibwebp052\n" + fi + cd .. +} + # Install depndencies function install_deps(){ # if the platform is mac, install macos @@ -131,7 +148,7 @@ function install_deps(){ # else else # print an error message and exit - printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and dependencies\n If you want to add support for your OS, please open a pull request on github\n + printf "Your OS is not supported\n Please install python3, pip3 and git manually\n After that, run the script again with the -pb option to install python and playwright dependencies\n If you want to add support for your OS, please open a pull request on github\n https://github.com/elebumm/RedditVideoMakerBot" exit 1 fi @@ -159,9 +176,10 @@ function install_main(){ echo "Installing only dependencies" install_deps elif [[ PYTHON_ONLY -eq 1 ]]; then - # if the -p (only python dependencies) options is selected install just the python dependencies + # if the -p (only python dependencies) options is selected install just the python dependencies and playwright echo "Installing only python dependencies" install_python_dep + install_playwright # if the -b (only the bot) options is selected install just the bot elif [[ JUST_BOT -eq 1 ]]; then echo "Installing only the bot" @@ -177,6 +195,7 @@ function install_main(){ install_deps get_the_bot install_python_dep + install_playwright fi DIR="./RedditVideoMakerBot" diff --git a/main.py b/main.py index 1e76442..ded1f2c 100755 --- a/main.py +++ b/main.py @@ -14,7 +14,7 @@ from video_creation.background import ( get_background_config, ) from video_creation.final_video import FinalVideo -from video_creation.screenshot_downloader import RedditScreenshot +from webdriver.web_engine import screenshot_factory from video_creation.voices import save_text_to_mp3 __VERSION__ = "2.3.1" @@ -41,7 +41,8 @@ async def main(POST_ID=None): cleanup() reddit_object = get_subreddit_threads(POST_ID) comments_created = save_text_to_mp3(reddit_object) - await RedditScreenshot(reddit_object, comments_created).download() + webdriver = screenshot_factory(config["settings"]["times_to_run"]) # TODO add in config + await webdriver(reddit_object, comments_created).download() bg_config = get_background_config() FinalVideo().make(comments_created, reddit_object, bg_config) diff --git a/requirements.txt b/requirements.txt index a0fb434..9684dc7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,6 +2,7 @@ boto3==1.24.24 botocore==1.27.24 gTTS==2.2.4 moviepy==1.0.3 +playwright==1.23.0 praw==7.6.0 pytube==12.1.0 requests==2.28.1 diff --git a/webdriver/__init__.py b/webdriver/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/webdriver/common.py b/webdriver/common.py new file mode 100644 index 0000000..3c70a9f --- /dev/null +++ b/webdriver/common.py @@ -0,0 +1,67 @@ +from attr import attrs, attrib +from typing import TypeVar, Optional, Callable, Union + + +_function = TypeVar("_function", bound=Callable[..., object]) +_exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]]) + + +@attrs +class ExceptionDecorator: + """ + Decorator factory for catching exceptions and writing logs + """ + exception: Optional[_exceptions] = attrib(default=None) + _default_exception: Optional[_exceptions] = attrib( + kw_only=True, + default=None + ) + + def __attrs_post_init__(self): + if not self.exception: + self.exception = self._default_exception + + def __call__( + self, + func: _function, + ): + async def wrapper(*args, **kwargs): + try: + obj_to_return = await func(*args, **kwargs) + return obj_to_return + except Exception as caughtException: + import logging + + logger = logging.getLogger("webdriver_log") + logger.setLevel(logging.ERROR) + handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8") + logger.addHandler(handler) + + if isinstance(self.exception, type): + if not type(caughtException) == self.exception: + logger.error(f"unexpected error - {caughtException}") + else: + if not type(caughtException) in self.exception: + logger.error(f"unexpected error - {caughtException}") + + return wrapper + + @classmethod + def catch_exception( + cls, + func: Optional[_function], + exception: Optional[_exceptions] = None, + ) -> Union[object, _function]: + """ + Decorator for catching exceptions and writing logs + + Args: + func: Function to be decorated + exception: Expected exception(s) + Returns: + Decorated function + """ + exceptor = cls(exception) + if func: + exceptor = exceptor(func) + return exceptor diff --git a/webdriver/playwright.py b/webdriver/playwright.py new file mode 100644 index 0000000..f1934d9 --- /dev/null +++ b/webdriver/playwright.py @@ -0,0 +1,202 @@ +from playwright.async_api import async_playwright, ViewportSize +from playwright.async_api import Browser, Playwright +from rich.progress import track + +from pathlib import Path +import translators as ts +from utils import settings +from utils.console import print_step, print_substep +from attr import attrs, attrib +from attr.validators import instance_of, optional + +from typing import Dict, Optional, Union + + +@attrs +class Browser: + """ + Args: + default_Viewport (dict):Pyppeteer Browser default_Viewport options + browser (BrowserCls): Pyppeteer Browser instance + """ + default_Viewport: dict = attrib( + validator=instance_of(dict), + default={ + # 9x21 to see long posts + "defaultViewport": { + "width": 500, + "height": 1200, + }, + }, + kw_only=True, + ) + playwright: Playwright + browser: Browser + + async def get_browser( + self, + ) -> None: + """ + Creates Playwright instance & browser + """ + self.playwright = await async_playwright().start() + self.browser = await self.playwright.chromium.launch() + + async def close_browser( + self, + ) -> None: + """ + Closes Pyppeteer browser + """ + await self.browser.close() + await self.playwright.stop() + + +@attrs(auto_attribs=True) +class RedditScreenshot(Browser): + """ + Args: + reddit_object (Dict): Reddit object received from reddit/subreddit.py + screenshot_idx (int): List with indexes of voiced comments + """ + reddit_object: dict + screenshot_idx: list + + async def __dark_theme( + self, + page_instance: PageCls, + ) -> None: + """ + Enables dark theme in Reddit + + Args: + page_instance: Pyppeteer page instance with reddit page opened + """ + + await self.click( + page_instance, + "//*[contains(@class, 'header-user-dropdown')]", + {"timeout": 5000}, + ) + + # It's normal not to find it, sometimes there is none :shrug: + await self.click( + page_instance, + "//*[contains(text(), 'Settings')]/ancestor::button[1]", + {"timeout": 5000}, + ) + + await self.click( + page_instance, + "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]", + {"timeout": 5000}, + ) + + # Closes settings + await self.click( + page_instance, + "//*[contains(@class, 'header-user-dropdown')]", + {"timeout": 5000}, + ) + + + +storymode = False + + +def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): + """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png + + Args: + reddit_object (Dict): Reddit object received from reddit/subreddit.py + screenshot_num (int): Number of screenshots to download + """ + print_step("Downloading screenshots of reddit posts...") + + # ! Make sure the reddit screenshots folder exists + Path("assets/temp/png").mkdir(parents=True, exist_ok=True) + + with sync_playwright() as p: + print_substep("Launching Headless Browser...") + + browser = p.chromium.launch() + context = browser.new_context() + + if settings.config["settings"]["theme"] == "dark": + cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8") + else: + cookie_file = open("./video_creation/data/cookie-light-mode.json", encoding="utf-8") + cookies = json.load(cookie_file) + context.add_cookies(cookies) # load preference cookies + # Get the thread screenshot + page = context.new_page() + page.goto(reddit_object["thread_url"], timeout=0) + page.set_viewport_size(ViewportSize(width=1920, height=1080)) + if page.locator('[data-testid="content-gate"]').is_visible(): + # This means the post is NSFW and requires to click the proceed button. + + print_substep("Post is NSFW. You are spicy...") + page.locator('[data-testid="content-gate"] button').click() + page.wait_for_load_state() # Wait for page to fully load + + if page.locator('[data-click-id="text"] button').is_visible(): + page.locator( + '[data-click-id="text"] button' + ).click() # Remove "Click to see nsfw" Button in Screenshot + + # translate code + + if settings.config["reddit"]["thread"]["post_lang"]: + print_substep("Translating post...") + texts_in_tl = ts.google( + reddit_object["thread_title"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + + page.evaluate( + "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content", + texts_in_tl, + ) + else: + print_substep("Skipping translation...") + + page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png") + + if storymode: + page.locator('[data-click-id="text"]').screenshot( + path="assets/temp/png/story_content.png" + ) + else: + for idx, comment in enumerate( + track(reddit_object["comments"], "Downloading screenshots...") + ): + # Stop if we have reached the screenshot_num + if idx >= screenshot_num: + break + + if page.locator('[data-testid="content-gate"]').is_visible(): + page.locator('[data-testid="content-gate"] button').click() + + page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0) + + # translate code + + if settings.config["reddit"]["thread"]["post_lang"]: + comment_tl = ts.google( + comment["comment_body"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + page.evaluate( + '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', + [comment_tl, comment["comment_id"]], + ) + try: + page.locator(f"#t1_{comment['comment_id']}").screenshot( + path=f"assets/temp/png/comment_{idx}.png" + ) + except TimeoutError: + del reddit_object["comments"] + screenshot_num += 1 + print("TimeoutError: Skipping screenshot...") + continue + print_substep("Screenshots downloaded Successfully.", style="bold green") diff --git a/video_creation/screenshot_downloader.py b/webdriver/pyppeteer.py similarity index 81% rename from video_creation/screenshot_downloader.py rename to webdriver/pyppeteer.py index 62e4df1..b9b409b 100644 --- a/video_creation/screenshot_downloader.py +++ b/webdriver/pyppeteer.py @@ -16,67 +16,11 @@ from utils.console import print_step, print_substep from attr import attrs, attrib from attr.validators import instance_of, optional -from typing import TypeVar, Optional, Callable, Union +from typing import Optional -_function = TypeVar("_function", bound=Callable[..., object]) -_exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]]) +from webdriver.common import ExceptionDecorator - -@attrs -class ExceptionDecorator: - """ - Decorator factory for catching exceptions and writing logs - """ - exception: Optional[_exceptions] = attrib(default=None) - __default_exception: _exceptions = attrib(default=BrowserTimeoutError) - - def __attrs_post_init__(self): - if not self.exception: - self.exception = self.__default_exception - - def __call__( - self, - func: _function, - ): - async def wrapper(*args, **kwargs): - try: - obj_to_return = await func(*args, **kwargs) - return obj_to_return - except Exception as caughtException: - import logging - - logger = logging.getLogger("webdriver_log") - logger.setLevel(logging.ERROR) - handler = logging.FileHandler(".webdriver.log", mode="a+", encoding="utf-8") - logger.addHandler(handler) - - if isinstance(self.exception, type): - if not type(caughtException) == self.exception: - logger.error(f"unexpected error - {caughtException}") - else: - if not type(caughtException) in self.exception: - logger.error(f"unexpected error - {caughtException}") - - return wrapper - - -def catch_exception( - func: Optional[_function], - exception: Optional[_exceptions] = None, -) -> Union[ExceptionDecorator, _function]: - """ - Decorator for catching exceptions and writing logs - - Args: - func: Function to be decorated - exception: Expected exception(s) - Returns: - Decorated function - """ - exceptor = ExceptionDecorator(exception) - if func: - exceptor = exceptor(func) - return exceptor +catch_exception = ExceptionDecorator(default_exception=BrowserTimeoutError).catch_exception @attrs @@ -97,11 +41,7 @@ class Browser: }, kw_only=True, ) - browser: Optional[BrowserCls] = attrib( - validator=optional(instance_of(BrowserCls)), - default=None, - kw_only=True, - ) + browser: BrowserCls async def get_browser( self, @@ -217,6 +157,10 @@ class RedditScreenshot(Browser, Wait): """ reddit_object: dict screenshot_idx: list + story_mode: Optional[bool] = attrib( + validator=instance_of(bool), + default=False, + ) async def __dark_theme( self, @@ -313,6 +257,37 @@ class RedditScreenshot(Browser, Wait): {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) + # WIP TODO test it + async def __collect_story( + self, + main_page: PageCls, + + ): + # Translates submission text + if settings.config["reddit"]["thread"]["post_lang"]: + story_tl = ts.google( + self.reddit_object["thread_post"], + to_language=settings.config["reddit"]["thread"]["post_lang"], + ) + split_story_tl = story_tl.split('\n') + await main_page.evaluate( + # Find all elements + 'var elements = document.querySelectorAll(`[data-test-id="post-content"]' + ' > [data-click-id="text"] > div > p`);' + # Set array with translated text + f"var texts = {split_story_tl};" + # Map 2 arrays together + "var text_map = texts.map(function(e, i) { return [e, elements[i]]; });" + # Change text on the page + "for (i = 0; i < text_map.length; ++i) { text_map[i][1].textContent = text_map[i][0] ; };" + ) + + await self.screenshot( + main_page, + "//*[@data-click-id='text']", + {"path": "assets/temp/png/story_content.png"}, + ) + async def download( self, ): @@ -354,10 +329,16 @@ class RedditScreenshot(Browser, Wait): else: print_substep("Skipping translation...") - async_tasks_primary = [ - self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in - self.screenshot_idx - ] + async_tasks_primary = ( + [ + self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in + self.screenshot_idx + ] + if not self.story_mode + else [ + self.__collect_story(reddit_main) + ] + ) async_tasks_primary.append( self.screenshot( diff --git a/webdriver/web_engine.py b/webdriver/web_engine.py new file mode 100644 index 0000000..2ca28ab --- /dev/null +++ b/webdriver/web_engine.py @@ -0,0 +1,22 @@ +from typing import Union + +from webdriver.pyppeteer import RedditScreenshot as Pyppeteer + + +def screenshot_factory( + driver: str, +) -> Union[Pyppeteer]: + """ + Factory for webdriver + Args: + driver: (str) Name of a driver + + Returns: + Webdriver instance + """ + web_drivers = { + "pyppeteer": Pyppeteer, + "playwright": None, + } + + return web_drivers[driver] From e0b22dc342f32df230af24f524087d90d740b96d Mon Sep 17 00:00:00 2001 From: Jason Date: Tue, 19 Jul 2022 15:52:31 -0400 Subject: [PATCH 33/39] Update .config.template.toml update docs --- utils/.config.template.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/.config.template.toml b/utils/.config.template.toml index be2c76f..e2d311f 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -16,7 +16,7 @@ subreddit = { optional = false, regex = "[_0-9a-zA-Z]+$", nmin = 3, explanation post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z])*$", explanation = "Used if you want to use a specific post.", example = "urdtfx" } max_comment_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "max number of characters a comment can have. default is 500", example = 500, oob_error = "the max comment length should be between 10 and 10000" } post_lang = { default = "", optional = true, explanation = "The language you would like to translate to.", example = "es-cr" } -min_comments = { default = 20, optional = false, nmin = 15, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments should be between 1 and 999999" } +min_comments = { default = 20, optional = false, nmin = 15, type = "int", explanation = "The minimum number of comments a post should have to be included. default is 20", example = 29, oob_error = "the minimum number of comments must be at least 15" } [settings] allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, From 66494022db8a81969f628db951f58c796c0425e0 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 20 Jul 2022 01:50:03 +0300 Subject: [PATCH 34/39] added async playwright --- webdriver/common.py | 8 + webdriver/playwright.py | 331 ++++++++++++++++++++++++++++------------ webdriver/pyppeteer.py | 74 +++++---- 3 files changed, 282 insertions(+), 131 deletions(-) diff --git a/webdriver/common.py b/webdriver/common.py index 3c70a9f..c1c9f35 100644 --- a/webdriver/common.py +++ b/webdriver/common.py @@ -65,3 +65,11 @@ class ExceptionDecorator: if func: exceptor = exceptor(func) return exceptor + + +# Lots of tabs - lots of memory +# chunk needed to minimize memory required +def chunks(lst, n): + """Yield successive n-sized chunks from list.""" + for i in range(0, len(lst), n): + yield lst[i:i + n] diff --git a/webdriver/playwright.py b/webdriver/playwright.py index f1934d9..87716a0 100644 --- a/webdriver/playwright.py +++ b/webdriver/playwright.py @@ -1,15 +1,21 @@ -from playwright.async_api import async_playwright, ViewportSize -from playwright.async_api import Browser, Playwright -from rich.progress import track +from asyncio import as_completed + +from playwright.async_api import async_playwright, TimeoutError +from playwright.async_api import Browser, Playwright, Page, BrowserContext, Locator from pathlib import Path -import translators as ts from utils import settings from utils.console import print_step, print_substep +import translators as ts +from rich.progress import track + from attr import attrs, attrib -from attr.validators import instance_of, optional +from attr.validators import instance_of +from typing import Dict, Optional + +from webdriver.common import ExceptionDecorator, chunks -from typing import Dict, Optional, Union +catch_exception = ExceptionDecorator(default_exception=TimeoutError).catch_exception @attrs @@ -23,15 +29,14 @@ class Browser: validator=instance_of(dict), default={ # 9x21 to see long posts - "defaultViewport": { - "width": 500, - "height": 1200, - }, + "width": 500, + "height": 1200, }, kw_only=True, ) playwright: Playwright browser: Browser + context: BrowserContext async def get_browser( self, @@ -41,30 +46,98 @@ class Browser: """ self.playwright = await async_playwright().start() self.browser = await self.playwright.chromium.launch() + self.context = await self.browser.new_context(viewport=self.default_Viewport) async def close_browser( self, ) -> None: """ - Closes Pyppeteer browser + Closes Playwright stuff """ + await self.context.close() await self.browser.close() await self.playwright.stop() +class Flaky: + """ + All methods decorated with function catching default exceptions and writing logs + """ + + @staticmethod + @catch_exception + def find_element( + query: str, + page_instance: Page, + options: Optional[dict] = None, + ) -> Locator: + return page_instance.locator(query, **options) if options else page_instance.locator(query) + + @catch_exception + async def click( + self, + page_instance: Optional[Page] = None, + query: Optional[str] = None, + options: Optional[dict] = None, + *, + find_options: Optional[dict] = None, + element: Optional[Locator] = None, + ) -> None: + if element: + await element.click(**options) if options else element.click() + else: + results = ( + self.find_element(query, page_instance, **find_options) + if find_options + else self.find_element(query, page_instance) + ) + await results.click(**options) if options else await results.click() + + @catch_exception + async def screenshot( + self, + page_instance: Optional[Page] = None, + query: Optional[str] = None, + options: Optional[dict] = None, + *, + find_options: Optional[dict] = None, + element: Optional[Locator] = None, + ) -> None: + if element: + await element.screenshot(**options) if options else await element.screenshot() + else: + results = ( + self.find_element(query, page_instance, **find_options) + if find_options + else self.find_element(query, page_instance) + ) + await results.screenshot(**options) if options else await results.screenshot() + + @attrs(auto_attribs=True) -class RedditScreenshot(Browser): +class RedditScreenshot(Flaky, Browser): + """ + Args: + reddit_object (Dict): Reddit object received from reddit/subreddit.py + screenshot_idx (int): List with indexes of voiced comments + story_mode (bool): If submission is a story takes screenshot of the story """ - Args: - reddit_object (Dict): Reddit object received from reddit/subreddit.py - screenshot_idx (int): List with indexes of voiced comments - """ reddit_object: dict screenshot_idx: list + story_mode: Optional[bool] = attrib( + validator=instance_of(bool), + default=False, + kw_only=True + ) + + def __attrs_post_init__( + self + ): + self.post_lang: Optional[bool] = settings.config["reddit"]["thread"]["post_lang"] async def __dark_theme( self, - page_instance: PageCls, + page_instance: Page, ) -> None: """ Enables dark theme in Reddit @@ -75,128 +148,190 @@ class RedditScreenshot(Browser): await self.click( page_instance, - "//*[contains(@class, 'header-user-dropdown')]", - {"timeout": 5000}, + "header-user-dropdown", ) # It's normal not to find it, sometimes there is none :shrug: await self.click( page_instance, - "//*[contains(text(), 'Settings')]/ancestor::button[1]", - {"timeout": 5000}, + ":nth-match(button) >> 'Settings'", ) await self.click( page_instance, - "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]", - {"timeout": 5000}, + ":nth-match(button) >> 'Dark Mode'", ) # Closes settings await self.click( page_instance, - "//*[contains(@class, 'header-user-dropdown')]", + "header-user-dropdown" + ) + + async def __close_nsfw( + self, + page_instance: Page, + ) -> None: + """ + Closes NSFW stuff + + Args: + page_instance: Instance of main page + """ + + print_substep("Post is NSFW. You are spicy...") + + # Triggers indirectly reload + await self.click( + page_instance, + 'button:has-text("Yes")', {"timeout": 5000}, ) + # Await indirect reload + await page_instance.wait_for_load_state() + await self.click( + page_instance, + 'button:has-text("Click to see nsfw")', + {"timeout": 5000}, + ) -storymode = False + async def __collect_comment( + self, + comment_obj: dict, + filename_idx: int, + ) -> None: + """ + Makes a screenshot of the comment + Args: + comment_obj: prew comment object + filename_idx: index for the filename + """ + comment_page = await self.context.new_page() + await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') + + # Translates submission' comment + if self.post_lang: + comment_tl = ts.google( + comment_obj["comment_body"], + to_language=self.post_lang, + ) + await comment_page.evaluate( + f"document.querySelector('#t1_{comment_obj['comment_id']} > div:nth-child(2) " + f'> div > div[data-testid="comment"] > div\').textContent = {comment_tl}', + ) -def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): - """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png + await self.screenshot( + comment_page, + f"id=t1_{comment_obj['comment_id']}", + {"path": f"assets/temp/png/comment_{filename_idx}.png"}, + ) - Args: - reddit_object (Dict): Reddit object received from reddit/subreddit.py - screenshot_num (int): Number of screenshots to download - """ - print_step("Downloading screenshots of reddit posts...") + # WIP TODO test it + async def __collect_story( + self, + main_page: Page, + ): + # Translates submission text + if self.post_lang: + story_tl = ts.google( + self.reddit_object["thread_post"], + to_language=self.post_lang, + ) + split_story_tl = story_tl.split('\n') + await main_page.evaluate( + # Find all elements + 'var elements = document.querySelectorAll(`[data-test-id="post-content"]' + ' > [data-click-id="text"] > div > p`);' + # Set array with translated text + f"var texts = {split_story_tl};" + # Map 2 arrays together + "var text_map = texts.map(function(e, i) { return [e, elements[i]]; });" + # Change text on the page + "for (i = 0; i < text_map.length; ++i) { text_map[i][1].textContent = text_map[i][0] ; };" + ) + + await self.screenshot( + main_page, + '[data-click-id="text"]', + {"path": "assets/temp/png/story_content.png"}, + ) - # ! Make sure the reddit screenshots folder exists - Path("assets/temp/png").mkdir(parents=True, exist_ok=True) + async def download( + self, + ): + """ + Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png + """ + print_step("Downloading screenshots of reddit posts...") - with sync_playwright() as p: print_substep("Launching Headless Browser...") + await self.get_browser() - browser = p.chromium.launch() - context = browser.new_context() + # ! Make sure the reddit screenshots folder exists + Path("assets/temp/png").mkdir(parents=True, exist_ok=True) - if settings.config["settings"]["theme"] == "dark": - cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8") - else: - cookie_file = open("./video_creation/data/cookie-light-mode.json", encoding="utf-8") - cookies = json.load(cookie_file) - context.add_cookies(cookies) # load preference cookies # Get the thread screenshot - page = context.new_page() - page.goto(reddit_object["thread_url"], timeout=0) - page.set_viewport_size(ViewportSize(width=1920, height=1080)) - if page.locator('[data-testid="content-gate"]').is_visible(): - # This means the post is NSFW and requires to click the proceed button. - - print_substep("Post is NSFW. You are spicy...") - page.locator('[data-testid="content-gate"] button').click() - page.wait_for_load_state() # Wait for page to fully load + reddit_main = await self.browser.new_page() + # noinspection Duplicates + await reddit_main.goto(self.reddit_object["thread_url"]) - if page.locator('[data-click-id="text"] button').is_visible(): - page.locator( - '[data-click-id="text"] button' - ).click() # Remove "Click to see nsfw" Button in Screenshot + if settings.config["settings"]["theme"] == "dark": + await self.__dark_theme(reddit_main) - # translate code + if self.reddit_object["is_nsfw"]: + # This means the post is NSFW and requires to click the proceed button. + await self.__close_nsfw(reddit_main) + # Translates submission title if settings.config["reddit"]["thread"]["post_lang"]: print_substep("Translating post...") texts_in_tl = ts.google( - reddit_object["thread_title"], + self.reddit_object["thread_title"], to_language=settings.config["reddit"]["thread"]["post_lang"], ) - page.evaluate( - "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content", - texts_in_tl, + await reddit_main.evaluate( + "document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > " + f"div').textContent = {texts_in_tl}", ) else: print_substep("Skipping translation...") - page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png") + # No sense to move it in common.py + # noinspection Duplicates + async_tasks_primary = ( + [ + self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in + self.screenshot_idx + ] + if not self.story_mode + else [ + self.__collect_story(reddit_main) + ] + ) - if storymode: - page.locator('[data-click-id="text"]').screenshot( - path="assets/temp/png/story_content.png" + async_tasks_primary.append( + self.screenshot( + reddit_main, + f"id=t3_{self.reddit_object['thread_id']}", + {"path": "assets/temp/png/title.png"}, ) - else: - for idx, comment in enumerate( - track(reddit_object["comments"], "Downloading screenshots...") + ) + + for idx, chunked_tasks in enumerate( + [chunk for chunk in chunks(async_tasks_primary, 10)], + start=1, + ): + chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0) + for task in track( + as_completed(chunked_tasks), + description=f"Downloading comments: Chunk {idx}/{chunk_list}", + total=chunked_tasks.__len__(), ): - # Stop if we have reached the screenshot_num - if idx >= screenshot_num: - break - - if page.locator('[data-testid="content-gate"]').is_visible(): - page.locator('[data-testid="content-gate"] button').click() - - page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0) - - # translate code - - if settings.config["reddit"]["thread"]["post_lang"]: - comment_tl = ts.google( - comment["comment_body"], - to_language=settings.config["reddit"]["thread"]["post_lang"], - ) - page.evaluate( - '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', - [comment_tl, comment["comment_id"]], - ) - try: - page.locator(f"#t1_{comment['comment_id']}").screenshot( - path=f"assets/temp/png/comment_{idx}.png" - ) - except TimeoutError: - del reddit_object["comments"] - screenshot_num += 1 - print("TimeoutError: Skipping screenshot...") - continue - print_substep("Screenshots downloaded Successfully.", style="bold green") + await task + + print_substep("Comments downloaded Successfully.", style="bold green") + await self.close_browser() diff --git a/webdriver/pyppeteer.py b/webdriver/pyppeteer.py index b9b409b..9c630f6 100644 --- a/webdriver/pyppeteer.py +++ b/webdriver/pyppeteer.py @@ -7,18 +7,16 @@ from pyppeteer.element_handle import ElementHandle as ElementHandleCls from pyppeteer.errors import TimeoutError as BrowserTimeoutError from pathlib import Path -from typing import Dict from utils import settings - +from utils.console import print_step, print_substep from rich.progress import track import translators as ts -from utils.console import print_step, print_substep from attr import attrs, attrib -from attr.validators import instance_of, optional +from attr.validators import instance_of from typing import Optional -from webdriver.common import ExceptionDecorator +from webdriver.common import ExceptionDecorator, chunks catch_exception = ExceptionDecorator(default_exception=BrowserTimeoutError).catch_exception @@ -100,8 +98,9 @@ class Wait: self, page_instance: Optional[PageCls] = None, xpath: Optional[str] = None, - find_options: Optional[dict] = None, options: Optional[dict] = None, + *, + find_options: Optional[dict] = None, el: Optional[ElementHandleCls] = None, ) -> None: """ @@ -127,6 +126,7 @@ class Wait: page_instance: Optional[PageCls] = None, xpath: Optional[str] = None, options: Optional[dict] = None, + *, find_options: Optional[dict] = None, el: Optional[ElementHandleCls] = None, ) -> None: @@ -154,14 +154,21 @@ class RedditScreenshot(Browser, Wait): Args: reddit_object (Dict): Reddit object received from reddit/subreddit.py screenshot_idx (int): List with indexes of voiced comments + story_mode (bool): If submission is a story takes screenshot of the story """ reddit_object: dict screenshot_idx: list story_mode: Optional[bool] = attrib( validator=instance_of(bool), default=False, + kw_only=True ) + def __attrs_post_init__( + self, + ): + self.post_lang: Optional[bool] = settings.config["reddit"]["thread"]["post_lang"] + async def __dark_theme( self, page_instance: PageCls, @@ -176,33 +183,40 @@ class RedditScreenshot(Browser, Wait): await self.click( page_instance, "//*[contains(@class, 'header-user-dropdown')]", - {"timeout": 5000}, + find_options={"timeout": 5000}, ) # It's normal not to find it, sometimes there is none :shrug: await self.click( page_instance, "//*[contains(text(), 'Settings')]/ancestor::button[1]", - {"timeout": 5000}, + find_options={"timeout": 5000}, ) await self.click( page_instance, "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]", - {"timeout": 5000}, + find_options={"timeout": 5000}, ) # Closes settings await self.click( page_instance, "//*[contains(@class, 'header-user-dropdown')]", - {"timeout": 5000}, + find_options={"timeout": 5000}, ) async def __close_nsfw( self, - page_instance: PageCls + page_instance: PageCls, ) -> None: + """ + Closes NSFW stuff + + Args: + page_instance: Instance of main page + """ + from asyncio import ensure_future print_substep("Post is NSFW. You are spicy...") @@ -213,17 +227,17 @@ class RedditScreenshot(Browser, Wait): await self.click( page_instance, '//button[text()="Yes"]', - {"timeout": 5000}, + find_options={"timeout": 5000}, ) # Await reload await navigation - await (await self.find_xpath( + await self.click( page_instance, '//button[text()="Click to see nsfw"]', - {"timeout": 5000}, - )).click() + find_options={"timeout": 5000}, + ) async def __collect_comment( self, @@ -241,19 +255,19 @@ class RedditScreenshot(Browser, Wait): await comment_page.goto(f'https://reddit.com{comment_obj["comment_url"]}') # Translates submission' comment - if settings.config["reddit"]["thread"]["post_lang"]: + if self.post_lang: comment_tl = ts.google( comment_obj["comment_body"], - to_language=settings.config["reddit"]["thread"]["post_lang"], + to_language=self.post_lang, ) await comment_page.evaluate( - f'([tl_content, tl_id]) => document.querySelector(`#t1_{comment_obj["comment_id"]} > div:nth-child(2) ' - f'> div > div[data-testid="comment"] > div`).textContent = {comment_tl}', + f"([tl_content, tl_id]) => document.querySelector('#t1_{comment_obj['comment_id']} > div:nth-child(2) " + f'> div > div[data-testid="comment"] > div\').textContent = {comment_tl}', ) await self.screenshot( comment_page, - f'//*[contains(@id, \'t1_{comment_obj["comment_id"]}\')]', + f"//*[contains(@id, 't1_{comment_obj['comment_id']}')]", {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) @@ -261,13 +275,12 @@ class RedditScreenshot(Browser, Wait): async def __collect_story( self, main_page: PageCls, - ): # Translates submission text - if settings.config["reddit"]["thread"]["post_lang"]: + if self.post_lang: story_tl = ts.google( self.reddit_object["thread_post"], - to_language=settings.config["reddit"]["thread"]["post_lang"], + to_language=self.post_lang, ) split_story_tl = story_tl.split('\n') await main_page.evaluate( @@ -304,6 +317,7 @@ class RedditScreenshot(Browser, Wait): # Get the thread screenshot reddit_main = await self.browser.newPage() + # noinspection Duplicates await reddit_main.goto(self.reddit_object["thread_url"]) if settings.config["settings"]["theme"] == "dark": @@ -322,13 +336,14 @@ class RedditScreenshot(Browser, Wait): ) await reddit_main.evaluate( - "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > " - "div').textContent = tl_content", - texts_in_tl, + "document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > " + f"div').textContent = {texts_in_tl}", ) else: print_substep("Skipping translation...") + # No sense to move it in common.py + # noinspection Duplicates async_tasks_primary = ( [ self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in @@ -348,13 +363,6 @@ class RedditScreenshot(Browser, Wait): ) ) - # Lots of tabs - lots of memory - # chunk needed to minimize memory required - def chunks(lst, n): - """Yield successive n-sized chunks from list.""" - for i in range(0, len(lst), n): - yield lst[i:i + n] - for idx, chunked_tasks in enumerate( [chunk for chunk in chunks(async_tasks_primary, 10)], start=1, From 1931b5e8e5dcf9c3b677eea47fd7202562a5eeca Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 20 Jul 2022 03:43:53 +0300 Subject: [PATCH 35/39] fixes in async webdriver --- webdriver/common.py | 45 +++++++++++++--------------- webdriver/playwright.py | 66 ++++++++++++++++++++++------------------- webdriver/pyppeteer.py | 17 ++++++----- 3 files changed, 65 insertions(+), 63 deletions(-) diff --git a/webdriver/common.py b/webdriver/common.py index c1c9f35..5e544f8 100644 --- a/webdriver/common.py +++ b/webdriver/common.py @@ -5,21 +5,19 @@ from typing import TypeVar, Optional, Callable, Union _function = TypeVar("_function", bound=Callable[..., object]) _exceptions = TypeVar("_exceptions", bound=Optional[Union[type, tuple, list]]) +default_exception = None + @attrs class ExceptionDecorator: """ - Decorator factory for catching exceptions and writing logs + Decorator for catching exceptions and writing logs """ exception: Optional[_exceptions] = attrib(default=None) - _default_exception: Optional[_exceptions] = attrib( - kw_only=True, - default=None - ) def __attrs_post_init__(self): if not self.exception: - self.exception = self._default_exception + self.exception = default_exception def __call__( self, @@ -46,25 +44,24 @@ class ExceptionDecorator: return wrapper - @classmethod - def catch_exception( - cls, - func: Optional[_function], - exception: Optional[_exceptions] = None, - ) -> Union[object, _function]: - """ - Decorator for catching exceptions and writing logs - Args: - func: Function to be decorated - exception: Expected exception(s) - Returns: - Decorated function - """ - exceptor = cls(exception) - if func: - exceptor = exceptor(func) - return exceptor +def catch_exception( + func: Optional[_function], + exception: Optional[_exceptions] = None, +) -> Union[object, _function]: + """ + Decorator for catching exceptions and writing logs + + Args: + func: Function to be decorated + exception: Expected exception(s) + Returns: + Decorated function + """ + exceptor = ExceptionDecorator(exception) + if func: + exceptor = exceptor(func) + return exceptor # Lots of tabs - lots of memory diff --git a/webdriver/playwright.py b/webdriver/playwright.py index 87716a0..f7145bb 100644 --- a/webdriver/playwright.py +++ b/webdriver/playwright.py @@ -1,21 +1,21 @@ from asyncio import as_completed +from pathlib import Path +from typing import Dict, Optional +import translators as ts +from attr import attrs, attrib +from attr.validators import instance_of +from playwright.async_api import Browser, Playwright, Page, BrowserContext, ElementHandle from playwright.async_api import async_playwright, TimeoutError -from playwright.async_api import Browser, Playwright, Page, BrowserContext, Locator +from rich.progress import track -from pathlib import Path from utils import settings from utils.console import print_step, print_substep -import translators as ts -from rich.progress import track -from attr import attrs, attrib -from attr.validators import instance_of -from typing import Dict, Optional +import webdriver.common as common -from webdriver.common import ExceptionDecorator, chunks -catch_exception = ExceptionDecorator(default_exception=TimeoutError).catch_exception +common.default_exception = TimeoutError @attrs @@ -65,15 +65,19 @@ class Flaky: """ @staticmethod - @catch_exception - def find_element( - query: str, + @common.catch_exception + async def find_element( + selector: str, page_instance: Page, options: Optional[dict] = None, - ) -> Locator: - return page_instance.locator(query, **options) if options else page_instance.locator(query) + ) -> ElementHandle: + return ( + await page_instance.wait_for_selector(selector, **options) + if options + else await page_instance.wait_for_selector(selector) + ) - @catch_exception + @common.catch_exception async def click( self, page_instance: Optional[Page] = None, @@ -81,19 +85,19 @@ class Flaky: options: Optional[dict] = None, *, find_options: Optional[dict] = None, - element: Optional[Locator] = None, + element: Optional[ElementHandle] = None, ) -> None: if element: - await element.click(**options) if options else element.click() + await element.click(**options) if options else await element.click() else: results = ( - self.find_element(query, page_instance, **find_options) + await self.find_element(query, page_instance, **find_options) if find_options - else self.find_element(query, page_instance) + else await self.find_element(query, page_instance) ) await results.click(**options) if options else await results.click() - @catch_exception + @common.catch_exception async def screenshot( self, page_instance: Optional[Page] = None, @@ -101,15 +105,15 @@ class Flaky: options: Optional[dict] = None, *, find_options: Optional[dict] = None, - element: Optional[Locator] = None, + element: Optional[ElementHandle] = None, ) -> None: if element: await element.screenshot(**options) if options else await element.screenshot() else: results = ( - self.find_element(query, page_instance, **find_options) + await self.find_element(query, page_instance, **find_options) if find_options - else self.find_element(query, page_instance) + else await self.find_element(query, page_instance) ) await results.screenshot(**options) if options else await results.screenshot() @@ -135,7 +139,7 @@ class RedditScreenshot(Flaky, Browser): ): self.post_lang: Optional[bool] = settings.config["reddit"]["thread"]["post_lang"] - async def __dark_theme( + async def __dark_theme( # TODO isn't working self, page_instance: Page, ) -> None: @@ -148,24 +152,24 @@ class RedditScreenshot(Flaky, Browser): await self.click( page_instance, - "header-user-dropdown", + ".header-user-dropdown", ) # It's normal not to find it, sometimes there is none :shrug: await self.click( page_instance, - ":nth-match(button) >> 'Settings'", + "button >> span:has-text('Settings')", ) await self.click( page_instance, - ":nth-match(button) >> 'Dark Mode'", + "button >> span:has-text('Dark Mode')", ) # Closes settings await self.click( page_instance, - "header-user-dropdown" + ".header-user-dropdown" ) async def __close_nsfw( @@ -225,7 +229,7 @@ class RedditScreenshot(Flaky, Browser): await self.screenshot( comment_page, - f"id=t1_{comment_obj['comment_id']}", + f"[data-testid='post-container']", {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) @@ -255,7 +259,7 @@ class RedditScreenshot(Flaky, Browser): await self.screenshot( main_page, - '[data-click-id="text"]', + '[data-test-id="post-content"] > [data-click-id="text"]', {"path": "assets/temp/png/story_content.png"}, ) @@ -322,7 +326,7 @@ class RedditScreenshot(Flaky, Browser): ) for idx, chunked_tasks in enumerate( - [chunk for chunk in chunks(async_tasks_primary, 10)], + [chunk for chunk in common.chunks(async_tasks_primary, 10)], start=1, ): chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0) diff --git a/webdriver/pyppeteer.py b/webdriver/pyppeteer.py index 9c630f6..b85a53d 100644 --- a/webdriver/pyppeteer.py +++ b/webdriver/pyppeteer.py @@ -16,9 +16,10 @@ from attr import attrs, attrib from attr.validators import instance_of from typing import Optional -from webdriver.common import ExceptionDecorator, chunks +import webdriver.common as common -catch_exception = ExceptionDecorator(default_exception=BrowserTimeoutError).catch_exception + +common.default_exception = BrowserTimeoutError @attrs @@ -60,7 +61,7 @@ class Browser: class Wait: @staticmethod - @catch_exception + @common.catch_exception async def find_xpath( page_instance: PageCls, xpath: Optional[str] = None, @@ -93,7 +94,7 @@ class Wait: el = await page_instance.waitForXPath(xpath) return el - @catch_exception + @common.catch_exception async def click( self, page_instance: Optional[PageCls] = None, @@ -120,7 +121,7 @@ class Wait: else: await el.click() - @catch_exception + @common.catch_exception async def screenshot( self, page_instance: Optional[PageCls] = None, @@ -297,7 +298,7 @@ class RedditScreenshot(Browser, Wait): await self.screenshot( main_page, - "//*[@data-click-id='text']", + "//div[@data-click-id='post-container']/child::div[@data-click-id='text']", {"path": "assets/temp/png/story_content.png"}, ) @@ -358,13 +359,13 @@ class RedditScreenshot(Browser, Wait): async_tasks_primary.append( self.screenshot( reddit_main, - f'//*[contains(@id, \'t3_{self.reddit_object["thread_id"]}\')]', + f'//*[@data-testid="post-container"]', {"path": "assets/temp/png/title.png"}, ) ) for idx, chunked_tasks in enumerate( - [chunk for chunk in chunks(async_tasks_primary, 10)], + [chunk for chunk in common.chunks(async_tasks_primary, 10)], start=1, ): chunk_list = async_tasks_primary.__len__() // 10 + (1 if async_tasks_primary.__len__() % 10 != 0 else 0) From 9c81447096a8e4588f3d97ac146739dabfe5dbe9 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Wed, 20 Jul 2022 23:29:00 +0300 Subject: [PATCH 36/39] changed locators a bit & updated chunks generator --- webdriver/common.py | 20 ++++++++++++++++---- webdriver/playwright.py | 24 +++++++++++++----------- webdriver/pyppeteer.py | 28 +++++++++++++++------------- 3 files changed, 44 insertions(+), 28 deletions(-) diff --git a/webdriver/common.py b/webdriver/common.py index 5e544f8..0061b3b 100644 --- a/webdriver/common.py +++ b/webdriver/common.py @@ -66,7 +66,19 @@ def catch_exception( # Lots of tabs - lots of memory # chunk needed to minimize memory required -def chunks(lst, n): - """Yield successive n-sized chunks from list.""" - for i in range(0, len(lst), n): - yield lst[i:i + n] +def chunks( + array: list, + size: int, +): + """ + Yield successive n-sized chunks from list. + + Args: + array: List to be chunked + size: size of a chunk + + Returns: + Generator with chunked list + """ + for i in range(0, len(array), size): + yield array[i:i + size] diff --git a/webdriver/playwright.py b/webdriver/playwright.py index f7145bb..b9bd0fb 100644 --- a/webdriver/playwright.py +++ b/webdriver/playwright.py @@ -14,7 +14,6 @@ from utils.console import print_step, print_substep import webdriver.common as common - common.default_exception = TimeoutError @@ -188,7 +187,7 @@ class RedditScreenshot(Flaky, Browser): # Triggers indirectly reload await self.click( page_instance, - 'button:has-text("Yes")', + "button:has-text('Yes')", {"timeout": 5000}, ) @@ -197,7 +196,7 @@ class RedditScreenshot(Flaky, Browser): await self.click( page_instance, - 'button:has-text("Click to see nsfw")', + "button:has-text('Click to see nsfw')", {"timeout": 5000}, ) @@ -229,7 +228,7 @@ class RedditScreenshot(Flaky, Browser): await self.screenshot( comment_page, - f"[data-testid='post-container']", + f"id=t1_{comment_obj['comment_id']}", {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) @@ -246,20 +245,23 @@ class RedditScreenshot(Flaky, Browser): ) split_story_tl = story_tl.split('\n') await main_page.evaluate( - # Find all elements - 'var elements = document.querySelectorAll(`[data-test-id="post-content"]' - ' > [data-click-id="text"] > div > p`);' + # Find all elements with story text + "const elements = document.querySelectorAll('[data-test-id=\"post-content\"]" + " > [data-click-id=\"text\"] > div > p');" # Set array with translated text - f"var texts = {split_story_tl};" + f"const texts = {split_story_tl};" # Map 2 arrays together - "var text_map = texts.map(function(e, i) { return [e, elements[i]]; });" + "const concat = (element, i) => [element, elements[i]];" + "const mappedTexts = texts.map(concat);" # Change text on the page - "for (i = 0; i < text_map.length; ++i) { text_map[i][1].textContent = text_map[i][0] ; };" + "for (i = 0; i < mappedTexts.length; ++i) {" + "mappedTexts[i][1].textContent = mappedTexts[i][0];" + "};" ) await self.screenshot( main_page, - '[data-test-id="post-content"] > [data-click-id="text"]', + "data-test-id='post-content' > data-click-id='text'", {"path": "assets/temp/png/story_content.png"}, ) diff --git a/webdriver/pyppeteer.py b/webdriver/pyppeteer.py index b85a53d..b2c1e2a 100644 --- a/webdriver/pyppeteer.py +++ b/webdriver/pyppeteer.py @@ -18,7 +18,6 @@ from typing import Optional import webdriver.common as common - common.default_exception = BrowserTimeoutError @@ -183,27 +182,27 @@ class RedditScreenshot(Browser, Wait): await self.click( page_instance, - "//*[contains(@class, 'header-user-dropdown')]", + "//div[@class='header-user-dropdown']", find_options={"timeout": 5000}, ) # It's normal not to find it, sometimes there is none :shrug: await self.click( page_instance, - "//*[contains(text(), 'Settings')]/ancestor::button[1]", + "//span[text()='Settings']/ancestor::button[1]", find_options={"timeout": 5000}, ) await self.click( page_instance, - "//*[contains(text(), 'Dark Mode')]/ancestor::button[1]", + "//span[text()='Dark Mode']/ancestor::button[1]", find_options={"timeout": 5000}, ) # Closes settings await self.click( page_instance, - "//*[contains(@class, 'header-user-dropdown')]", + "//div[@class='header-user-dropdown']", find_options={"timeout": 5000}, ) @@ -268,7 +267,7 @@ class RedditScreenshot(Browser, Wait): await self.screenshot( comment_page, - f"//*[contains(@id, 't1_{comment_obj['comment_id']}')]", + f"//div[@id='t1_{comment_obj['comment_id']}']", {"path": f"assets/temp/png/comment_{filename_idx}.png"}, ) @@ -285,15 +284,18 @@ class RedditScreenshot(Browser, Wait): ) split_story_tl = story_tl.split('\n') await main_page.evaluate( - # Find all elements - 'var elements = document.querySelectorAll(`[data-test-id="post-content"]' - ' > [data-click-id="text"] > div > p`);' + # Find all elements with story text + "const elements = document.querySelectorAll('[data-test-id=\"post-content\"]" + " > [data-click-id=\"text\"] > div > p');" # Set array with translated text - f"var texts = {split_story_tl};" + f"const texts = {split_story_tl};" # Map 2 arrays together - "var text_map = texts.map(function(e, i) { return [e, elements[i]]; });" + "const concat = (element, i) => [element, elements[i]];" + "const mappedTexts = texts.map(concat);" # Change text on the page - "for (i = 0; i < text_map.length; ++i) { text_map[i][1].textContent = text_map[i][0] ; };" + "for (i = 0; i < mappedTexts.length; ++i) {" + "mappedTexts[i][1].textContent = mappedTexts[i][0];" + "};" ) await self.screenshot( @@ -359,7 +361,7 @@ class RedditScreenshot(Browser, Wait): async_tasks_primary.append( self.screenshot( reddit_main, - f'//*[@data-testid="post-container"]', + f"//div[@data-testid='post-container']", {"path": "assets/temp/png/title.png"}, ) ) From a93a086212d25f9d0137827f3cc4e10702b595c9 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Thu, 21 Jul 2022 00:19:25 +0300 Subject: [PATCH 37/39] fixed dark_mode & typos --- webdriver/playwright.py | 9 +++++---- webdriver/pyppeteer.py | 7 ++++--- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/webdriver/playwright.py b/webdriver/playwright.py index b9bd0fb..c4be81f 100644 --- a/webdriver/playwright.py +++ b/webdriver/playwright.py @@ -244,6 +244,7 @@ class RedditScreenshot(Flaky, Browser): to_language=self.post_lang, ) split_story_tl = story_tl.split('\n') + await main_page.evaluate( # Find all elements with story text "const elements = document.querySelectorAll('[data-test-id=\"post-content\"]" @@ -280,7 +281,7 @@ class RedditScreenshot(Flaky, Browser): Path("assets/temp/png").mkdir(parents=True, exist_ok=True) # Get the thread screenshot - reddit_main = await self.browser.new_page() + reddit_main = await self.context.new_page() # noinspection Duplicates await reddit_main.goto(self.reddit_object["thread_url"]) @@ -292,11 +293,11 @@ class RedditScreenshot(Flaky, Browser): await self.__close_nsfw(reddit_main) # Translates submission title - if settings.config["reddit"]["thread"]["post_lang"]: + if self.post_lang: print_substep("Translating post...") texts_in_tl = ts.google( self.reddit_object["thread_title"], - to_language=settings.config["reddit"]["thread"]["post_lang"], + to_language=self.post_lang, ) await reddit_main.evaluate( @@ -306,7 +307,7 @@ class RedditScreenshot(Flaky, Browser): else: print_substep("Skipping translation...") - # No sense to move it in common.py + # No sense to move it to common.py # noinspection Duplicates async_tasks_primary = ( [ diff --git a/webdriver/pyppeteer.py b/webdriver/pyppeteer.py index b2c1e2a..7afc957 100644 --- a/webdriver/pyppeteer.py +++ b/webdriver/pyppeteer.py @@ -283,6 +283,7 @@ class RedditScreenshot(Browser, Wait): to_language=self.post_lang, ) split_story_tl = story_tl.split('\n') + await main_page.evaluate( # Find all elements with story text "const elements = document.querySelectorAll('[data-test-id=\"post-content\"]" @@ -331,11 +332,11 @@ class RedditScreenshot(Browser, Wait): await self.__close_nsfw(reddit_main) # Translates submission title - if settings.config["reddit"]["thread"]["post_lang"]: + if self.post_lang: print_substep("Translating post...") texts_in_tl = ts.google( self.reddit_object["thread_title"], - to_language=settings.config["reddit"]["thread"]["post_lang"], + to_language=self.post_lang, ) await reddit_main.evaluate( @@ -345,7 +346,7 @@ class RedditScreenshot(Browser, Wait): else: print_substep("Skipping translation...") - # No sense to move it in common.py + # No sense to move it to common.py # noinspection Duplicates async_tasks_primary = ( [ From 374263b35104b040658ca2a19d8e9ce302bb1358 Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Thu, 21 Jul 2022 01:54:27 +0300 Subject: [PATCH 38/39] fixed evaluates in webdrivers --- webdriver/playwright.py | 30 +++++++++--------------------- webdriver/pyppeteer.py | 30 +++++++++--------------------- 2 files changed, 18 insertions(+), 42 deletions(-) diff --git a/webdriver/playwright.py b/webdriver/playwright.py index c4be81f..30ea32c 100644 --- a/webdriver/playwright.py +++ b/webdriver/playwright.py @@ -222,8 +222,8 @@ class RedditScreenshot(Flaky, Browser): to_language=self.post_lang, ) await comment_page.evaluate( - f"document.querySelector('#t1_{comment_obj['comment_id']} > div:nth-child(2) " - f'> div > div[data-testid="comment"] > div\').textContent = {comment_tl}', + '([comment_id, comment_tl]) => document.querySelector(`#t1_${comment_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = comment_tl', # noqa + [comment_obj["comment_id"], comment_tl], ) await self.screenshot( @@ -246,23 +246,13 @@ class RedditScreenshot(Flaky, Browser): split_story_tl = story_tl.split('\n') await main_page.evaluate( - # Find all elements with story text - "const elements = document.querySelectorAll('[data-test-id=\"post-content\"]" - " > [data-click-id=\"text\"] > div > p');" - # Set array with translated text - f"const texts = {split_story_tl};" - # Map 2 arrays together - "const concat = (element, i) => [element, elements[i]];" - "const mappedTexts = texts.map(concat);" - # Change text on the page - "for (i = 0; i < mappedTexts.length; ++i) {" - "mappedTexts[i][1].textContent = mappedTexts[i][0];" - "};" + "(split_story_tl) => split_story_tl.map(function(element, i) { return [element, document.querySelectorAll('[data-test-id=\"post-content\"] > [data-click-id=\"text\"] > div > p')[i]]; }).forEach(mappedElement => mappedElement[1].textContent = mappedElement[0])", # noqa + split_story_tl, ) await self.screenshot( main_page, - "data-test-id='post-content' > data-click-id='text'", + "//div[@data-test-id='post-content']//div[@data-click-id='text']", {"path": "assets/temp/png/story_content.png"}, ) @@ -282,8 +272,7 @@ class RedditScreenshot(Flaky, Browser): # Get the thread screenshot reddit_main = await self.context.new_page() - # noinspection Duplicates - await reddit_main.goto(self.reddit_object["thread_url"]) + await reddit_main.goto(self.reddit_object["thread_url"]) # noqa if settings.config["settings"]["theme"] == "dark": await self.__dark_theme(reddit_main) @@ -301,15 +290,14 @@ class RedditScreenshot(Flaky, Browser): ) await reddit_main.evaluate( - "document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > " - f"div').textContent = {texts_in_tl}", + f"(texts_in_tl) => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = texts_in_tl", # noqa + texts_in_tl, ) else: print_substep("Skipping translation...") # No sense to move it to common.py - # noinspection Duplicates - async_tasks_primary = ( + async_tasks_primary = ( # noqa [ self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in self.screenshot_idx diff --git a/webdriver/pyppeteer.py b/webdriver/pyppeteer.py index 7afc957..d38c3a3 100644 --- a/webdriver/pyppeteer.py +++ b/webdriver/pyppeteer.py @@ -261,8 +261,8 @@ class RedditScreenshot(Browser, Wait): to_language=self.post_lang, ) await comment_page.evaluate( - f"([tl_content, tl_id]) => document.querySelector('#t1_{comment_obj['comment_id']} > div:nth-child(2) " - f'> div > div[data-testid="comment"] > div\').textContent = {comment_tl}', + '([comment_id, comment_tl]) => document.querySelector(`#t1_${comment_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = comment_tl', # noqa + [comment_obj["comment_id"], comment_tl], ) await self.screenshot( @@ -285,23 +285,13 @@ class RedditScreenshot(Browser, Wait): split_story_tl = story_tl.split('\n') await main_page.evaluate( - # Find all elements with story text - "const elements = document.querySelectorAll('[data-test-id=\"post-content\"]" - " > [data-click-id=\"text\"] > div > p');" - # Set array with translated text - f"const texts = {split_story_tl};" - # Map 2 arrays together - "const concat = (element, i) => [element, elements[i]];" - "const mappedTexts = texts.map(concat);" - # Change text on the page - "for (i = 0; i < mappedTexts.length; ++i) {" - "mappedTexts[i][1].textContent = mappedTexts[i][0];" - "};" + "(split_story_tl) => split_story_tl.map(function(element, i) { return [element, document.querySelectorAll('[data-test-id=\"post-content\"] > [data-click-id=\"text\"] > div > p')[i]]; }).forEach(mappedElement => mappedElement[1].textContent = mappedElement[0])", # noqa + split_story_tl, ) await self.screenshot( main_page, - "//div[@data-click-id='post-container']/child::div[@data-click-id='text']", + "//div[@data-test-id='post-content']//div[@data-click-id='text']", {"path": "assets/temp/png/story_content.png"}, ) @@ -321,8 +311,7 @@ class RedditScreenshot(Browser, Wait): # Get the thread screenshot reddit_main = await self.browser.newPage() - # noinspection Duplicates - await reddit_main.goto(self.reddit_object["thread_url"]) + await reddit_main.goto(self.reddit_object["thread_url"]) # noqa if settings.config["settings"]["theme"] == "dark": await self.__dark_theme(reddit_main) @@ -340,15 +329,14 @@ class RedditScreenshot(Browser, Wait): ) await reddit_main.evaluate( - "document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > " - f"div').textContent = {texts_in_tl}", + f"(texts_in_tl) => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = texts_in_tl", # noqa + texts_in_tl, ) else: print_substep("Skipping translation...") # No sense to move it to common.py - # noinspection Duplicates - async_tasks_primary = ( + async_tasks_primary = ( # noqa [ self.__collect_comment(self.reddit_object["comments"][idx], idx) for idx in self.screenshot_idx From c883dcba42bd4170634271fb1257459524fd94bc Mon Sep 17 00:00:00 2001 From: Drugsosos <44712637+Drugsosos@users.noreply.github.com> Date: Thu, 21 Jul 2022 02:22:27 +0300 Subject: [PATCH 39/39] hotfix in streamlabs_polly & GTTS --- TTS/GTTS.py | 2 +- TTS/streamlabs_polly.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/TTS/GTTS.py b/TTS/GTTS.py index 8974ddc..51dd92f 100644 --- a/TTS/GTTS.py +++ b/TTS/GTTS.py @@ -8,7 +8,7 @@ class GTTS: # voices = [] @staticmethod - async def run( + def run( text, filepath ) -> None: diff --git a/TTS/streamlabs_polly.py b/TTS/streamlabs_polly.py index a0b7e19..3cc8cb9 100644 --- a/TTS/streamlabs_polly.py +++ b/TTS/streamlabs_polly.py @@ -71,7 +71,7 @@ class StreamlabsPolly(BaseApiTTS): else: try: results = requests.get(response.json()["speak_url"]) - return results + return results.content except (KeyError, JSONDecodeError): try: if response.json()["error"] == "No text specified!":