# RedditVideoMakerBot/video_creation/screenshot_downloader.py

import json

from pathlib import Path
from typing import Dict
from utils import settings
from playwright.async_api import async_playwright  # pylint: disable=unused-import

# do not remove the above line

from playwright.sync_api import sync_playwright, ViewportSize
from rich.progress import track
import translators as ts

from utils.console import print_step, print_substep

# In-progress feature switch read by download_screenshots_of_reddit_posts: when True, the
# post body ('[data-click-id="text"]') is captured as story_content.png instead of taking
# one screenshot per comment.
storymode = False


def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int) -> None:
    """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png

    The thread title is always saved as ``title.png``. When the module-level ``storymode``
    flag is set, the post body is saved as ``story_content.png``; otherwise each comment is
    saved as ``comment_<idx>.png``, where ``<idx>`` is the comment's position in
    ``reddit_object["comments"]`` at the time of the call.

    If ``settings.config["reddit"]["thread"]["post_lang"]`` is set, the title and comment
    texts are translated and written into the page before the screenshots are taken.

    Args:
        reddit_object (Dict): Reddit object received from reddit/subreddit.py. The keys
            "thread_url", "thread_title" and "comments" are read; every comment must provide
            "comment_url", "comment_id" and "comment_body".
        screenshot_num (int): Number of screenshots to download

    Side effects:
        Comments whose screenshot timed out are dropped from ``reddit_object["comments"]``;
        all other comments are kept.
    """
    # Playwright raises its own TimeoutError, which does not derive from the builtin one, so
    # it must be named explicitly for the handler below to catch screenshot timeouts.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    print_step("Downloading screenshots of reddit posts...")

    # ! Make sure the reddit screenshots folder exists
    Path("assets/temp/png").mkdir(parents=True, exist_ok=True)

    # Target language for translation; a falsy value disables translation.
    post_lang = settings.config["reddit"]["thread"]["post_lang"]

    with sync_playwright() as p:
        print_substep("Launching Headless Browser...")

        browser = p.chromium.launch()
        context = browser.new_context()

        if settings.config["settings"]["theme"] == "dark":
            cookie_path = "./video_creation/data/cookie-dark-mode.json"
        else:
            cookie_path = "./video_creation/data/cookie-light-mode.json"
        # The context manager closes the cookie file as soon as it has been parsed.
        with open(cookie_path, encoding="utf-8") as cookie_file:
            cookies = json.load(cookie_file)
        context.add_cookies(cookies)  # load preference cookies

        # Get the thread screenshot
        page = context.new_page()
        page.goto(reddit_object["thread_url"], timeout=0)
        page.set_viewport_size(ViewportSize(width=1920, height=1080))
        if page.locator('[data-testid="content-gate"]').is_visible():
            # This means the post is NSFW and requires to click the proceed button.

            print_substep("Post is NSFW. You are spicy...")
            page.locator('[data-testid="content-gate"] button').click()
            page.wait_for_load_state()  # Wait for page to fully load

            if page.locator('[data-click-id="text"] button').is_visible():
                page.locator(
                    '[data-click-id="text"] button'
                ).click()  # Remove "Click to see nsfw" Button in Screenshot

        # translate code

        if post_lang:
            print_substep("Translating post...")
            texts_in_tl = ts.google(
                reddit_object["thread_title"],
                to_language=post_lang,
            )

            # Overwrite the title text in the live DOM so the screenshot shows the translation.
            page.evaluate(
                "tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content",
                texts_in_tl,
            )
        else:
            print_substep("Skipping translation...")

        page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png")

        if storymode:
            page.locator('[data-click-id="text"]').screenshot(
                path="assets/temp/png/story_content.png"
            )
        else:
            # id() of every comment whose screenshot timed out. They are removed after the
            # loop so the list is never modified while it is being iterated.
            timed_out_ids = set()

            for idx, comment in enumerate(
                track(reddit_object["comments"], "Downloading screenshots...")
            ):
                # Stop if we have reached the screenshot_num
                if idx >= screenshot_num:
                    break

                # NOTE(review): this inspects the page loaded by the previous navigation, not
                # the comment page opened just below - confirm that this order is intended.
                if page.locator('[data-testid="content-gate"]').is_visible():
                    page.locator('[data-testid="content-gate"] button').click()

                page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0)

                # translate code

                if post_lang:
                    comment_tl = ts.google(
                        comment["comment_body"],
                        to_language=post_lang,
                    )
                    page.evaluate(
                        '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content',
                        [comment_tl, comment["comment_id"]],
                    )
                try:
                    page.locator(f"#t1_{comment['comment_id']}").screenshot(
                        path=f"assets/temp/png/comment_{idx}.png"
                    )
                except (PlaywrightTimeoutError, TimeoutError):
                    # Skip only this comment and allow one extra comment to be tried so the
                    # requested number of screenshots can still be reached.
                    timed_out_ids.add(id(comment))
                    screenshot_num += 1
                    print_substep("TimeoutError: Skipping screenshot...")

            if timed_out_ids:
                # Drop just the comments that failed instead of deleting the whole "comments"
                # entry, which would leave later readers of reddit_object without any comments.
                # NOTE(review): screenshot files keep the original index, so a skipped comment
                # leaves a gap in the numbering - confirm how callers pair files with comments.
                reddit_object["comments"] = [
                    kept
                    for kept in reddit_object["comments"]
                    if id(kept) not in timed_out_ids
                ]
        print_substep("Screenshots downloaded Successfully.", style="bold green")
feat: added dark mode screenshots closes #13 3 years ago			`import json`
refactor: screenshot_downloader.py uses toml config 2 years ago
Initial commit 3 years ago			`from pathlib import Path`
fixed the typehiting that @HallowedDust5 broke for py versions < 3.9 2 years ago			`from typing import Dict`
refactor: screenshot_downloader.py uses toml config 2 years ago			`from utils import settings`
Remove unused imports and fix pylint errors 'async_playwright' must stay due to anomalous error 2 years ago			`from playwright.async_api import async_playwright # pylint: disable=unused-import`
refactor: moved save_data func to utils/videos.py from final_video.py refactor: refactored using black formatter. cmd: black . --line-length 101 2 years ago
Remove unused imports and fix pylint errors 'async_playwright' must stay due to anomalous error 2 years ago			`# do not remove the above line`
Merge branch 'develop' into pr/ProjectSlxsh/418 2 years ago
Remove unused imports and fix pylint errors 'async_playwright' must stay due to anomalous error 2 years ago			`from playwright.sync_api import sync_playwright, ViewportSize`
			`from rich.progress import track`
downloader mods 2 years ago			`import translators as ts`

Remove unused imports and fix pylint errors 'async_playwright' must stay due to anomalous error 2 years ago			`from utils.console import print_step, print_substep`
Initial commit 3 years ago
added IN-PROGRESS storymode flag chore: changed print sizing feat: started working on #10 2 years ago			`storymode = False`

backup 3 years ago
style: improved typehinting 2 years ago			`def download_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):`
Added typing and redid function comment 2 years ago			`"""Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png`

reformatted using black command "black . --line-length 101" 2 years ago			`Args:`
docs: improved examples in .config.template.toml fix: mutability issue in settings.py style: added autocomplete support for the settings.config var style: fixed autocomplete issues overall 2 years ago			`reddit_object (Dict): Reddit object received from reddit/subreddit.py`
			`screenshot_num (int): Number of screenshots to download`
Formatted with python-black 2 years ago			`"""`
reformatted using black command "black . --line-length 101" 2 years ago			`print_step("Downloading screenshots of reddit posts...")`
Initial commit 3 years ago
reformatted using black command "black . --line-length 101" 2 years ago			`# ! Make sure the reddit screenshots folder exists`
			`Path("assets/temp/png").mkdir(parents=True, exist_ok=True)`
Initial commit 3 years ago
reformatted using black command "black . --line-length 101" 2 years ago			`with sync_playwright() as p:`
			`print_substep("Launching Headless Browser...")`
Initial commit 3 years ago
reformatted using black command "black . --line-length 101" 2 years ago			`browser = p.chromium.launch()`
			`context = browser.new_context()`
Initial commit 3 years ago
refactor: screenshot_downloader.py uses toml config 2 years ago			`if settings.config["settings"]["theme"] == "dark":`
style: reformatted 2 years ago			`cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8")`
reformatted using black command "black . --line-length 101" 2 years ago			`else:`
style: reformatted 2 years ago			`cookie_file = open("./video_creation/data/cookie-light-mode.json", encoding="utf-8")`
reformatted using black command "black . --line-length 101" 2 years ago			`cookies = json.load(cookie_file)`
			`context.add_cookies(cookies) # load preference cookies`
			`# Get the thread screenshot`
			`page = context.new_page()`
added timeout prevention 2 years ago			`page.goto(reddit_object["thread_url"], timeout=0)`
reformatted using black command "black . --line-length 101" 2 years ago			`page.set_viewport_size(ViewportSize(width=1920, height=1080))`
			`if page.locator('[data-testid="content-gate"]').is_visible():`
			`# This means the post is NSFW and requires to click the proceed button.`
Initial commit 3 years ago
reformatted using black command "black . --line-length 101" 2 years ago			`print_substep("Post is NSFW. You are spicy...")`
			`page.locator('[data-testid="content-gate"] button').click()`
style: formatted with black 2 years ago			`page.wait_for_load_state() # Wait for page to fully load`
potentially fixes #978 2 years ago
			`if page.locator('[data-click-id="text"] button').is_visible():`
reformatted code 2 years ago			`page.locator(`
			`'[data-click-id="text"] button'`
			`).click() # Remove "Click to see nsfw" Button in Screenshot`
Initial commit 3 years ago
downloader mods 2 years ago			`# translate code`

refactor: screenshot_downloader.py uses toml config 2 years ago			`if settings.config["reddit"]["thread"]["post_lang"]:`
downloader mods 2 years ago			`print_substep("Translating post...")`
reformatted code 2 years ago			`texts_in_tl = ts.google(`
			`reddit_object["thread_title"],`
			`to_language=settings.config["reddit"]["thread"]["post_lang"],`
			`)`
downloader mods 2 years ago
removed old img modify, added textcontent change 2 years ago			`page.evaluate(`
Formatted with python-black 2 years ago			`"tl_content => document.querySelector('[data-test-id=\"post-content\"] > div:nth-child(3) > div > div').textContent = tl_content",`
reformatted code 2 years ago			`texts_in_tl,`
			`)`
downloader mods 2 years ago			`else:`
			`print_substep("Skipping translation...")`

style: reformatted 2 years ago			`page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png")`
removed old img modify, added textcontent change 2 years ago
reformatted using black command "black . --line-length 101" 2 years ago			`if storymode:`
reformatted code 2 years ago			`page.locator('[data-click-id="text"]').screenshot(`
			`path="assets/temp/png/story_content.png"`
			`)`
reformatted using black command "black . --line-length 101" 2 years ago			`else:`
reformatted code 2 years ago			`for idx, comment in enumerate(`
			`track(reddit_object["comments"], "Downloading screenshots...")`
			`):`
reformatted using black command "black . --line-length 101" 2 years ago			`# Stop if we have reached the screenshot_num`
			`if idx >= screenshot_num:`
			`break`
Initial commit 3 years ago
reformatted using black command "black . --line-length 101" 2 years ago			`if page.locator('[data-testid="content-gate"]').is_visible():`
			`page.locator('[data-testid="content-gate"] button').click()`
Initial commit 3 years ago
added timeout prevention 2 years ago			`page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0)`
removed old img modify, added textcontent change 2 years ago
			`# translate code`

refactor: screenshot_downloader.py uses toml config 2 years ago			`if settings.config["reddit"]["thread"]["post_lang"]:`
reformatted code 2 years ago			`comment_tl = ts.google(`
			`comment["comment_body"],`
			`to_language=settings.config["reddit"]["thread"]["post_lang"],`
			`)`
removed old img modify, added textcontent change 2 years ago			`page.evaluate(`
Formatted with python-black 2 years ago			'([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content',
reformatted code 2 years ago			`[comment_tl, comment["comment_id"]],`
			`)`
potentially fixes #978 2 years ago			`try:`
reformatted code 2 years ago			`page.locator(f"#t1_{comment['comment_id']}").screenshot(`
			`path=f"assets/temp/png/comment_{idx}.png"`
			`)`
potentially fixes #978 2 years ago			`except TimeoutError:`
			`del reddit_object["comments"]`
			`screenshot_num += 1`
reformatted code 2 years ago			`print("TimeoutError: Skipping screenshot...")`
potentially fixes #978 2 years ago			`continue`
reformatted using black command "black . --line-length 101" 2 years ago			`print_substep("Screenshots downloaded Successfully.", style="bold green")`