Fixed error when taking screenshots

We must improve the screenshot to the comments (I do not have time to do if someone does it would be good) If the video generation fails, re-launch it, as it depends on the network and machine you have, it will be more or less likely to fail.
12 months ago · 097e29bbd2
parent 3d4c34d60c
commit 097e29bbd2
1 changed files with 60 additions and 23 deletions
--- a/video_creation/screenshot_downloader.py
+++ b/video_creation/screenshot_downloader.py
@ -73,7 +73,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
    with sync_playwright() as p:
        print_substep("Launching Headless Browser...")

-        browser = p.chromium.launch(
+        browser = p.firefox.launch(
            headless=True
        )  # headless=False will show the browser for debugging purposes
        # Device scale factor (or dsf for short) allows us to increase the resolution of the screenshots
@ -99,6 +99,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
        page.goto("https://www.reddit.com/login", timeout=0)
        page.set_viewport_size(ViewportSize(width=1920, height=1080))
        page.wait_for_load_state()
+        page.wait_for_timeout(timeout=500)

        page.locator(f'input[name="username"]').fill(settings.config["reddit"]["creds"]["username"])
        page.locator(f'input[name="password"]').fill(settings.config["reddit"]["creds"]["password"])
@ -160,10 +161,13 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
                reddit_object["thread_title"],
                to_language=lang,
                translator="google",
-            )
+            )   
+
+            page.wait_for_timeout(2000) #small delay
+            page.wait_for_selector('shreddit-post > h1', timeout=5000) # we wait for the title to appere 

            page.evaluate(
-                "tl_content => document.querySelector('[data-adclicklocation=\"title\"] > div > div > h1').textContent = tl_content",
+                "tl_content => document.querySelector('shreddit-post > h1').textContent = tl_content",
                texts_in_tl,
            )
        else:
@ -177,12 +181,12 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
                # zoom the body of the page
                page.evaluate("document.body.style.zoom=" + str(zoom))
                # as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom
-                location = page.locator('[data-test-id="post-content"]').bounding_box()
+                location = page.locator('shreddit-post').bounding_box()
                for i in location:
                    location[i] = float("{:.2f}".format(location[i] * zoom))
                page.screenshot(clip=location, path=postcontentpath)
            else:
-                page.locator('[data-test-id="post-content"]').screenshot(path=postcontentpath)
+                page.locator('shreddit-post').screenshot(path=postcontentpath)
        except Exception as e:
            print_substep("Something went wrong!", style="red")
            resp = input(
@ -220,7 +224,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
                if page.locator('[data-testid="content-gate"]').is_visible():
                    page.locator('[data-testid="content-gate"] button').click()

-                page.goto(f"https://new.reddit.com/{comment['comment_url']}")
+                page.goto(f"https://new.reddit.com{comment['comment_url']}")

                # translate code

@ -230,30 +234,63 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
                        translator="google",
                        to_language=settings.config["reddit"]["thread"]["post_lang"],
                    )
+                    
+                    page.wait_for_timeout(timeout=1500)
                    page.evaluate(
-                        '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content',
+                        '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id}-comment-rtjson-content > div:nth-child(1) > p`).textContent = tl_content',
                        [comment_tl, comment["comment_id"]],
                    )
                try:
                    if settings.config["settings"]["zoom"] != 1:
-                        # store zoom settings
+                       if settings.config["settings"]["zoom"] != 1:
+                        # Store zoom settings
                        zoom = settings.config["settings"]["zoom"]
-                        # zoom the body of the page
-                        page.evaluate("document.body.style.zoom=" + str(zoom))
-                        # scroll comment into view
-                        page.locator(f"#t1_{comment['comment_id']}").scroll_into_view_if_needed()
-                        # as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom
-                        location = page.locator(f"#t1_{comment['comment_id']}").bounding_box()
-                        for i in location:
-                            location[i] = float("{:.2f}".format(location[i] * zoom))
-                        page.screenshot(
-                            clip=location,
-                            path=f"assets/temp/{reddit_id}/png/comment_{idx}.png",
-                        )
+
+                        # Zoom the body of the page
+                        page.evaluate(f"document.body.style.zoom = {zoom}")
+
+                        # Scroll the parent of the comment into view
+                        parent_locator = page.locator(f"#t1_{comment['comment_id']}-comment-rtjson-content").locator('xpath=..')
+                        parent_locator.scroll_into_view_if_needed()
+
+                        # Get the bounding box of the parent element
+                        location = parent_locator.bounding_box()
+
+                        # Ensure bounding box is valid
+                        if location:
+                            # Adjust for zoom (since zooming doesn't change the div's actual properties)
+                            location['x'] = float("{:.2f}".format(location['x'] * zoom))
+                            location['y'] = float("{:.2f}".format(location['y'] * zoom))
+                            location['width'] = float("{:.2f}".format(location['width'] * zoom))
+                            location['height'] = float("{:.2f}".format(location['height'] * zoom))
+
+                            # Adjust the height to be exactly 200px
+                            location['height'] = 200
+
+                            # Take a screenshot of the parent element with the adjusted height
+                            page.screenshot(
+                                clip=location,
+                                path=f"assets/temp/{reddit_id}/png/comment_{idx}_parent.png",
+                            )
+                        else:
+                            print("Could not get the bounding box of the parent element.")
                    else:
-                        page.locator(f"#t1_{comment['comment_id']}").screenshot(
-                            path=f"assets/temp/{reddit_id}/png/comment_{idx}.png"
-                        )
+                        # getting the bounding_box
+                        parent_locator = page.locator(f"#t1_{comment['comment_id']}-comment-rtjson-content").locator('xpath=..')
+                        location = parent_locator.bounding_box()
+
+                        # bounding_box is valid ?
+                        if location:
+                            # ajust 200px
+                            location['height'] = 200
+
+                            #  make screenshot
+                            page.screenshot(
+                                path=f"assets/temp/{reddit_id}/png/comment_{idx}.png",
+                                clip=location  # resize to 200px
+                            )
+                        else:
+                            print("No se pudo obtener el cuadro delimitador del elemento.")
                except TimeoutError:
                    del reddit_object["comments"]
                    screenshot_num += 1