From 097e29bbd2be932e66257f6803818b55c9e8fbb5 Mon Sep 17 00:00:00 2001 From: "Mohamed B." Date: Mon, 23 Sep 2024 20:05:07 +0200 Subject: [PATCH] Fixed error when taking screenshots We must improve the screenshot to the comments (I do not have time to do if someone does it would be good) If the video generation fails, re-launch it, as it depends on the network and machine you have, it will be more or less likely to fail. --- video_creation/screenshot_downloader.py | 83 ++++++++++++++++++------- 1 file changed, 60 insertions(+), 23 deletions(-) diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 6b56e99..fb86b2a 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -73,7 +73,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): with sync_playwright() as p: print_substep("Launching Headless Browser...") - browser = p.chromium.launch( + browser = p.firefox.launch( headless=True ) # headless=False will show the browser for debugging purposes # Device scale factor (or dsf for short) allows us to increase the resolution of the screenshots @@ -99,6 +99,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): page.goto("https://www.reddit.com/login", timeout=0) page.set_viewport_size(ViewportSize(width=1920, height=1080)) page.wait_for_load_state() + page.wait_for_timeout(timeout=500) page.locator(f'input[name="username"]').fill(settings.config["reddit"]["creds"]["username"]) page.locator(f'input[name="password"]').fill(settings.config["reddit"]["creds"]["password"]) @@ -160,10 +161,13 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): reddit_object["thread_title"], to_language=lang, translator="google", - ) + ) + + page.wait_for_timeout(2000) #small delay + page.wait_for_selector('shreddit-post > h1', timeout=5000) # we wait for the title to appere page.evaluate( - "tl_content => document.querySelector('[data-adclicklocation=\"title\"] > div > div > h1').textContent = tl_content", + "tl_content => document.querySelector('shreddit-post > h1').textContent = tl_content", texts_in_tl, ) else: @@ -177,12 +181,12 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): # zoom the body of the page page.evaluate("document.body.style.zoom=" + str(zoom)) # as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom - location = page.locator('[data-test-id="post-content"]').bounding_box() + location = page.locator('shreddit-post').bounding_box() for i in location: location[i] = float("{:.2f}".format(location[i] * zoom)) page.screenshot(clip=location, path=postcontentpath) else: - page.locator('[data-test-id="post-content"]').screenshot(path=postcontentpath) + page.locator('shreddit-post').screenshot(path=postcontentpath) except Exception as e: print_substep("Something went wrong!", style="red") resp = input( @@ -220,7 +224,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): if page.locator('[data-testid="content-gate"]').is_visible(): page.locator('[data-testid="content-gate"] button').click() - page.goto(f"https://new.reddit.com/{comment['comment_url']}") + page.goto(f"https://new.reddit.com{comment['comment_url']}") # translate code @@ -230,30 +234,63 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): translator="google", to_language=settings.config["reddit"]["thread"]["post_lang"], ) + + page.wait_for_timeout(timeout=1500) page.evaluate( - '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', + '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id}-comment-rtjson-content > div:nth-child(1) > p`).textContent = tl_content', [comment_tl, comment["comment_id"]], ) try: if settings.config["settings"]["zoom"] != 1: - # store zoom settings + if settings.config["settings"]["zoom"] != 1: + # Store zoom settings zoom = settings.config["settings"]["zoom"] - # zoom the body of the page - page.evaluate("document.body.style.zoom=" + str(zoom)) - # scroll comment into view - page.locator(f"#t1_{comment['comment_id']}").scroll_into_view_if_needed() - # as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom - location = page.locator(f"#t1_{comment['comment_id']}").bounding_box() - for i in location: - location[i] = float("{:.2f}".format(location[i] * zoom)) - page.screenshot( - clip=location, - path=f"assets/temp/{reddit_id}/png/comment_{idx}.png", - ) + + # Zoom the body of the page + page.evaluate(f"document.body.style.zoom = {zoom}") + + # Scroll the parent of the comment into view + parent_locator = page.locator(f"#t1_{comment['comment_id']}-comment-rtjson-content").locator('xpath=..') + parent_locator.scroll_into_view_if_needed() + + # Get the bounding box of the parent element + location = parent_locator.bounding_box() + + # Ensure bounding box is valid + if location: + # Adjust for zoom (since zooming doesn't change the div's actual properties) + location['x'] = float("{:.2f}".format(location['x'] * zoom)) + location['y'] = float("{:.2f}".format(location['y'] * zoom)) + location['width'] = float("{:.2f}".format(location['width'] * zoom)) + location['height'] = float("{:.2f}".format(location['height'] * zoom)) + + # Adjust the height to be exactly 200px + location['height'] = 200 + + # Take a screenshot of the parent element with the adjusted height + page.screenshot( + clip=location, + path=f"assets/temp/{reddit_id}/png/comment_{idx}_parent.png", + ) + else: + print("Could not get the bounding box of the parent element.") else: - page.locator(f"#t1_{comment['comment_id']}").screenshot( - path=f"assets/temp/{reddit_id}/png/comment_{idx}.png" - ) + # getting the bounding_box + parent_locator = page.locator(f"#t1_{comment['comment_id']}-comment-rtjson-content").locator('xpath=..') + location = parent_locator.bounding_box() + + # bounding_box is valid ? + if location: + # ajust 200px + location['height'] = 200 + + # make screenshot + page.screenshot( + path=f"assets/temp/{reddit_id}/png/comment_{idx}.png", + clip=location # resize to 200px + ) + else: + print("No se pudo obtener el cuadro delimitador del elemento.") except TimeoutError: del reddit_object["comments"] screenshot_num += 1