@ -73,7 +73,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
with sync_playwright ( ) as p :
print_substep ( " Launching Headless Browser... " )
browser = p . chromium . launch (
browser = p . firefox . launch (
headless = True
) # headless=False will show the browser for debugging purposes
# Device scale factor (or dsf for short) allows us to increase the resolution of the screenshots
@ -99,6 +99,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
page . goto ( " https://www.reddit.com/login " , timeout = 0 )
page . set_viewport_size ( ViewportSize ( width = 1920 , height = 1080 ) )
page . wait_for_load_state ( )
page . wait_for_timeout ( timeout = 500 )
page . locator ( f ' input[name= " username " ] ' ) . fill ( settings . config [ " reddit " ] [ " creds " ] [ " username " ] )
page . locator ( f ' input[name= " password " ] ' ) . fill ( settings . config [ " reddit " ] [ " creds " ] [ " password " ] )
@ -160,10 +161,13 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
reddit_object [ " thread_title " ] ,
to_language = lang ,
translator = " google " ,
)
)
page . wait_for_timeout ( 2000 ) #small delay
page . wait_for_selector ( ' shreddit-post > h1 ' , timeout = 5000 ) # we wait for the title to appere
page . evaluate (
" tl_content => document.querySelector( ' [data-adclicklocation= \" title \" ] > div > div > h1 ' ).textContent = tl_content " ,
" tl_content => document.querySelector( ' shreddit-post > h1' ).textContent = tl_content " ,
texts_in_tl ,
)
else :
@ -177,12 +181,12 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
# zoom the body of the page
page . evaluate ( " document.body.style.zoom= " + str ( zoom ) )
# as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom
location = page . locator ( ' [data-te st-id=" post-content" ] ' ) . bounding_box ( )
location = page . locator ( ' shreddi t-post' ) . bounding_box ( )
for i in location :
location [ i ] = float ( " {:.2f} " . format ( location [ i ] * zoom ) )
page . screenshot ( clip = location , path = postcontentpath )
else :
page . locator ( ' [data-te st-id=" post-content" ] ' ) . screenshot ( path = postcontentpath )
page . locator ( ' shreddi t-post' ) . screenshot ( path = postcontentpath )
except Exception as e :
print_substep ( " Something went wrong! " , style = " red " )
resp = input (
@ -220,7 +224,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
if page . locator ( ' [data-testid= " content-gate " ] ' ) . is_visible ( ) :
page . locator ( ' [data-testid= " content-gate " ] button ' ) . click ( )
page . goto ( f " https://new.reddit.com / { comment [ ' comment_url ' ] } " )
page . goto ( f " https://new.reddit.com { comment [ ' comment_url ' ] } " )
# translate code
@ -230,30 +234,63 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
translator = " google " ,
to_language = settings . config [ " reddit " ] [ " thread " ] [ " post_lang " ] ,
)
page . wait_for_timeout ( timeout = 1500 )
page . evaluate (
' ([tl_content, tl_id]) => document.querySelector(`#t1_$ {tl_id} > div:nth-child(2) > div > div[data-testid= " comment " ] > div`).textContent = tl_content ' ,
' ([tl_content, tl_id]) => document.querySelector(`#t1_$ {tl_id} -comment-rtjson-content > div:nth-child(1) > p `).textContent = tl_content' ,
[ comment_tl , comment [ " comment_id " ] ] ,
)
try :
if settings . config [ " settings " ] [ " zoom " ] != 1 :
# store zoom settings
if settings . config [ " settings " ] [ " zoom " ] != 1 :
# Store zoom settings
zoom = settings . config [ " settings " ] [ " zoom " ]
# zoom the body of the page
page . evaluate ( " document.body.style.zoom= " + str ( zoom ) )
# scroll comment into view
page . locator ( f " #t1_ { comment [ ' comment_id ' ] } " ) . scroll_into_view_if_needed ( )
# as zooming the body doesn't change the properties of the divs, we need to adjust for the zoom
location = page . locator ( f " #t1_ { comment [ ' comment_id ' ] } " ) . bounding_box ( )
for i in location :
location [ i ] = float ( " {:.2f} " . format ( location [ i ] * zoom ) )
page . screenshot (
clip = location ,
path = f " assets/temp/ { reddit_id } /png/comment_ { idx } .png " ,
)
# Zoom the body of the page
page . evaluate ( f " document.body.style.zoom = { zoom } " )
# Scroll the parent of the comment into view
parent_locator = page . locator ( f " #t1_ { comment [ ' comment_id ' ] } -comment-rtjson-content " ) . locator ( ' xpath=.. ' )
parent_locator . scroll_into_view_if_needed ( )
# Get the bounding box of the parent element
location = parent_locator . bounding_box ( )
# Ensure bounding box is valid
if location :
# Adjust for zoom (since zooming doesn't change the div's actual properties)
location [ ' x ' ] = float ( " {:.2f} " . format ( location [ ' x ' ] * zoom ) )
location [ ' y ' ] = float ( " {:.2f} " . format ( location [ ' y ' ] * zoom ) )
location [ ' width ' ] = float ( " {:.2f} " . format ( location [ ' width ' ] * zoom ) )
location [ ' height ' ] = float ( " {:.2f} " . format ( location [ ' height ' ] * zoom ) )
# Adjust the height to be exactly 200px
location [ ' height ' ] = 200
# Take a screenshot of the parent element with the adjusted height
page . screenshot (
clip = location ,
path = f " assets/temp/ { reddit_id } /png/comment_ { idx } _parent.png " ,
)
else :
print ( " Could not get the bounding box of the parent element. " )
else :
page . locator ( f " #t1_ { comment [ ' comment_id ' ] } " ) . screenshot (
path = f " assets/temp/ { reddit_id } /png/comment_ { idx } .png "
)
# getting the bounding_box
parent_locator = page . locator ( f " #t1_ { comment [ ' comment_id ' ] } -comment-rtjson-content " ) . locator ( ' xpath=.. ' )
location = parent_locator . bounding_box ( )
# bounding_box is valid ?
if location :
# ajust 200px
location [ ' height ' ] = 200
# make screenshot
page . screenshot (
path = f " assets/temp/ { reddit_id } /png/comment_ { idx } .png " ,
clip = location # resize to 200px
)
else :
print ( " No se pudo obtener el cuadro delimitador del elemento. " )
except TimeoutError :
del reddit_object [ " comments " ]
screenshot_num + = 1