Merge pull request #564 from sineckers/sineckers-translate

translate function fully working
pull/660/head
Jason 2 years ago committed by GitHub
commit 8774bcb5f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,3 +1,4 @@
REDDIT_CLIENT_ID="" #fFAGRNJru1FTz70BzhT3Zg REDDIT_CLIENT_ID="" #fFAGRNJru1FTz70BzhT3Zg
#EXPLANATION the ID of your Reddit app of SCRIPT type #EXPLANATION the ID of your Reddit app of SCRIPT type
#RANGE 12:30 #RANGE 12:30
@ -62,12 +63,19 @@ OPACITY="1" #.8
#MATCH_TYPE float #MATCH_TYPE float
#OOB_ERROR The opacity HAS to be between 0 and 1 #OOB_ERROR The opacity HAS to be between 0 and 1
# If you want to translate the comments to another language, set the language code here.
# If empty, no translation will be done.
POSTLANG=""
#EXPLANATION Activates the translation feature; set the target language code to translate to, or leave blank to disable translation
# see different voice options: todo: add docs # see different voice options: todo: add docs
VOICE="Matthew" #en_us_002 VOICE="Matthew" # e.g. en_us_002
#EXPLANATION sets the voice the TTS uses #EXPLANATION sets the voice the TTS uses
TTsChoice="polly" #polly TTsChoice="polly" # todo add docs
#EXPLANATION the backend used for TTS, default is polly #EXPLANATION the backend used for TTS, default is polly
# IMPORTANT NOTE: if you use translation, you need to set this to gtts, or set it to tiktok and use a custom voice in your language
#OPTIONAL #OPTIONAL
STORYMODE="False" STORYMODE="False"

@ -1,5 +1,5 @@
from gtts import gTTS from gtts import gTTS
import os
class GTTS: class GTTS:
def tts( def tts(
@ -9,5 +9,5 @@ class GTTS:
random_speaker=False, random_speaker=False,
censor=False, censor=False,
): ):
tts = gTTS(text=req_text, lang="en", slow=False) tts = gTTS(text=req_text, lang=os.getenv("POSTLANG") or "en", slow=False)
tts.save(f"{filename}") tts.save(f"{filename}")

@ -8,3 +8,4 @@ pytube==12.1.0
requests==2.28.0 requests==2.28.0
rich==12.4.4 rich==12.4.4
sox==1.4.1 sox==1.4.1
translators==5.2.2

@ -1,5 +1,6 @@
import json import json
from os import getenv from os import getenv
import os
from pathlib import Path from pathlib import Path
from playwright.async_api import async_playwright from playwright.async_api import async_playwright
@ -10,6 +11,8 @@ from utils.console import print_step, print_substep
import json import json
from rich.console import Console from rich.console import Console
import translators as ts
console = Console() console = Console()
storymode = False storymode = False
@ -18,8 +21,8 @@ storymode = False
def download_screenshots_of_reddit_posts(reddit_object, screenshot_num): def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
"""Downloads screenshots of reddit posts as they are seen on the web. """Downloads screenshots of reddit posts as they are seen on the web.
Args: Args:
reddit_object: The Reddit Object you received in askreddit.py reddit_object: The Reddit Object you received in askreddit.py
screenshot_num: The number of screenshots you want to download. screenshot_num: The number of screenshots you want to download.
""" """
print_step("Downloading screenshots of reddit posts...") print_step("Downloading screenshots of reddit posts...")
@ -51,7 +54,22 @@ def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
'[data-click-id="text"] button' '[data-click-id="text"] button'
).click() # Remove "Click to see nsfw" Button in Screenshot ).click() # Remove "Click to see nsfw" Button in Screenshot
page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png") # translate code
if getenv("POSTLANG"):
print_substep("Translating post...")
texts_in_tl = ts.google(reddit_object["thread_title"], to_language=os.getenv("POSTLANG"))
page.evaluate(
'tl_content => document.querySelector(\'[data-test-id="post-content"] > div:nth-child(3) > div > div\').textContent = tl_content', texts_in_tl
)
else:
print_substep("Skipping translation...")
page.locator('[data-test-id="post-content"]').screenshot(
path="assets/temp/png/title.png"
)
if storymode: if storymode:
page.locator('[data-click-id="text"]').screenshot( page.locator('[data-click-id="text"]').screenshot(
path="assets/temp/png/story_content.png" path="assets/temp/png/story_content.png"
@ -60,7 +78,6 @@ def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
for idx, comment in track( for idx, comment in track(
enumerate(reddit_object["comments"]), "Downloading screenshots..." enumerate(reddit_object["comments"]), "Downloading screenshots..."
): ):
# Stop if we have reached the screenshot_num # Stop if we have reached the screenshot_num
if idx >= screenshot_num: if idx >= screenshot_num:
break break
@ -69,7 +86,17 @@ def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
page.locator('[data-testid="content-gate"] button').click() page.locator('[data-testid="content-gate"] button').click()
page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0) page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0)
# translate code
if getenv("POSTLANG"):
comment_tl = ts.google(comment["comment_body"], to_language=os.getenv("POSTLANG"))
page.evaluate(
'([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', [comment_tl, comment['comment_id']]
)
page.locator(f"#t1_{comment['comment_id']}").screenshot( page.locator(f"#t1_{comment['comment_id']}").screenshot(
path=f"assets/temp/png/comment_{idx}.png" path=f"assets/temp/png/comment_{idx}.png"
) )
print_substep("Screenshots downloaded Successfully.", style="bold green") print_substep("Screenshots downloaded Successfully.", style="bold green")

@ -10,11 +10,13 @@ from rich.progress import track
from TTS.swapper import TTS from TTS.swapper import TTS
console = Console()
from utils.console import print_step, print_substep from utils.console import print_step, print_substep
from utils.voice import sanitize_text from utils.voice import sanitize_text
console = Console() import translators as ts
import os
VIDEO_LENGTH: int = 40 # secs VIDEO_LENGTH: int = 40 # secs
@ -22,39 +24,51 @@ VIDEO_LENGTH: int = 40 # secs
def save_text_to_mp3(reddit_obj): def save_text_to_mp3(reddit_obj):
"""Saves Text to MP3 files. """Saves Text to MP3 files.
Args: Args:
reddit_obj : The reddit object you received from the reddit API in the askreddit.py file. reddit_obj : The reddit object you received from the reddit API in the askreddit.py file.
""" """
print_step("Saving Text to MP3 files...") print_step("Saving Text to MP3 files...")
length = 0 length = 0
# Create a folder for the mp3 files. # Create a folder for the mp3 files.
Path("assets/temp/mp3").mkdir(parents=True, exist_ok=True) Path("assets/temp/mp3").mkdir(parents=True, exist_ok=True)
if os.getenv("POSTLANG"):
print_substep("Translating Texts...")
tl_title = ts.google(reddit_obj["thread_title"], to_language=os.getenv("POSTLANG"))
else:
print_substep("Skipping Translation...")
tl_title = reddit_obj["thread_title"]
TextToSpeech = TTS() TextToSpeech = TTS()
TextToSpeech.tts( TextToSpeech.tts(
sanitize_text(reddit_obj["thread_title"]), sanitize_text(tl_title),
filename="assets/temp/mp3/title.mp3", filename=f"assets/temp/mp3/title.mp3",
random_speaker=False, random_speaker=False,
) )
try: try:
length += MP3("assets/temp/mp3/title.mp3").info.length length += MP3(f"assets/temp/mp3/title.mp3").info.length
except HeaderNotFoundError: # note to self AudioFileClip except HeaderNotFoundError: # note to self AudioFileClip
length += sox.file_info.duration("assets/temp/mp3/title.mp3") length += sox.file_info.duration(f"assets/temp/mp3/title.mp3")
if getenv("STORYMODE").casefold() == "true": if getenv("STORYMODE").casefold() == "true":
TextToSpeech.tts( TextToSpeech.tts(
sanitize_text(reddit_obj["thread_content"]), sanitize_text(reddit_obj["thread_content"]),
filename="assets/temp/mp3/story_content.mp3", filename=f"assets/temp/mp3/story_content.mp3",
random_speaker=False, random_speaker=False,
) )
# 'story_content' # 'story_content'
com = 0 com = 0
for comment in track((reddit_obj["comments"]), "Saving..."): for comment in track((reddit_obj["comments"]), "Saving..."):
# ! Stop creating mp3 files if the length is greater than VIDEO_LENGTH seconds. This can be longer # ! Stop creating mp3 files if the length is greater than VIDEO_LENGTH seconds. This can be longer, but this is just a good_voices starting point
# but this is just a good_voices starting point
if length > VIDEO_LENGTH: if length > VIDEO_LENGTH:
break break
if os.getenv("POSTLANG"):
tl_comment = ts.google(comment["comment_body"], to_language=os.getenv("POSTLANG"))
else:
tl_comment = comment["comment_body"]
TextToSpeech.tts( TextToSpeech.tts(
sanitize_text(comment["comment_body"]), sanitize_text(tl_comment),
filename=f"assets/temp/mp3/{com}.mp3", filename=f"assets/temp/mp3/{com}.mp3",
random_speaker=False, random_speaker=False,
) )

Loading…
Cancel
Save