Merge pull request #564 from sineckers/sineckers-translate

translate function fully working
4 years ago · 8774bcb5f5
parent e23a150cd6 84bd523ed2
commit 8774bcb5f5
6 changed files with 69 additions and 19 deletions
--- a/0
+++ b/0
--- a/.env.template
+++ b/.env.template
@ -1,3 +1,4 @@
+
 REDDIT_CLIENT_ID="" #fFAGRNJru1FTz70BzhT3Zg
 #EXPLANATION the ID of your Reddit app of SCRIPT type
 #RANGE 12:30
@ -62,12 +63,19 @@ OPACITY="1" #.8
 #MATCH_TYPE float
 #OOB_ERROR The opacity HAS to be between 0 and 1

+# If you want to translate the comments to another language, set the language code here.
+# If empty, no translation will be done.
+POSTLANG=""
+#EXPLANATION Activates the translation feature; set the language code to translate to, or leave blank
+
 # see different voice options: todo: add docs
-VOICE="Matthew" #en_us_002
+VOICE="Matthew" # e.g. en_us_002
 #EXPLANATION sets the voice the TTS uses

-TTsChoice="polly" #polly
+TTsChoice="polly" # todo add docs
 #EXPLANATION the backend used for TTS, default is polly
+# IMPORTANT NOTE: if you use translation, you need to set this to gtts, or set it to tiktok and use a custom voice in your language
+

 #OPTIONAL
 STORYMODE="False"
--- a/TTS/GTTS.py
+++ b/TTS/GTTS.py
@ -1,5 +1,5 @@
 from gtts import gTTS
-
+import os

 class GTTS:
    def tts(
@ -9,5 +9,5 @@ class GTTS:
        random_speaker=False,
        censor=False,
    ):
-        tts = gTTS(text=req_text, lang="en", slow=False)
+        tts = gTTS(text=req_text, lang=os.getenv("POSTLANG") or "en", slow=False)
        tts.save(f"{filename}")
--- a/requirements.txt
+++ b/requirements.txt
@ -8,3 +8,4 @@ pytube==12.1.0
 requests==2.28.0
 rich==12.4.4
 sox==1.4.1
+translators==5.2.2
--- a/video_creation/screenshot_downloader.py
+++ b/video_creation/screenshot_downloader.py
@ -1,5 +1,6 @@
 import json
 from os import getenv
+import os
 from pathlib import Path

 from playwright.async_api import async_playwright
@ -10,6 +11,8 @@ from utils.console import print_step, print_substep
 import json
 from rich.console import Console

+import translators as ts
+
 console = Console()

 storymode = False
@ -18,8 +21,8 @@ storymode = False
 def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
    """Downloads screenshots of reddit posts as they are seen on the web.
    Args:
-            reddit_object: The Reddit Object you received in askreddit.py
-            screenshot_num: The number of screenshots you want to download.
+        reddit_object: The Reddit Object you received in askreddit.py
+        screenshot_num: The number of screenshots you want to download.
    """
    print_step("Downloading screenshots of reddit posts...")

@ -51,7 +54,22 @@ def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
                '[data-click-id="text"] button'
            ).click()  # Remove "Click to see nsfw" Button in Screenshot

-        page.locator('[data-test-id="post-content"]').screenshot(path="assets/temp/png/title.png")
+        # translate code
+
+        if getenv("POSTLANG"):
+            print_substep("Translating post...")
+            texts_in_tl = ts.google(reddit_object["thread_title"], to_language=os.getenv("POSTLANG"))
+
+            page.evaluate(
+                'tl_content => document.querySelector(\'[data-test-id="post-content"] > div:nth-child(3) > div > div\').textContent = tl_content', texts_in_tl
+            )
+        else:
+            print_substep("Skipping translation...")
+
+        page.locator('[data-test-id="post-content"]').screenshot(
+            path="assets/temp/png/title.png"
+        )
+
        if storymode:
            page.locator('[data-click-id="text"]').screenshot(
                path="assets/temp/png/story_content.png"
@ -60,7 +78,6 @@ def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
            for idx, comment in track(
                enumerate(reddit_object["comments"]), "Downloading screenshots..."
            ):
-
                # Stop if we have reached the screenshot_num
                if idx >= screenshot_num:
                    break
@ -69,7 +86,17 @@ def download_screenshots_of_reddit_posts(reddit_object, screenshot_num):
                    page.locator('[data-testid="content-gate"] button').click()

                page.goto(f'https://reddit.com{comment["comment_url"]}', timeout=0)
+
+                # translate code
+
+                if getenv("POSTLANG"):
+                    comment_tl = ts.google(comment["comment_body"], to_language=os.getenv("POSTLANG"))
+                    page.evaluate(
+                        '([tl_content, tl_id]) => document.querySelector(`#t1_${tl_id} > div:nth-child(2) > div > div[data-testid="comment"] > div`).textContent = tl_content', [comment_tl, comment['comment_id']]
+                    )
+
                page.locator(f"#t1_{comment['comment_id']}").screenshot(
                    path=f"assets/temp/png/comment_{idx}.png"
                )
+
        print_substep("Screenshots downloaded Successfully.", style="bold green")
--- a/video_creation/voices.py
+++ b/video_creation/voices.py
@ -10,11 +10,13 @@ from rich.progress import track

 from TTS.swapper import TTS

+console = Console()
+
 from utils.console import print_step, print_substep
 from utils.voice import sanitize_text

-console = Console()
-
+import translators as ts
+import os

 VIDEO_LENGTH: int = 40  # secs

@ -22,39 +24,51 @@ VIDEO_LENGTH: int = 40  # secs
 def save_text_to_mp3(reddit_obj):
    """Saves Text to MP3 files.
    Args:
-            reddit_obj : The reddit object you received from the reddit API in the askreddit.py file.
+        reddit_obj : The reddit object you received from the reddit API in the askreddit.py file.
    """
    print_step("Saving Text to MP3 files...")
    length = 0

    # Create a folder for the mp3 files.
    Path("assets/temp/mp3").mkdir(parents=True, exist_ok=True)
+
+    if os.getenv("POSTLANG"):
+        print_substep("Translating Texts...")
+        tl_title = ts.google(reddit_obj["thread_title"], to_language=os.getenv("POSTLANG"))
+    else:
+        print_substep("Skipping Translation...")
+        tl_title = reddit_obj["thread_title"]
+    
    TextToSpeech = TTS()
    TextToSpeech.tts(
-        sanitize_text(reddit_obj["thread_title"]),
-        filename="assets/temp/mp3/title.mp3",
+        sanitize_text(tl_title),
+        filename=f"assets/temp/mp3/title.mp3",
        random_speaker=False,
    )
    try:
-        length += MP3("assets/temp/mp3/title.mp3").info.length
+        length += MP3(f"assets/temp/mp3/title.mp3").info.length
    except HeaderNotFoundError:  # note to self AudioFileClip
-        length += sox.file_info.duration("assets/temp/mp3/title.mp3")
+        length += sox.file_info.duration(f"assets/temp/mp3/title.mp3")
    if getenv("STORYMODE").casefold() == "true":
        TextToSpeech.tts(
            sanitize_text(reddit_obj["thread_content"]),
-            filename="assets/temp/mp3/story_content.mp3",
+            filename=f"assets/temp/mp3/story_content.mp3",
            random_speaker=False,
        )
        # 'story_content'
    com = 0
    for comment in track((reddit_obj["comments"]), "Saving..."):
-        # ! Stop creating mp3 files if the length is greater than VIDEO_LENGTH seconds. This can be longer
-        # but this is just a good_voices starting point
+        # ! Stop creating mp3 files if the length is greater than VIDEO_LENGTH seconds. This can be longer, but this is just a good starting point
        if length > VIDEO_LENGTH:
            break

+        if os.getenv("POSTLANG"):
+            tl_comment = ts.google(comment["comment_body"], to_language=os.getenv("POSTLANG"))
+        else:
+            tl_comment = comment["comment_body"]
+
        TextToSpeech.tts(
-            sanitize_text(comment["comment_body"]),
+            sanitize_text(tl_comment),
            filename=f"assets/temp/mp3/{com}.mp3",
            random_speaker=False,
        )