change from pyttsx3 to custom tiktok api wrapper

3 years ago · bfa8f3bc22
parent 49102605af
commit bfa8f3bc22
5 changed files with 139 additions and 29 deletions
--- a/.env.template
+++ b/.env.template
@ -14,5 +14,5 @@ SUBREDDIT=""
 # Range is 0 -> 1
 OPACITY="0.9"

-# Valid options are "male" and "female" for the variable below
-VoiceGender=""
+# Valid options are listed in examples/ValidOptionsTTS.txt; leave blank for the default voice
+Voice=""
--- a/examples/ValidOptionsTTS.txt
+++ b/examples/ValidOptionsTTS.txt
@ -0,0 +1,43 @@
+# DISNEY VOICES
+'en_us_ghostface',  # Ghost Face
+'en_us_chewbacca',  # Chewbacca
+'en_us_c3po',  # C3PO
+'en_us_stitch',  # Stitch
+'en_us_stormtrooper',  # Stormtrooper
+'en_us_rocket',  # Rocket
+
+# ENGLISH VOICES
+'en_au_001',  # English AU - Female
+'en_au_002',  # English AU - Male
+'en_uk_001',  # English UK - Male 1
+'en_uk_003',  # English UK - Male 2
+'en_us_001',  # English US - Female (Int. 1)
+'en_us_002',  # English US - Female (Int. 2)
+'en_us_006',  # English US - Male 1
+'en_us_007',  # English US - Male 2
+'en_us_009',  # English US - Male 3
+'en_us_010',  # English US - Male 4
+
+# EUROPE VOICES
+'fr_001',  # French - Male 1
+'fr_002',  # French - Male 2
+'de_001',  # German - Female
+'de_002',  # German - Male
+'es_002',  # Spanish - Male
+
+# AMERICA VOICES
+'es_mx_002',  # Spanish MX - Male
+'br_001',  # Portuguese BR - Female 1
+'br_003',  # Portuguese BR - Female 2
+'br_004',  # Portuguese BR - Female 3
+'br_005',  # Portuguese BR - Male
+
+# ASIA VOICES
+'id_001',  # Indonesian - Female
+'jp_001',  # Japanese - Female 1
+'jp_003',  # Japanese - Female 2
+'jp_005',  # Japanese - Female 3
+'jp_006',  # Japanese - Male
+'kr_002',  # Korean - Male 1
+'kr_003',  # Korean - Female
+'kr_004',  # Korean - Male 2
--- a/requirements.txt
+++ b/requirements.txt
@ -1,8 +1,8 @@
-librosa==0.9.1
 moviepy==1.0.3
+mutagen==1.45.1
 playwright==1.22.0
 praw==7.6.0
 python-dotenv==0.20.0
-pyttsx3==2.90
+requests==2.27.1
 rich==12.4.4
 yt_dlp==2022.5.18
--- a/video_creation/TTSwrapper.py
+++ b/video_creation/TTSwrapper.py
@ -0,0 +1,81 @@
+import requests, base64
+
+# based off: https://github.com/JasonLovesDoggo/RedditVideoMakerBot/blob/master/video_creation/TTSwrapper.py
+
+# https://twitter.com/scanlime/status/1512598559769702406
+
+voices = [  # Known speaker ids (same list as examples/ValidOptionsTTS.txt) -- DISNEY VOICES
+    'en_us_ghostface',  # Ghost Face
+    'en_us_chewbacca',  # Chewbacca
+    'en_us_c3po',  # C3PO
+    'en_us_stitch',  # Stitch
+    'en_us_stormtrooper',  # Stormtrooper
+    'en_us_rocket',  # Rocket
+
+    # ENGLISH VOICES
+    'en_au_001',  # English AU - Female
+    'en_au_002',  # English AU - Male
+    'en_uk_001',  # English UK - Male 1
+    'en_uk_003',  # English UK - Male 2
+    'en_us_001',  # English US - Female (Int. 1)
+    'en_us_002',  # English US - Female (Int. 2)
+    'en_us_006',  # English US - Male 1
+    'en_us_007',  # English US - Male 2
+    'en_us_009',  # English US - Male 3
+    'en_us_010',  # English US - Male 4
+
+    # EUROPE VOICES
+    'fr_001',  # French - Male 1
+    'fr_002',  # French - Male 2
+    'de_001',  # German - Female
+    'de_002',  # German - Male
+    'es_002',  # Spanish - Male
+
+    # AMERICA VOICES
+    'es_mx_002',  # Spanish MX - Male
+    'br_001',  # Portuguese BR - Female 1
+    'br_003',  # Portuguese BR - Female 2
+    'br_004',  # Portuguese BR - Female 3
+    'br_005',  # Portuguese BR - Male
+
+    # ASIA VOICES
+    'id_001',  # Indonesian - Female
+    'jp_001',  # Japanese - Female 1
+    'jp_003',  # Japanese - Female 2
+    'jp_005',  # Japanese - Female 3
+    'jp_006',  # Japanese - Male
+    'kr_002',  # Korean - Male 1
+    'kr_003',  # Korean - Female
+    'kr_004',  # Korean - Male 2
+]
+
+class TTTTSWrapper:  # TikTok Text-to-Speech Wrapper
+    @staticmethod  # tts takes no self, so this also makes it callable on an instance
+    def tts(req_text: str = "This is test text", filename: str = 'title.mp3', voice: str = 'en_us_002'):
+        """Synthesize ``req_text`` with ``voice`` and write the audio to ``filename``.
+
+        Args:
+            req_text: Text to speak; sent in slices of at most 299 characters.
+            filename: Path of the output file, opened in binary write mode.
+            voice: Speaker id sent as the ``text_speaker`` query parameter.
+
+        Raises:
+            KeyError: If a JSON reply lacks the ``data`` or ``v_str`` key.
+        """
+        URI_BASE = 'https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/'
+        limit = 299
+
+        # Always issue at least one request, even when req_text is empty.
+        chunks = [req_text[i:i + limit] for i in range(0, len(req_text), limit)] or [req_text]
+
+        audio = bytearray()
+        for chunk in chunks:
+            # Passing params lets requests percent-encode '&', '#', '+' etc. in the text.
+            query = {'text_speaker': voice, 'req_text': chunk, 'speaker_map_type': 0}
+            r = requests.post(URI_BASE, params=query)
+            # Decode each reply on its own: '=' padding inside a joined base64 string
+            # would stop or corrupt decoding of the chunks that follow it.
+            audio += base64.b64decode(r.json()["data"]["v_str"])
+
+        with open(filename, "wb") as out:
+            out.write(audio)
--- a/video_creation/voices.py
+++ b/video_creation/voices.py
@ -1,15 +1,10 @@
 from pathlib import Path
 from utils.console import print_step, print_substep
 from rich.progress import track
-import pyttsx3
 from dotenv import load_dotenv
 import os
-import librosa
-
-load_dotenv()
-
-VoiceGender = os.getenv('VoiceGender')
-
+from mutagen.mp3 import MP3
+from video_creation.TTSwrapper import TTTTSWrapper as TTS

 def save_text_to_mp3(reddit_obj):
    """Saves Text to MP3 files.
@ -20,36 +15,27 @@ def save_text_to_mp3(reddit_obj):
    print_step("Saving Text to MP3 files...")
    length = 0

-    # create the object
-    engine = pyttsx3.init()
-    voices = engine.getProperty('voices')
-    engine.setProperty('volume', 1.0) 
+    # Load variables from .env so the Voice setting can be read with os.getenv
+    load_dotenv()
+    Voice = os.getenv('Voice')

    # Set the voice depending on what was set in .env
-    if VoiceGender == 'male':
-        engine.setProperty('voice', voices[0].id)
-    elif VoiceGender == 'female':
-        engine.setProperty('voice', voices[1].id)
-    # if nothing set default to female voice
-    else:
-        engine.setProperty('voice', voices[1].id)
-
+    if Voice == '':
+        Voice = 'en_us_002'

    # Create a folder for the mp3 files.
    Path("assets/mp3").mkdir(parents=True, exist_ok=True)

-    engine.save_to_file(str(reddit_obj["thread_title"]), "./assets/mp3/title.mp3")
-    engine.runAndWait()
-    length += librosa.get_duration(filename='./assets/mp3/title.mp3')
+    TTS.tts(str(reddit_obj["thread_title"]), "./assets/mp3/title.mp3", Voice)
+    length += MP3(f"./assets/mp3/title.mp3").info.length

    for idx, comment in track(enumerate(reddit_obj["comments"]), "Saving..."):
        # ! Stop creating mp3 files if the length is greater than 50 seconds. This can be longer, but this is just a good starting point
        if length > 50:
            break

-        engine.save_to_file(str(comment["comment_body"]), f"./assets/mp3/{idx}.mp3")
-        engine.runAndWait()
-        length += librosa.get_duration(filename=f'./assets/mp3/{idx}.mp3')
+        TTS.tts(str(comment["comment_body"]), f"./assets/mp3/{idx}.mp3", Voice)
+        length += MP3(f"./assets/mp3/{idx}.mp3").info.length
    print_substep("Saved Text to MP3 files successfully.", style="bold green")
    # ! Return the index so we know how many screenshots of comments we need to make.
    return length, idx