From bfa8f3bc223cc245df6905cec82743ba4aa7e9ef Mon Sep 17 00:00:00 2001
From: The-Drobe <Giles@wardr0be.me>
Date: Sun, 5 Jun 2022 18:41:31 +0800
Subject: [PATCH] change from pyttsx3 to custom tiktok api wrapper

---
 .env.template                |  4 +-
 examples/ValidOptionsTTS.txt | 43 +++++++++++++++++++
 requirements.txt             |  4 +-
 video_creation/TTSwrapper.py | 81 ++++++++++++++++++++++++++++++++++++
 video_creation/voices.py     | 36 +++++-----------
 5 files changed, 139 insertions(+), 29 deletions(-)
 create mode 100644 examples/ValidOptionsTTS.txt
 create mode 100644 video_creation/TTSwrapper.py

diff --git a/.env.template b/.env.template
index 526cf65..592ecf6 100644
--- a/.env.template
+++ b/.env.template
@@ -14,5 +14,5 @@ SUBREDDIT=""
 # Range is 0 -> 1
 OPACITY="0.9"
 
-# Valid options are "male" and "female" for the variable below
-VoiceGender=""
+# Valid options are listed in examples/ValidOptionsTTS.txt; leave blank for the default voice
+Voice=""
diff --git a/examples/ValidOptionsTTS.txt b/examples/ValidOptionsTTS.txt
new file mode 100644
index 0000000..f99760c
--- /dev/null
+++ b/examples/ValidOptionsTTS.txt
@@ -0,0 +1,43 @@
+# DISNEY VOICES
+'en_us_ghostface',  # Ghost Face
+'en_us_chewbacca',  # Chewbacca
+'en_us_c3po',  # C3PO
+'en_us_stitch',  # Stitch
+'en_us_stormtrooper',  # Stormtrooper
+'en_us_rocket',  # Rocket
+
+# ENGLISH VOICES
+'en_au_001',  # English AU - Female
+'en_au_002',  # English AU - Male
+'en_uk_001',  # English UK - Male 1
+'en_uk_003',  # English UK - Male 2
+'en_us_001',  # English US - Female (Int. 1)
+'en_us_002',  # English US - Female (Int. 2)
+'en_us_006',  # English US - Male 1
+'en_us_007',  # English US - Male 2
+'en_us_009',  # English US - Male 3
+'en_us_010',  # English US - Male 4
+
+# EUROPE VOICES
+'fr_001',  # French - Male 1
+'fr_002',  # French - Male 2
+'de_001',  # German - Female
+'de_002',  # German - Male
+'es_002',  # Spanish - Male
+
+# AMERICA VOICES
+'es_mx_002',  # Spanish MX - Male
+'br_001',  # Portuguese BR - Female 1
+'br_003',  # Portuguese BR - Female 2
+'br_004',  # Portuguese BR - Female 3
+'br_005',  # Portuguese BR - Male
+
+# ASIA VOICES
+'id_001',  # Indonesian - Female
+'jp_001',  # Japanese - Female 1
+'jp_003',  # Japanese - Female 2
+'jp_005',  # Japanese - Female 3
+'jp_006',  # Japanese - Male
+'kr_002',  # Korean - Male 1
+'kr_003',  # Korean - Female
+'kr_004',  # Korean - Male 2
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index eb5b22e..0db1e06 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,8 @@
-librosa==0.9.1
 moviepy==1.0.3
+mutagen==1.45.1
 playwright==1.22.0
 praw==7.6.0
 python-dotenv==0.20.0
-pyttsx3==2.90
+requests==2.27.1
 rich==12.4.4
 yt_dlp==2022.5.18
diff --git a/video_creation/TTSwrapper.py b/video_creation/TTSwrapper.py
new file mode 100644
index 0000000..db417de
--- /dev/null
+++ b/video_creation/TTSwrapper.py
@@ -0,0 +1,81 @@
+import requests, base64
+
+# based off: https://github.com/JasonLovesDoggo/RedditVideoMakerBot/blob/master/video_creation/TTSwrapper.py
+
+# https://twitter.com/scanlime/status/1512598559769702406
+
+voices = [  # known ids for tts()'s `voice` argument (tts() does not validate against it) -- DISNEY VOICES
+    'en_us_ghostface',  # Ghost Face
+    'en_us_chewbacca',  # Chewbacca
+    'en_us_c3po',  # C3PO
+    'en_us_stitch',  # Stitch
+    'en_us_stormtrooper',  # Stormtrooper
+    'en_us_rocket',  # Rocket
+
+    # ENGLISH VOICES
+    'en_au_001',  # English AU - Female
+    'en_au_002',  # English AU - Male
+    'en_uk_001',  # English UK - Male 1
+    'en_uk_003',  # English UK - Male 2
+    'en_us_001',  # English US - Female (Int. 1)
+    'en_us_002',  # English US - Female (Int. 2)
+    'en_us_006',  # English US - Male 1
+    'en_us_007',  # English US - Male 2
+    'en_us_009',  # English US - Male 3
+    'en_us_010',  # English US - Male 4
+
+    # EUROPE VOICES
+    'fr_001',  # French - Male 1
+    'fr_002',  # French - Male 2
+    'de_001',  # German - Female
+    'de_002',  # German - Male
+    'es_002',  # Spanish - Male
+
+    # AMERICA VOICES
+    'es_mx_002',  # Spanish MX - Male
+    'br_001',  # Portuguese BR - Female 1
+    'br_003',  # Portuguese BR - Female 2
+    'br_004',  # Portuguese BR - Female 3
+    'br_005',  # Portuguese BR - Male
+
+    # ASIA VOICES
+    'id_001',  # Indonesian - Female
+    'jp_001',  # Japanese - Female 1
+    'jp_003',  # Japanese - Female 2
+    'jp_005',  # Japanese - Female 3
+    'jp_006',  # Japanese - Male
+    'kr_002',  # Korean - Male 1
+    'kr_003',  # Korean - Female
+    'kr_004',  # Korean - Male 2
+]
+
+class TTTTSWrapper:  # TikTok Text-to-Speech Wrapper
+    """Minimal client for TikTok's text-to-speech HTTP endpoint."""
+
+    @staticmethod  # takes no self; callers invoke it on the class
+    def tts(req_text: str = "This is test text", filename: str = 'title.mp3', voice: str = 'en_us_002'):
+        """Speak ``req_text`` with ``voice`` and write the MP3 bytes to ``filename``.
+
+        Text is sent 299 characters per request; each response's base64
+        ``v_str`` payload is decoded on its own and the bytes are joined in order.
+        A falsy ``voice`` (e.g. an unset env var) falls back to 'en_us_002'.
+        Raises KeyError when a response lacks the ``data``/``v_str`` keys.
+        Network errors from ``requests`` propagate unchanged.
+        """
+        URI_BASE = 'https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/'
+        CHUNK = 299  # most characters sent in one request
+        voice = voice or 'en_us_002'
+
+        # "or ['']" still issues one request for empty text, as before.
+        chunks = [req_text[i:i + CHUNK] for i in range(0, len(req_text), CHUNK)] or ['']
+        audio = bytearray()
+        for chunk in chunks:
+            # params= percent-encodes the text; a raw '&' or '#' in a comment
+            # would otherwise be parsed as part of the query string.
+            query = {'text_speaker': voice, 'req_text': chunk, 'speaker_map_type': 0}
+            r = requests.post(URI_BASE, params=query)
+            # Decode per chunk: '=' padding inside a joined base64 string ends decoding early.
+            audio += base64.b64decode(r.json()["data"]["v_str"])
+
+        with open(filename, "wb") as out:
+            out.write(audio)
diff --git a/video_creation/voices.py b/video_creation/voices.py
index 180293d..cf596fd 100644
--- a/video_creation/voices.py
+++ b/video_creation/voices.py
@@ -1,15 +1,10 @@
 from pathlib import Path
 from utils.console import print_step, print_substep
 from rich.progress import track
-import pyttsx3
 from dotenv import load_dotenv
 import os
-import librosa
-
-load_dotenv()
-
-VoiceGender = os.getenv('VoiceGender')
-
+from mutagen.mp3 import MP3
+from video_creation.TTSwrapper import TTTTSWrapper as TTS
 
 def save_text_to_mp3(reddit_obj):
     """Saves Text to MP3 files.
@@ -20,36 +15,27 @@ def save_text_to_mp3(reddit_obj):
     print_step("Saving Text to MP3 files...")
     length = 0
 
-    # create the object
-    engine = pyttsx3.init()
-    voices = engine.getProperty('voices')
-    engine.setProperty('volume', 1.0) 
+    # Load .env and read the configured TikTok voice id
+    load_dotenv()
+    Voice = os.getenv('Voice')
 
     # Set the voice depending on what was set in .env
-    if VoiceGender == 'male':
-        engine.setProperty('voice', voices[0].id)
-    elif VoiceGender == 'female':
-        engine.setProperty('voice', voices[1].id)
-    # if nothing set default to female voice
-    else:
-        engine.setProperty('voice', voices[1].id)
-
+    if Voice == '':
+        Voice = 'en_us_002'
 
     # Create a folder for the mp3 files.
     Path("assets/mp3").mkdir(parents=True, exist_ok=True)
 
-    engine.save_to_file(str(reddit_obj["thread_title"]), "./assets/mp3/title.mp3")
-    engine.runAndWait()
-    length += librosa.get_duration(filename='./assets/mp3/title.mp3')
+    TTS.tts(str(reddit_obj["thread_title"]), "./assets/mp3/title.mp3", Voice)
+    length += MP3(f"./assets/mp3/title.mp3").info.length
 
     for idx, comment in track(enumerate(reddit_obj["comments"]), "Saving..."):
         # ! Stop creating mp3 files if the length is greater than 50 seconds. This can be longer, but this is just a good starting point
         if length > 50:
             break
 
-        engine.save_to_file(str(comment["comment_body"]), f"./assets/mp3/{idx}.mp3")
-        engine.runAndWait()
-        length += librosa.get_duration(filename=f'./assets/mp3/{idx}.mp3')
+        TTS.tts(str(comment["comment_body"]), f"./assets/mp3/{idx}.mp3", Voice)
+        length += MP3(f"./assets/mp3/{idx}.mp3").info.length
     print_substep("Saved Text to MP3 files successfully.", style="bold green")
     # ! Return the index so we know how many screenshots of comments we need to make.
     return length, idx