From bfa8f3bc223cc245df6905cec82743ba4aa7e9ef Mon Sep 17 00:00:00 2001 From: The-Drobe Date: Sun, 5 Jun 2022 18:41:31 +0800 Subject: [PATCH] change from pyttsx3 to custom tiktok api wrapper --- .env.template | 4 +- examples/ValidOptionsTTS.txt | 43 +++++++++++++++++++ requirements.txt | 4 +- video_creation/TTSwrapper.py | 81 ++++++++++++++++++++++++++++++++++++ video_creation/voices.py | 36 +++++----------- 5 files changed, 139 insertions(+), 29 deletions(-) create mode 100644 examples/ValidOptionsTTS.txt create mode 100644 video_creation/TTSwrapper.py diff --git a/.env.template b/.env.template index 526cf65..592ecf6 100644 --- a/.env.template +++ b/.env.template @@ -14,5 +14,5 @@ SUBREDDIT="" # Range is 0 -> 1 OPACITY="0.9" -# Valid options are "male" and "female" for the variable below -VoiceGender="" +# Valid options are listed in examples/ValidOptionsTTS.txt; leave blank for the default voice +Voice="" diff --git a/examples/ValidOptionsTTS.txt b/examples/ValidOptionsTTS.txt new file mode 100644 index 0000000..f99760c --- /dev/null +++ b/examples/ValidOptionsTTS.txt @@ -0,0 +1,43 @@ +# DISNEY VOICES +'en_us_ghostface', # Ghost Face +'en_us_chewbacca', # Chewbacca +'en_us_c3po', # C3PO +'en_us_stitch', # Stitch +'en_us_stormtrooper', # Stormtrooper +'en_us_rocket', # Rocket + +# ENGLISH VOICES +'en_au_001', # English AU - Female +'en_au_002', # English AU - Male +'en_uk_001', # English UK - Male 1 +'en_uk_003', # English UK - Male 2 +'en_us_001', # English US - Female (Int. 1) +'en_us_002', # English US - Female (Int. 
2) +'en_us_006', # English US - Male 1 +'en_us_007', # English US - Male 2 +'en_us_009', # English US - Male 3 +'en_us_010', # English US - Male 4 + +# EUROPE VOICES +'fr_001', # French - Male 1 +'fr_002', # French - Male 2 +'de_001', # German - Female +'de_002', # German - Male +'es_002', # Spanish - Male + +# AMERICA VOICES +'es_mx_002', # Spanish MX - Male +'br_001', # Portuguese BR - Female 1 +'br_003', # Portuguese BR - Female 2 +'br_004', # Portuguese BR - Female 3 +'br_005', # Portuguese BR - Male + +# ASIA VOICES +'id_001', # Indonesian - Female +'jp_001', # Japanese - Female 1 +'jp_003', # Japanese - Female 2 +'jp_005', # Japanese - Female 3 +'jp_006', # Japanese - Male +'kr_002', # Korean - Male 1 +'kr_003', # Korean - Female +'kr_004', # Korean - Male 2 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index eb5b22e..0db1e06 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,8 +1,8 @@ -librosa==0.9.1 moviepy==1.0.3 +mutagen==1.45.1 playwright==1.22.0 praw==7.6.0 python-dotenv==0.20.0 -pyttsx3==2.90 +requests==2.27.1 rich==12.4.4 yt_dlp==2022.5.18 diff --git a/video_creation/TTSwrapper.py b/video_creation/TTSwrapper.py new file mode 100644 index 0000000..db417de --- /dev/null +++ b/video_creation/TTSwrapper.py @@ -0,0 +1,81 @@ +import requests, base64 + +# based off: https://github.com/JasonLovesDoggo/RedditVideoMakerBot/blob/master/video_creation/TTSwrapper.py + +# https://twitter.com/scanlime/status/1512598559769702406 + +voices = [ # DISNEY VOICES + 'en_us_ghostface', # Ghost Face + 'en_us_chewbacca', # Chewbacca + 'en_us_c3po', # C3PO + 'en_us_stitch', # Stitch + 'en_us_stormtrooper', # Stormtrooper + 'en_us_rocket', # Rocket + + # ENGLISH VOICES + 'en_au_001', # English AU - Female + 'en_au_002', # English AU - Male + 'en_uk_001', # English UK - Male 1 + 'en_uk_003', # English UK - Male 2 + 'en_us_001', # English US - Female (Int. 1) + 'en_us_002', # English US - Female (Int. 
class TTTTSWrapper:  # TikTok Text-to-Speech Wrapper
    """Small client for the TikTok text-to-speech HTTP endpoint.

    All members are static; call ``TTTTSWrapper.tts(...)`` directly on the
    class (or on an instance).
    """

    # Endpoint URL without a query string; the query is built by ``requests``
    # from a params dict so that every value is URL-encoded.
    URI_BASE = "https://api16-normal-useast5.us.tiktokv.com/media/api/text/speech/invoke/"

    # Longest piece of text sent in a single request.
    MAX_CHARS = 299

    @staticmethod
    def _split_text(text: str, size: int = 299) -> list:
        """Return *text* cut into consecutive pieces of at most *size* characters."""
        return [text[start:start + size] for start in range(0, len(text), size)]

    @staticmethod
    def _decode_chunks(encoded_chunks) -> bytes:
        """Decode each base64 string on its own and join the resulting bytes.

        The strings must not be concatenated before decoding: base64 padding
        ("=") marks the end of a payload, so decoding a joined string can
        silently drop everything after the first padded chunk.
        """
        return b"".join(base64.b64decode(chunk) for chunk in encoded_chunks)

    @staticmethod
    def _request_audio(text: str, voice: str, timeout: float) -> str:
        """POST one piece of text and return the base64 audio string (``v_str``)."""
        response = requests.post(
            TTTTSWrapper.URI_BASE,
            # Passing a dict lets requests escape '&', '#', '+', spaces, etc.
            params={
                "text_speaker": voice,
                "req_text": text,
                "speaker_map_type": 0,
            },
            timeout=timeout,
        )
        response.raise_for_status()
        payload = response.json()
        try:
            v_str = payload["data"]["v_str"]
        except (KeyError, TypeError) as err:
            raise RuntimeError(
                f"TikTok TTS response contained no audio data: {payload!r}"
            ) from err
        if not v_str:
            raise RuntimeError(
                f"TikTok TTS response contained no audio data: {payload!r}"
            )
        return v_str

    @staticmethod
    def tts(
        req_text: str = "This is test text",
        filename: str = "title.mp3",
        voice: str = "en_us_002",
        timeout: float = 30,
    ):
        """Synthesise *req_text* with *voice* and write the audio to *filename*.

        Text longer than ``MAX_CHARS`` characters is sent as several requests
        and the decoded audio of each piece is written back to back.

        Args:
            req_text: Text to speak.
            filename: Path of the output file (opened in binary write mode).
            voice: Voice identifier passed as ``text_speaker``.
            timeout: Seconds to wait for each HTTP request.

        Raises:
            ValueError: If *req_text* is empty or only whitespace.
            RuntimeError: If a response carries no ``data.v_str`` audio.
            requests.RequestException: On network or HTTP-status failures.
        """
        if not req_text or not req_text.strip():
            raise ValueError("req_text must contain some text to synthesise")

        pieces = TTTTSWrapper._split_text(req_text, TTTTSWrapper.MAX_CHARS)
        encoded = [
            TTTTSWrapper._request_audio(piece, voice, timeout) for piece in pieces
        ]
        audio = TTTTSWrapper._decode_chunks(encoded)

        with open(filename, "wb") as out:
            out.write(audio)
def save_text_to_mp3(reddit_obj):
    """Save the thread title and its comments as MP3 files.

    The title is written to ``./assets/mp3/title.mp3`` and each comment to
    ``./assets/mp3/<index>.mp3``. No further comments are converted once the
    accumulated audio length exceeds 50 seconds.

    Args:
        reddit_obj: Mapping with a ``"thread_title"`` entry and a
            ``"comments"`` iterable whose items each provide a
            ``"comment_body"`` entry.

    Returns:
        A ``(length, idx)`` tuple: the summed duration in seconds of the
        generated files and the last comment index the loop reached
        (``0`` when there are no comments).
    """
    print_step("Saving Text to MP3 files...")
    length = 0

    # Read the voice from .env. os.getenv gives None when the variable is
    # missing and "" when it is left blank; both fall back to the default.
    load_dotenv()
    voice = os.getenv("Voice") or "en_us_002"

    # Create a folder for the mp3 files.
    Path("assets/mp3").mkdir(parents=True, exist_ok=True)

    TTS.tts(str(reddit_obj["thread_title"]), "./assets/mp3/title.mp3", voice)
    length += MP3("./assets/mp3/title.mp3").info.length

    # Pre-bind idx so the return below works when there are no comments.
    idx = 0
    for idx, comment in track(enumerate(reddit_obj["comments"]), "Saving..."):
        # ! Stop creating mp3 files if the length is greater than 50 seconds. This can be longer, but this is just a good starting point
        if length > 50:
            break

        comment_path = f"./assets/mp3/{idx}.mp3"
        TTS.tts(str(comment["comment_body"]), comment_path, voice)
        length += MP3(comment_path).info.length

    print_substep("Saved Text to MP3 files successfully.", style="bold green")
    # ! Return the index so we know how many screenshots of comments we need to make.
    return length, idx