diff --git a/.env.template b/.env.template
index b66ec3e..6bf7e4f 100644
--- a/.env.template
+++ b/.env.template
@@ -32,8 +32,8 @@ MAX_COMMENT_LENGTH="500"
 OPACITY="1"
 
 # see TTSwrapper.py for all valid options
-VOICE="en_us_001" # e.g. en_us_002
+VOICE="Matthew" # e.g. en_us_002
+TTsChoice="polly" # todo add docs
 
 # IN-PROGRESS - not yet implemented
-TTsChoice="TikTok" # todo add docs
 STORYMODE="False"
diff --git a/TTS/POLLY.py b/TTS/POLLY.py
new file mode 100644
index 0000000..a32510f
--- /dev/null
+++ b/TTS/POLLY.py
@@ -0,0 +1,130 @@
+import os
+import random
+import re
+
+import requests
+import sox
+from moviepy.audio.AudioClip import concatenate_audioclips, CompositeAudioClip
+from moviepy.audio.io.AudioFileClip import AudioFileClip
+voices = {'neural': [
+    'Ivy',
+    'Joanna',
+    'Kendra',
+    'Kimberly',
+    'Salli',
+    'Joey',
+    'Justin',
+    'Matthew',
+    'Amy',
+    'Emma',
+    'Brian'
+
+], 'standard': [
+    'Ivy',
+    'Joanna',
+    'Kendra',
+    'Kimberly',
+    'Salli',
+    'Joey',
+    'Justin',
+    'Matthew',
+    "Russell",
+    "Nicole",
+    "Amy",
+    "Emma",
+    "Brian",
+    "Aditi",
+    "Raveena",
+    "Geraint"
+]}
+
+
+# valid voices https://lazypy.ro/tts/
+
+
+class POLLY:
+    """Text-to-speech backend that posts text to the Streamlabs Polly endpoint."""
+
+    def __init__(self):
+        self.url = 'https://streamlabs.com/polly/speak'
+
+    def tts(
+        self,
+        req_text: str = "Amazon Text To Speech",
+        filename: str = "title.mp3",
+        random_speaker=False,
+        censer=False,
+    ):
+        """
+        Synthesize req_text and write the resulting audio to filename.
+
+        If the service reports that the text is too long, the text is split
+        into chunks that are synthesized one by one and joined into filename.
+
+        :param req_text: text to be spoken
+        :param filename: output path; chunk files replace ".mp3" with "-<n>.mp3"
+        :param random_speaker: pick a random voice instead of reading VOICE
+        :param censer: currently unused
+        :raises ValueError: if random_speaker is false and VOICE is not set
+        """
+        if random_speaker:
+            voice = self.randomvoice()
+        else:
+            if not os.getenv('VOICE'):
+                raise ValueError('Please set the environment variable VOICE to a valid voice. options are: {}'.format(voices))
+            voice = str(os.getenv("VOICE")).capitalize()
+        body = {'voice': voice, 'text': req_text}
+        response = requests.post(self.url, data=body)
+        try:
+            voice_data = requests.get(response.json()['speak_url'])
+            with open(filename, 'wb') as f:
+                f.write(voice_data.content)
+        except KeyError:
+            # the reply carried no 'speak_url'; only the length error is handled
+            if response.json()['error'] == 'Text length is too long!':
+                # pieces of up to ~530 characters ending on '.' or at end of text
+                chunks = [
+                    m.group().strip() for m in re.finditer(r" *((.{0,530})(\.|.$))", req_text)
+                ]
+
+                audio_clips = []
+                cbn = sox.Combiner()
+
+                for chunk_id, chunk in enumerate(chunks):
+                    # keep the voice chosen above instead of a hard-coded one
+                    body = {'voice': voice, 'text': chunk}
+                    resp = requests.post(self.url, data=body)
+                    voice_data = requests.get(resp.json()['speak_url'])
+                    chunk_file = filename.replace(".mp3", f"-{chunk_id}.mp3")
+                    with open(chunk_file, "wb") as out:
+                        out.write(voice_data.content)
+
+                    audio_clips.append(chunk_file)
+
+                try:
+                    if len(audio_clips) > 1:
+                        cbn.convert(samplerate=44100, n_channels=2)
+                        cbn.build(audio_clips, filename, "concatenate")
+                    else:
+                        os.rename(audio_clips[0], filename)
+                except (sox.core.SoxError,
+                        FileNotFoundError):  # https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/67#issuecomment-1150466339
+                    # joining with sox failed (SoxError / FileNotFoundError); fall back to moviepy
+                    audio_concat = concatenate_audioclips(
+                        [AudioFileClip(clip) for clip in audio_clips]
+                    )
+                    audio_composite = CompositeAudioClip([audio_concat])
+                    audio_composite.write_audiofile(filename, 44100, 2, 2000, None)
+
+    def make_readable(self, text):
+        """
+        Amazon Polly fails to read some symbols properly such as '& (and)'.
+        So we normalize input text before passing it to the service
+        """
+        text = text.replace('&', 'and')
+        return text
+
+    def randomvoice(self):
+        """Return a random voice name from the neural and standard lists."""
+        valid = voices['neural'] + voices['standard']
+        return random.choice(valid)
diff --git a/TTS/TikTok.py b/TTS/TikTok.py
index 8ed9f8e..a299216 100644
--- a/TTS/TikTok.py
+++ b/TTS/TikTok.py
@@ -110,6 +110,7 @@ class TikTok:  # TikTok Text-to-Speech Wrapper
             r = session.post(
                 f"{self.URI_BASE}{voice}&req_text={chunk}&speaker_map_type=0"
             )
+            print(r.text)
             vstr = [r.json()["data"]["v_str"]][0]
             b64d = base64.b64decode(vstr)
 
diff --git a/TTS/swapper.py b/TTS/swapper.py
index cd18223..f4717b1 100644
--- a/TTS/swapper.py
+++ b/TTS/swapper.py
@@ -3,9 +3,10 @@ from os import getenv
 from dotenv import load_dotenv
 
 from TTS.GTTS import GTTS
+from TTS.POLLY import POLLY
 from TTS.TikTok import TikTok
 
-CHOICE_DIR = {"tiktok": TikTok, "gtts": GTTS}
+CHOICE_DIR = {"tiktok": TikTok, "gtts": GTTS, 'polly': POLLY}
 
 
 class TTS:
diff --git a/video_creation/background.py b/video_creation/background.py
index e8f8ad1..d1a7948 100644
--- a/video_creation/background.py
+++ b/video_creation/background.py
@@ -14,7 +14,7 @@ def get_start_and_end_times(video_length, length_of_clip):
 
 
 def download_background():
-    """Downloads the backgrounds/s video from youtube."""
+    """Downloads the backgrounds/s video from YouTube."""
     Path("./assets/backgrounds/").mkdir(parents=True, exist_ok=True)
     background_options = [  # uri , filename , credit
         ("https://www.youtube.com/watch?v=n_Dv4JMiwK8", "parkour.mp4", "bbswitzer"),
@@ -25,7 +25,7 @@ def download_background():
         # ),
     ]
     # note: make sure the file name doesn't include an - in it
-    if not len(listdir("./assets/backgrounds")) <= len(
+    if not len(listdir("./assets/backgrounds")) >= len(
         background_options
     ):  # if there are any background videos not installed
         print_step(