class elevenlabs:
    """Text-to-speech provider that renders audio through the ElevenLabs SDK.

    Attributes:
        max_chars: Upper bound on text length for this provider (the code that
            consumes this value is not visible here -- presumably the TTS
            engine wrapper; confirm).
        voices: Voice names that ``randomvoice`` picks from.
    """

    # Voice used when the configuration does not name one.
    DEFAULT_VOICE = "Bella"

    def __init__(self):
        self.max_chars = 5000
        self.voices = [
            "Adam",
            "Antoni",
            "Arnold",
            "Bella",
            "Domi",
            "Elli",
            "Josh",
            "Rachel",
            "Sam",
        ]

    def run(
        self,
        text: str,
        filepath: str,
        random_voice=False,
    ):
        """Generate speech for *text* and save it to *filepath*.

        Args:
            text: The text to synthesize.
            filepath: Destination passed to the SDK's ``save`` helper.
            random_voice: When true, ignore the configured voice and pick one
                from ``self.voices`` at random.
        """
        tts_config = settings.config["settings"]["tts"]

        # .get() so a config that omits these keys degrades gracefully instead
        # of raising KeyError (the API key is declared optional in the template).
        voice_name = self._pick_voice(
            tts_config.get("elevenlabs_voice_name"), random_voice
        )

        api_key = tts_config.get("elevenlabs_api_key") or None
        if api_key is None:
            print("set elevenlabs api key value to a valid value or quota will be limited")

        audio = generate(
            api_key=api_key,
            text=text,
            voice=voice_name,
            model="eleven_monolingual_v1",
        )
        save(audio=audio, filename=str(filepath))

    def _pick_voice(self, configured_name, random_voice=False):
        """Return the voice name to use for one synthesis call.

        A random voice wins when requested; otherwise the configured name is
        used, falling back to ``DEFAULT_VOICE`` (with a warning) when the
        configured name is empty or missing.
        """
        if random_voice:
            return self.randomvoice()
        if not configured_name:
            # Warn and continue: previously this path raised ValueError right
            # after assigning the default, so the fallback never took effect.
            print(
                "set elevenlabs name value to a valid value, switching to default voice (Bella)"
            )
            return self.DEFAULT_VOICE
        return configured_name

    def randomvoice(self):
        """Return a randomly chosen entry from ``self.voices``."""
        return random.choice(self.voices)
b/utils/.config.template.toml index b2fa1d4..58ff86e 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -42,7 +42,9 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96, background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" } [settings.tts] -voice_choice = { optional = false, default = "tiktok", options = ["streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. This can be left blank and you will be prompted to choose at runtime." } +voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. This can be left blank and you will be prompted to choose at runtime." 
} +elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs" } +elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" } streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" } tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" } diff --git a/video_creation/voices.py b/video_creation/voices.py index 425f589..cd75f06 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -7,6 +7,7 @@ from TTS.TikTok import TikTok from TTS.aws_polly import AWSPolly from TTS.engine_wrapper import TTSEngine from TTS.pyttsx import pyttsx +from TTS.elevenlabs import elevenlabs from TTS.streamlabs_polly import StreamlabsPolly from utils import settings from utils.console import print_table, print_step @@ -19,6 +20,7 @@ TTSProviders = { "StreamlabsPolly": StreamlabsPolly, "TikTok": TikTok, "pyttsx": pyttsx, + "ElevenLabs": elevenlabs }