adding OpenAI TTS API Option

This allows users to configure the OpenAI TTS API to generate voices for their videos, which might be a cheaper option than ElevenLabs (especially if one runs KokoroTTS locally, which exposes an API structured like OpenAI's and is fully compatible with it).
pull/2268/head
bnfone 6 months ago
parent 3d4c34d60c
commit e137dbb40d

@ -0,0 +1,89 @@
import random
import requests
from utils import settings
class OpenAITTS:
    """
    A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
        api_key (str): API key loaded from settings.
        api_url (str): The complete API endpoint URL, built from a base URL provided in the config.
        available_voices (list): Static list of supported voices (according to current docs).
    """

    # Base URL used when the config does not provide a usable one.
    DEFAULT_BASE_URL = "https://api.openai.com/v1"
    # Path of the speech-synthesis endpoint, relative to the base URL.
    SPEECH_PATH = "/audio/speech"
    # Seconds to wait for the API before giving up. Without a timeout,
    # `requests` blocks indefinitely when the endpoint stalls.
    # NOTE(review): 120 s is a generous guess for long inputs -- tune if needed.
    REQUEST_TIMEOUT = 120

    def __init__(self):
        """
        Load the API key and endpoint from the project settings.

        Raises:
            ValueError: If 'openai_api_key' is missing or empty in the config.
        """
        # Set maximum input size based on API limits (4096 characters per request)
        self.max_chars = 4096
        tts_config = settings.config["settings"]["tts"]
        self.api_key = tts_config.get("openai_api_key")
        if not self.api_key:
            raise ValueError("No OpenAI API key provided in settings! Please set 'openai_api_key' in your config.")
        # Read the base URL from the config (e.g. "https://api.openai.com/v1"
        # or "https://api.openai.com/v1/") and append the TTS-specific path.
        self.api_url = self._build_endpoint(tts_config.get("openai_api_url", self.DEFAULT_BASE_URL))
        # Set the available voices to a static list as per OpenAI TTS documentation.
        self.available_voices = self.get_available_voices()

    @classmethod
    def _build_endpoint(cls, base_url):
        """
        Return the full speech endpoint URL for the given base URL.

        Surrounding whitespace and any number of trailing slashes are removed
        before the speech path is appended. An empty or missing base URL falls
        back to DEFAULT_BASE_URL instead of yielding a host-less endpoint.
        """
        normalized = str(base_url or "").strip().rstrip("/")
        if not normalized:
            normalized = cls.DEFAULT_BASE_URL
        return normalized + cls.SPEECH_PATH

    def get_available_voices(self):
        """
        Return a static list of supported voices for the OpenAI TTS API.

        According to the documentation, supported voices include:
        "alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"
        """
        return ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]

    def randomvoice(self):
        """
        Select and return a random voice from the available voices.
        """
        return random.choice(self.available_voices)

    def run(self, text, filepath, random_voice: bool = False):
        """
        Convert the provided text to speech and save the resulting audio to the specified filepath.

        Args:
            text (str): The input text to convert.
            filepath (str): The file path where the generated audio will be saved.
            random_voice (bool): If True, select a random voice from the available voices.

        Raises:
            RuntimeError: If the request fails, the API answers with a non-200
                status, or the audio file cannot be written. The underlying
                exception is attached as ``__cause__``.
        """
        tts_config = settings.config["settings"]["tts"]
        # Choose voice based on configuration or randomly if requested.
        if random_voice:
            voice = self.randomvoice()
        else:
            voice = tts_config.get("openai_voice_name", "alloy")
            voice = str(voice).lower()  # Ensure lower-case as expected by the API
        # Select the model from configuration; default to 'tts-1'
        model = tts_config.get("openai_model", "tts-1")
        # Debug output: print which voice and model will be used
        print(f"Using OpenAI TTS model: {model} with voice: {voice}")
        # Build the payload for the API request.
        payload = {
            "model": model,
            "voice": voice,
            "input": text,
            "response_format": "mp3",  # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav"
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        try:
            response = requests.post(
                self.api_url,
                headers=headers,
                json=payload,
                timeout=self.REQUEST_TIMEOUT,
            )
            if response.status_code != 200:
                raise RuntimeError(f"Error from TTS API: {response.status_code} {response.text}")
            # Write the binary data (mp3) directly to the file.
            with open(filepath, "wb") as f:
                f.write(response.content)
        except Exception as e:
            # Keep RuntimeError as the single failure type callers see, but
            # chain the cause so the original traceback is not lost.
            raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}") from e

@ -122,6 +122,7 @@ if __name__ == "__main__":
except Exception as err: except Exception as err:
config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED" config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED"
config["settings"]["tts"]["elevenlabs_api_key"] = "REDACTED" config["settings"]["tts"]["elevenlabs_api_key"] = "REDACTED"
config["settings"]["tts"]["openai_api_key"] = "REDACTED"
print_step( print_step(
f"Sorry, something went wrong with this version! Try again, and feel free to report this issue at GitHub or the Discord community.\n" f"Sorry, something went wrong with this version! Try again, and feel free to report this issue at GitHub or the Discord community.\n"
f"Version: {__VERSION__} \n" f"Version: {__VERSION__} \n"

@ -44,7 +44,6 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96,
background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" } background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" }
[settings.tts] [settings.tts]
voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" } random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" }
elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] } elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] }
elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
@ -56,3 +55,7 @@ python_voice = { optional = false, default = "1", example = "1", explanation = "
py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" } py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" }
silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" } silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" }
no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" } no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" }
openai_api_url = { optional = true, default = "https://api.openai.com/v1/", example = "https://api.openai.com/v1/", explanation = "The API endpoint URL for OpenAI TTS generation" }
openai_api_key = { optional = true, example = "sk-abc123def456...", explanation = "Your OpenAI API key for TTS generation" }
openai_voice_name = { optional = false, default = "alloy", example = "alloy", explanation = "The voice used for OpenAI TTS generation", options = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"] }
openai_model = { optional = false, default = "tts-1", example = "tts-1", explanation = "The model variant used for OpenAI TTS generation", options = ["tts-1", "tts-1-hd"] }

@ -9,6 +9,7 @@ from TTS.GTTS import GTTS
from TTS.pyttsx import pyttsx from TTS.pyttsx import pyttsx
from TTS.streamlabs_polly import StreamlabsPolly from TTS.streamlabs_polly import StreamlabsPolly
from TTS.TikTok import TikTok from TTS.TikTok import TikTok
from TTS.openai_tts import OpenAITTS
from utils import settings from utils import settings
from utils.console import print_step, print_table from utils.console import print_step, print_table
@ -21,6 +22,7 @@ TTSProviders = {
"TikTok": TikTok, "TikTok": TikTok,
"pyttsx": pyttsx, "pyttsx": pyttsx,
"ElevenLabs": elevenlabs, "ElevenLabs": elevenlabs,
"OpenAI": OpenAITTS,
} }

Loading…
Cancel
Save