diff --git a/TTS/openai_tts.py b/TTS/openai_tts.py index 774a7a6..a267163 100644 --- a/TTS/openai_tts.py +++ b/TTS/openai_tts.py @@ -1,39 +1,47 @@ import random + import requests + from utils import settings + class OpenAITTS: """ A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text. - + Attributes: max_chars (int): Maximum number of characters allowed per API call. api_key (str): API key loaded from settings. api_url (str): The complete API endpoint URL, built from a base URL provided in the config. available_voices (list): Static list of supported voices (according to current docs). """ + def __init__(self): # Set maximum input size based on API limits (4096 characters per request) self.max_chars = 4096 self.api_key = settings.config["settings"]["tts"].get("openai_api_key") if not self.api_key: - raise ValueError("No OpenAI API key provided in settings! Please set 'openai_api_key' in your config.") - + raise ValueError( + "No OpenAI API key provided in settings! Please set 'openai_api_key' in your config." + ) + # Lese den Basis-URL aus der Konfiguration (z. B. "https://api.openai.com/v1" oder "https://api.openai.com/v1/") - base_url = settings.config["settings"]["tts"].get("openai_api_url", "https://api.openai.com/v1") + base_url = settings.config["settings"]["tts"].get( + "openai_api_url", "https://api.openai.com/v1" + ) # Entferne ggf. den abschließenden Slash if base_url.endswith("/"): base_url = base_url[:-1] # Hänge den TTS-spezifischen Pfad an self.api_url = base_url + "/audio/speech" - + # Set the available voices to a static list as per OpenAI TTS documentation. self.available_voices = self.get_available_voices() def get_available_voices(self): """ Return a static list of supported voices for the OpenAI TTS API. - + According to the documentation, supported voices include: "alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer" """ @@ -48,7 +56,7 @@ class OpenAITTS: def run(self, text, filepath, random_voice: bool = False): """ Convert the provided text to speech and save the resulting audio to the specified filepath. - + Args: text (str): The input text to convert. filepath (str): The file path where the generated audio will be saved. @@ -69,12 +77,9 @@ class OpenAITTS: "model": model, "voice": voice, "input": text, - "response_format": "mp3" # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav" - } - headers = { - "Authorization": f"Bearer {self.api_key}", - "Content-Type": "application/json" + "response_format": "mp3", # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav" } + headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"} try: response = requests.post(self.api_url, headers=headers, json=payload) if response.status_code != 200: @@ -83,4 +88,4 @@ class OpenAITTS: with open(filepath, "wb") as f: f.write(response.content) except Exception as e: - raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}") \ No newline at end of file + raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}") diff --git a/video_creation/voices.py b/video_creation/voices.py index 13ded7c..3d48e9e 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -6,10 +6,10 @@ from TTS.aws_polly import AWSPolly from TTS.elevenlabs import elevenlabs from TTS.engine_wrapper import TTSEngine from TTS.GTTS import GTTS +from TTS.openai_tts import OpenAITTS from TTS.pyttsx import pyttsx from TTS.streamlabs_polly import StreamlabsPolly from TTS.TikTok import TikTok -from TTS.openai_tts import OpenAITTS from utils import settings from utils.console import print_step, print_table