|
|
@ -1,39 +1,47 @@
|
|
|
|
import random
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
|
|
import requests
|
|
|
|
import requests
|
|
|
|
|
|
|
|
|
|
|
|
from utils import settings
|
|
|
|
from utils import settings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OpenAITTS:
|
|
|
|
class OpenAITTS:
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text.
|
|
|
|
A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text.
|
|
|
|
|
|
|
|
|
|
|
|
Attributes:
|
|
|
|
Attributes:
|
|
|
|
max_chars (int): Maximum number of characters allowed per API call.
|
|
|
|
max_chars (int): Maximum number of characters allowed per API call.
|
|
|
|
api_key (str): API key loaded from settings.
|
|
|
|
api_key (str): API key loaded from settings.
|
|
|
|
api_url (str): The complete API endpoint URL, built from a base URL provided in the config.
|
|
|
|
api_url (str): The complete API endpoint URL, built from a base URL provided in the config.
|
|
|
|
available_voices (list): Static list of supported voices (according to current docs).
|
|
|
|
available_voices (list): Static list of supported voices (according to current docs).
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self):
    """
    Configure the engine from ``settings.config["settings"]["tts"]``.

    Reads the API key and the optional base URL from the config, builds the
    full speech endpoint URL from that base URL, and stores the result of
    ``get_available_voices()`` on the instance.

    Raises:
        ValueError: If ``openai_api_key`` is missing or empty in the config.
    """
    tts_config = settings.config["settings"]["tts"]

    # Set maximum input size based on API limits (4096 characters per request)
    self.max_chars = 4096

    self.api_key = tts_config.get("openai_api_key")
    if not self.api_key:
        raise ValueError(
            "No OpenAI API key provided in settings! Please set 'openai_api_key' in your config."
        )

    # Read the base URL from the config (e.g. "https://api.openai.com/v1" or
    # "https://api.openai.com/v1/"). `or` (rather than a .get() default) also
    # covers a key that is present but None/empty, which would otherwise crash
    # on the string handling below or produce a relative URL.
    base_url = tts_config.get("openai_api_url") or "https://api.openai.com/v1"
    # Strip every trailing slash so the join below never yields "//".
    base_url = base_url.rstrip("/")
    # Append the TTS-specific path.
    self.api_url = base_url + "/audio/speech"

    # Set the available voices to a static list as per OpenAI TTS documentation.
    self.available_voices = self.get_available_voices()
|
|
|
|
|
|
|
|
|
|
|
|
def get_available_voices(self):
|
|
|
|
def get_available_voices(self):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Return a static list of supported voices for the OpenAI TTS API.
|
|
|
|
Return a static list of supported voices for the OpenAI TTS API.
|
|
|
|
|
|
|
|
|
|
|
|
According to the documentation, supported voices include:
|
|
|
|
According to the documentation, supported voices include:
|
|
|
|
"alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"
|
|
|
|
"alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"
|
|
|
|
"""
|
|
|
|
"""
|
|
|
@ -48,7 +56,7 @@ class OpenAITTS:
|
|
|
|
def run(self, text, filepath, random_voice: bool = False):
|
|
|
|
def run(self, text, filepath, random_voice: bool = False):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
Convert the provided text to speech and save the resulting audio to the specified filepath.
|
|
|
|
Convert the provided text to speech and save the resulting audio to the specified filepath.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
Args:
|
|
|
|
text (str): The input text to convert.
|
|
|
|
text (str): The input text to convert.
|
|
|
|
filepath (str): The file path where the generated audio will be saved.
|
|
|
|
filepath (str): The file path where the generated audio will be saved.
|
|
|
@ -69,12 +77,9 @@ class OpenAITTS:
|
|
|
|
"model": model,
|
|
|
|
"model": model,
|
|
|
|
"voice": voice,
|
|
|
|
"voice": voice,
|
|
|
|
"input": text,
|
|
|
|
"input": text,
|
|
|
|
"response_format": "mp3" # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav"
|
|
|
|
"response_format": "mp3", # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav"
|
|
|
|
}
|
|
|
|
|
|
|
|
headers = {
|
|
|
|
|
|
|
|
"Authorization": f"Bearer {self.api_key}",
|
|
|
|
|
|
|
|
"Content-Type": "application/json"
|
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
response = requests.post(self.api_url, headers=headers, json=payload)
|
|
|
|
response = requests.post(self.api_url, headers=headers, json=payload)
|
|
|
|
if response.status_code != 200:
|
|
|
|
if response.status_code != 200:
|
|
|
@ -83,4 +88,4 @@ class OpenAITTS:
|
|
|
|
with open(filepath, "wb") as f:
|
|
|
|
with open(filepath, "wb") as f:
|
|
|
|
f.write(response.content)
|
|
|
|
f.write(response.content)
|
|
|
|
except Exception as e:
|
|
|
|
except Exception as e:
|
|
|
|
raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}")
|
|
|
|
raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}")
|
|
|
|