You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
RedditVideoMakerBot/TTS/openai_tts.py

92 lines
3.8 KiB

import random
import requests
from utils import settings
class OpenAITTS:
"""
A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text.
Attributes:
max_chars (int): Maximum number of characters allowed per API call.
api_key (str): API key loaded from settings.
api_url (str): The complete API endpoint URL, built from a base URL provided in the config.
available_voices (list): Static list of supported voices (according to current docs).
"""
def __init__(self):
# Set maximum input size based on API limits (4096 characters per request)
self.max_chars = 4096
self.api_key = settings.config["settings"]["tts"].get("openai_api_key")
if not self.api_key:
raise ValueError(
"No OpenAI API key provided in settings! Please set 'openai_api_key' in your config."
)
# Lese den Basis-URL aus der Konfiguration (z. B. "https://api.openai.com/v1" oder "https://api.openai.com/v1/")
base_url = settings.config["settings"]["tts"].get(
"openai_api_url", "https://api.openai.com/v1"
)
# Entferne ggf. den abschließenden Slash
if base_url.endswith("/"):
base_url = base_url[:-1]
# Hänge den TTS-spezifischen Pfad an
self.api_url = base_url + "/audio/speech"
# Set the available voices to a static list as per OpenAI TTS documentation.
self.available_voices = self.get_available_voices()
def get_available_voices(self):
"""
Return a static list of supported voices for the OpenAI TTS API.
According to the documentation, supported voices include:
"alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"
"""
return ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]
def randomvoice(self):
"""
Select and return a random voice from the available voices.
"""
return random.choice(self.available_voices)
def run(self, text, filepath, random_voice: bool = False):
"""
Convert the provided text to speech and save the resulting audio to the specified filepath.
Args:
text (str): The input text to convert.
filepath (str): The file path where the generated audio will be saved.
random_voice (bool): If True, select a random voice from the available voices.
"""
# Choose voice based on configuration or randomly if requested.
if random_voice:
voice = self.randomvoice()
else:
voice = settings.config["settings"]["tts"].get("openai_voice_name", "alloy")
voice = str(voice).lower() # Ensure lower-case as expected by the API
# Select the model from configuration; default to 'tts-1'
model = settings.config["settings"]["tts"].get("openai_model", "tts-1")
# Create Payload for API-request
payload = {
"model": model,
"voice": voice,
"input": text,
"response_format": "mp3", # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav"
}
headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
try:
response = requests.post(self.api_url, headers=headers, json=payload)
if response.status_code != 200:
raise RuntimeError(f"Error from TTS API: {response.status_code} {response.text}")
# Write response as binary into file.
with open(filepath, "wb") as f:
f.write(response.content)
except Exception as e:
raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}")