You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
RedditVideoMakerBot/TTS/minimax_tts.py

111 lines
3.4 KiB

import os
import random
import requests
from utils import settings
# Voice IDs exposed as MiniMaxTTS.available_voices; randomvoice() draws from
# this list.
MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]
class MiniMaxTTS:
    """
    A Text-to-Speech engine that uses the MiniMax TTS API to generate audio from text.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
        api_key (str): MiniMax API key loaded from settings or environment.
        base_url (str): The base URL for the MiniMax API, without a trailing slash.
        available_voices (list): Supported voice IDs.
        request_timeout (tuple): (connect, read) timeout in seconds passed to
            ``requests.post``.
    """

    # Without a timeout, requests waits indefinitely on a stalled connection.
    # The read timeout is generous because the call is non-streaming and the
    # whole clip is synthesized before the response is sent.
    request_timeout = (10, 300)

    def __init__(self):
        """
        Load the API key and base URL from the TTS settings.

        Raises:
            ValueError: If no API key is found in either the config
                (``minimax_api_key``) or the ``MINIMAX_API_KEY`` environment variable.
        """
        tts_config = settings.config["settings"]["tts"]

        self.max_chars = 4096
        self.api_key = tts_config.get("minimax_api_key") or os.environ.get("MINIMAX_API_KEY")
        if not self.api_key:
            raise ValueError(
                "No MiniMax API key provided! Set 'minimax_api_key' in your config or "
                "the MINIMAX_API_KEY environment variable."
            )

        # `or` instead of a .get() default so that a key which is present but
        # empty still falls back to the public endpoint.
        self.base_url = (tts_config.get("minimax_api_url") or "https://api.minimax.io").rstrip(
            "/"
        )
        self.available_voices = MINIMAX_TTS_VOICES

    def randomvoice(self):
        """Return a random voice ID from the available voices."""
        return random.choice(self.available_voices)

    def run(self, text, filepath, random_voice: bool = False):
        """
        Convert the provided text to speech and save the resulting audio to the specified filepath.

        Args:
            text (str): The input text to convert.
            filepath (str): The file path where the generated audio will be saved.
            random_voice (bool): If True, select a random voice from the available voices.

        Raises:
            RuntimeError: If the HTTP status is not 200, the API reports a
                non-zero ``base_resp.status_code``, or the response carries no
                usable audio.
            requests.exceptions.RequestException: Propagated from ``requests.post``
                (connection failures, timeouts).
        """
        tts_config = settings.config["settings"]["tts"]

        if random_voice:
            voice = self.randomvoice()
        else:
            # Empty config values fall back to the defaults as well.
            voice = tts_config.get("minimax_voice_name") or "English_Graceful_Lady"
        model = tts_config.get("minimax_tts_model") or "speech-2.8-hd"

        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        response = requests.post(
            f"{self.base_url}/v1/t2a_v2",
            headers=headers,
            json=self._build_payload(text, voice, model),
            timeout=self.request_timeout,
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"MiniMax TTS API error: {response.status_code} {response.text}"
            )

        # Validate and decode before opening the file so a bad response never
        # leaves a truncated or empty audio file behind.
        audio_bytes = self._extract_audio(response.json())
        with open(filepath, "wb") as f:
            f.write(audio_bytes)

    @staticmethod
    def _build_payload(text, voice, model):
        """Build the JSON request body for a non-streaming mono mp3 synthesis call."""
        return {
            "model": model,
            "text": text,
            "stream": False,
            "voice_setting": {
                "voice_id": voice,
                "speed": 1,
                "vol": 1,
                "pitch": 0,
            },
            "audio_setting": {
                "sample_rate": 32000,
                "bitrate": 128000,
                "format": "mp3",
                "channel": 1,
            },
        }

    @staticmethod
    def _extract_audio(result):
        """
        Check a parsed response body and return the decoded audio bytes.

        Args:
            result (dict): The JSON-decoded response body.

        Returns:
            bytes: The audio decoded from the hex string at ``data.audio``.

        Raises:
            RuntimeError: If ``base_resp.status_code`` is not 0, or the audio
                field is missing, empty, or not valid hex.
        """
        # `or {}` guards against the key being present with a null value.
        base_resp = result.get("base_resp") or {}
        if base_resp.get("status_code") != 0:
            raise RuntimeError(
                f"MiniMax TTS API returned error: "
                f"{base_resp.get('status_msg', 'Unknown error')}"
            )

        audio_hex = (result.get("data") or {}).get("audio")
        if not audio_hex:
            raise RuntimeError("MiniMax TTS API returned no audio data")
        try:
            return bytes.fromhex(audio_hex)
        except (TypeError, ValueError) as err:
            raise RuntimeError("MiniMax TTS API returned malformed audio data") from err