pull/2483/merge
Octopus 3 days ago committed by GitHub
commit 70db1ed7e4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -0,0 +1,110 @@
import os
import random
import requests
from utils import settings
# Voice IDs offered to callers of MiniMaxTTS; randomvoice() draws from this list.
MINIMAX_TTS_VOICES = [
    "English_Graceful_Lady",
    "English_Insightful_Speaker",
    "English_radiant_girl",
    "English_Persuasive_Man",
    "English_Lucky_Robot",
    "English_expressive_narrator",
]
class MiniMaxTTS:
    """
    A Text-to-Speech engine that uses the MiniMax TTS API to generate audio from text.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
        api_key (str): MiniMax API key loaded from settings or environment.
        base_url (str): The base URL for the MiniMax API, without a trailing slash.
        available_voices (list): Supported voice IDs.
    """

    DEFAULT_BASE_URL = "https://api.minimax.io"
    DEFAULT_VOICE = "English_Graceful_Lady"
    DEFAULT_MODEL = "speech-2.8-hd"
    # (connect, read) timeouts in seconds. requests never times out on its own,
    # so without this a stalled connection would block the caller forever.
    REQUEST_TIMEOUT = (10, 300)

    def __init__(self):
        """
        Load the API key and base URL from the project settings.

        Raises:
            ValueError: If no API key is present in the config or in the
                MINIMAX_API_KEY environment variable.
        """
        self.max_chars = 4096
        tts_config = settings.config["settings"]["tts"]
        self.api_key = tts_config.get("minimax_api_key") or os.environ.get("MINIMAX_API_KEY")
        if not self.api_key:
            raise ValueError(
                "No MiniMax API key provided! Set 'minimax_api_key' in your config or "
                "the MINIMAX_API_KEY environment variable."
            )
        # `or` (rather than a .get default) also covers a key that is present
        # but empty/None.
        self.base_url = (tts_config.get("minimax_api_url") or self.DEFAULT_BASE_URL).rstrip("/")
        # Copy so that changes to this instance's list never leak into the
        # module-level constant.
        self.available_voices = list(MINIMAX_TTS_VOICES)

    def randomvoice(self):
        """Return a random voice ID from the available voices."""
        return random.choice(self.available_voices)

    @staticmethod
    def _extract_audio(result):
        """Decode the hex-encoded audio from a parsed API response into bytes.

        Raises:
            RuntimeError: If the audio field is missing, empty, or not valid hex.
        """
        audio_hex = (result.get("data") or {}).get("audio")
        if not audio_hex:
            raise RuntimeError("MiniMax TTS API returned no audio data")
        try:
            return bytes.fromhex(audio_hex)
        except (TypeError, ValueError) as err:
            raise RuntimeError("MiniMax TTS API returned malformed audio data") from err

    def run(self, text, filepath, random_voice: bool = False):
        """
        Convert the provided text to speech and save the resulting audio to the specified filepath.

        Args:
            text (str): The input text to convert.
            filepath (str): The file path where the generated audio will be saved.
            random_voice (bool): If True, select a random voice from the available voices.

        Raises:
            RuntimeError: If the HTTP status is not 200, the body is not a JSON
                object, the API reports a non-zero status, or no decodable
                audio is returned.
            requests.exceptions.RequestException: On connection failures or
                when the request exceeds REQUEST_TIMEOUT.
        """
        tts_config = settings.config["settings"]["tts"]
        if random_voice:
            voice = self.randomvoice()
        else:
            voice = tts_config.get("minimax_voice_name") or self.DEFAULT_VOICE
        model = tts_config.get("minimax_tts_model") or self.DEFAULT_MODEL

        payload = {
            "model": model,
            "text": text,
            "stream": False,
            "voice_setting": {
                "voice_id": voice,
                "speed": 1,
                "vol": 1,
                "pitch": 0,
            },
            "audio_setting": {
                "sample_rate": 32000,
                "bitrate": 128000,
                "format": "mp3",
                "channel": 1,
            },
        }
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        response = requests.post(
            f"{self.base_url}/v1/t2a_v2",
            headers=headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        if response.status_code != 200:
            raise RuntimeError(
                f"MiniMax TTS API error: {response.status_code} {response.text}"
            )

        try:
            result = response.json()
        except ValueError as err:
            # requests' JSON decode errors derive from ValueError.
            raise RuntimeError("MiniMax TTS API returned a non-JSON response") from err
        if not isinstance(result, dict):
            raise RuntimeError("MiniMax TTS API returned an unexpected response body")

        # A missing base_resp is treated as an error, same as a non-zero code.
        base_resp = result.get("base_resp") or {}
        if base_resp.get("status_code") != 0:
            raise RuntimeError(
                f"MiniMax TTS API returned error: "
                f"{base_resp.get('status_msg', 'Unknown error')}"
            )

        # Decode before opening the file so a bad payload never leaves a
        # truncated or empty mp3 behind.
        audio_bytes = self._extract_audio(result)
        with open(filepath, "wb") as f:
            f.write(audio_bytes)

@ -0,0 +1,243 @@
"""Unit and integration tests for the MiniMax TTS provider."""
import os
import tempfile
from unittest.mock import MagicMock, patch
import pytest
# ---------------------------------------------------------------------------
# Helpers: fake out the settings module so we can import without a config file
# ---------------------------------------------------------------------------
# Default stand-in for utils.settings.config; holds only the MiniMax-related
# keys under settings.tts.
FAKE_SETTINGS = {
    "settings": {
        "tts": {
            "minimax_api_key": "test-api-key",
            "minimax_api_url": "https://api.minimax.io",
            "minimax_voice_name": "English_Graceful_Lady",
            "minimax_tts_model": "speech-2.8-hd",
        },
    },
}
def _patch_settings(config=None):
    """Return a patcher that replaces utils.settings.config.

    The patcher swaps a mock ``utils`` module into ``sys.modules`` so that
    ``from utils import settings`` yields an object whose ``config``
    attribute is the supplied dictionary. Use it as a context manager.

    Args:
        config (dict | None): Configuration to expose. ``FAKE_SETTINGS`` is
            used only when this is ``None``.
    """
    mock_settings = MagicMock()
    # Test for None explicitly: a truthiness check would silently replace an
    # intentionally empty config with FAKE_SETTINGS.
    mock_settings.config = FAKE_SETTINGS if config is None else config
    return patch.dict("sys.modules", {"utils": MagicMock(settings=mock_settings)})
# ---------------------------------------------------------------------------
# Unit tests
# ---------------------------------------------------------------------------
class TestMiniMaxTTSInit:
    """Provider instantiation and configuration parsing."""

    def test_creates_instance_with_valid_config(self):
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            assert tts is not None

    def test_raises_when_api_key_missing(self):
        config = {"settings": {"tts": {}}}
        # patch.dict snapshots os.environ and restores it on exit, so the
        # variable can simply be dropped inside the block.
        with _patch_settings(config), patch.dict(os.environ):
            os.environ.pop("MINIMAX_API_KEY", None)
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            with pytest.raises(ValueError, match="No MiniMax API key"):
                MiniMaxTTS()

    def test_reads_api_key_from_env(self):
        config = {"settings": {"tts": {}}}
        with _patch_settings(config), patch.dict(os.environ, {"MINIMAX_API_KEY": "env-key"}):
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            assert tts.api_key == "env-key"

    def test_default_base_url(self):
        # Leave minimax_api_url out so the provider's built-in default is
        # what actually gets checked.
        config = {"settings": {"tts": {"minimax_api_key": "test-api-key"}}}
        with _patch_settings(config):
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            assert tts.base_url == "https://api.minimax.io"

    def test_base_url_trailing_slash_is_stripped(self):
        config = {
            "settings": {
                "tts": {
                    "minimax_api_key": "test-api-key",
                    "minimax_api_url": "https://api.minimax.io/",
                }
            }
        }
        with _patch_settings(config):
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            assert tts.base_url == "https://api.minimax.io"

    def test_available_voices_not_empty(self):
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            assert len(tts.available_voices) > 0

    def test_max_chars(self):
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            assert tts.max_chars == 4096
class TestMiniMaxTTSRandomVoice:
    """randomvoice() must only ever return an advertised voice ID."""

    def test_randomvoice_returns_valid_voice(self):
        with _patch_settings():
            from TTS.minimax_tts import MINIMAX_TTS_VOICES, MiniMaxTTS  # noqa: PLC0415

            picked = MiniMaxTTS().randomvoice()
            assert picked in MINIMAX_TTS_VOICES
class TestMiniMaxTTSRun:
    """Tests for the run() method using a mocked requests.post.

    Output files live under pytest's per-test ``tmp_path`` directory, so
    nothing is left behind and no file is written while another handle to it
    is still open. Each test keeps its work inside ``_patch_settings()`` so
    the ``requests`` module patched here is the same one the provider module
    imported.
    """

    def _make_mock_response(self, audio_hex="494433"):
        """Build a successful fake response whose audio field is *audio_hex*."""
        mock_resp = MagicMock()
        mock_resp.status_code = 200
        mock_resp.json.return_value = {
            "data": {"audio": audio_hex, "status": 2},
            "base_resp": {"status_code": 0, "status_msg": "success"},
        }
        return mock_resp

    def test_sends_request_to_correct_url(self, tmp_path):
        out_file = tmp_path / "out.mp3"
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            with patch("requests.post", return_value=self._make_mock_response()) as mock_post:
                tts.run("Hello world.", str(out_file))

            mock_post.assert_called_once()
            call_url = mock_post.call_args[0][0]
            assert "/v1/t2a_v2" in call_url
            assert "api.minimax.io" in call_url

    def test_sends_correct_payload(self, tmp_path):
        out_file = tmp_path / "out.mp3"
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            with patch("requests.post", return_value=self._make_mock_response()) as mock_post:
                tts.run("Hello world.", str(out_file), random_voice=False)

            payload = mock_post.call_args[1]["json"]
            assert payload["model"] == "speech-2.8-hd"
            assert payload["text"] == "Hello world."
            assert payload["voice_setting"]["voice_id"] == "English_Graceful_Lady"
            assert payload["audio_setting"]["format"] == "mp3"

    def test_writes_audio_bytes_to_file(self, tmp_path):
        audio_hex = "494433"  # hex for 'ID3' — valid-ish mp3 header start
        out_file = tmp_path / "out.mp3"
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            with patch("requests.post", return_value=self._make_mock_response(audio_hex)):
                tts.run("Hello.", str(out_file))

        assert out_file.read_bytes() == bytes.fromhex(audio_hex)

    def test_raises_on_http_error(self, tmp_path):
        out_file = tmp_path / "out.mp3"
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            mock_resp = MagicMock()
            mock_resp.status_code = 401
            mock_resp.text = "Unauthorized"
            with patch("requests.post", return_value=mock_resp):
                with pytest.raises(RuntimeError, match="MiniMax TTS API error: 401"):
                    tts.run("Hello.", str(out_file))

        # A failed request must not leave an output file behind.
        assert not out_file.exists()

    def test_raises_on_api_status_error(self, tmp_path):
        out_file = tmp_path / "out.mp3"
        with _patch_settings():
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            mock_resp = MagicMock()
            mock_resp.status_code = 200
            mock_resp.json.return_value = {
                "data": {},
                "base_resp": {"status_code": 2013, "status_msg": "invalid voice_id"},
            }
            with patch("requests.post", return_value=mock_resp):
                with pytest.raises(RuntimeError, match="invalid voice_id"):
                    tts.run("Hello.", str(out_file))

        assert not out_file.exists()

    def test_uses_random_voice_when_requested(self, tmp_path):
        out_file = tmp_path / "out.mp3"
        with _patch_settings():
            from TTS.minimax_tts import MINIMAX_TTS_VOICES, MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            captured_payloads = []

            def fake_post(url, **kwargs):
                captured_payloads.append(kwargs.get("json", {}))
                return self._make_mock_response()

            with patch("requests.post", side_effect=fake_post):
                tts.run("Hello.", str(out_file), random_voice=True)

            voice_used = captured_payloads[0]["voice_setting"]["voice_id"]
            assert voice_used in MINIMAX_TTS_VOICES
class TestTTSProvidersRegistry:
    """Verify MiniMax is registered in voices.py."""

    def test_minimax_in_providers_source(self):
        """Verify voices.py source contains MiniMax registration.

        The file is inspected as text rather than imported.
        """
        import pathlib

        voices_path = pathlib.Path(__file__).parent.parent / "video_creation" / "voices.py"
        # Pin the encoding so the result does not vary with the platform's
        # default locale.
        source = voices_path.read_text(encoding="utf-8")
        assert "MiniMax" in source, "MiniMax not found in TTSProviders registry"
        assert "MiniMaxTTS" in source, "MiniMaxTTS class not imported in voices.py"
        assert "minimax_tts" in source, "minimax_tts module not imported in voices.py"
# ---------------------------------------------------------------------------
# Integration test: calls the real MiniMax API (skipped if no key set)
# ---------------------------------------------------------------------------
# Read once at import time; the integration class below is skipped when unset.
MINIMAX_API_KEY = os.environ.get("MINIMAX_API_KEY")


@pytest.mark.skipif(not MINIMAX_API_KEY, reason="MINIMAX_API_KEY not set")
class TestMiniMaxTTSIntegration:
    """Live API calls — only run when MINIMAX_API_KEY is available."""

    def test_synthesizes_speech_to_file(self, tmp_path):
        config = {
            "settings": {
                "tts": {
                    "minimax_api_key": MINIMAX_API_KEY,
                    "minimax_api_url": "https://api.minimax.io",
                    "minimax_voice_name": "English_Graceful_Lady",
                    "minimax_tts_model": "speech-2.8-hd",
                }
            }
        }
        # pytest removes tmp_path for us, so the audio file is cleaned up even
        # when the size assertion fails.
        out_file = tmp_path / "integration.mp3"
        with _patch_settings(config):
            from TTS.minimax_tts import MiniMaxTTS  # noqa: PLC0415

            tts = MiniMaxTTS()
            tts.run("Hello, this is a MiniMax TTS test.", str(out_file))

        size = os.path.getsize(out_file)
        assert size > 100, f"Audio file too small ({size} bytes), likely empty or error"

@ -45,7 +45,7 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96,
background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" }
[settings.tts]
voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI", "MiniMax"], example = "tiktok", explanation = "The voice platform used for TTS generation. " }
random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" }
elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] }
elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
@ -61,3 +61,7 @@ openai_api_url = { optional = true, default = "https://api.openai.com/v1/", exam
openai_api_key = { optional = true, example = "sk-abc123def456...", explanation = "Your OpenAI API key for TTS generation" }
openai_voice_name = { optional = false, default = "alloy", example = "alloy", explanation = "The voice used for OpenAI TTS generation", options = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "af_heart"] }
openai_model = { optional = false, default = "tts-1", example = "tts-1", explanation = "The model variant used for OpenAI TTS generation", options = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"] }
minimax_api_key = { optional = true, example = "sk-api-xxx...", explanation = "MiniMax API key for TTS generation (also read from MINIMAX_API_KEY env var)" }
minimax_api_url = { optional = true, default = "https://api.minimax.io", example = "https://api.minimax.io", explanation = "The base URL for the MiniMax API" }
minimax_voice_name = { optional = false, default = "English_Graceful_Lady", example = "English_Graceful_Lady", explanation = "The voice used for MiniMax TTS generation", options = ["English_Graceful_Lady", "English_Insightful_Speaker", "English_radiant_girl", "English_Persuasive_Man", "English_Lucky_Robot", "English_expressive_narrator"] }
minimax_tts_model = { optional = false, default = "speech-2.8-hd", example = "speech-2.8-hd", explanation = "The model variant used for MiniMax TTS generation", options = ["speech-2.8-hd", "speech-2.8-turbo"] }

@ -6,6 +6,7 @@ from TTS.aws_polly import AWSPolly
from TTS.elevenlabs import elevenlabs
from TTS.engine_wrapper import TTSEngine
from TTS.GTTS import GTTS
from TTS.minimax_tts import MiniMaxTTS
from TTS.openai_tts import OpenAITTS
from TTS.pyttsx import pyttsx
from TTS.streamlabs_polly import StreamlabsPolly
@ -23,6 +24,7 @@ TTSProviders = {
"pyttsx": pyttsx,
"ElevenLabs": elevenlabs,
"OpenAI": OpenAITTS,
"MiniMax": MiniMaxTTS,
}

Loading…
Cancel
Save