You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
RedditVideoMakerBot/TTS/qwen_tts.py

166 lines
5.5 KiB

import random
import requests
from utils import settings
class QwenTTS:
    """
    A Text-to-Speech engine that uses the Qwen3 TTS API endpoint to generate audio from text.

    This TTS provider connects to a Qwen TTS server and authenticates using email/password
    to obtain a bearer token, then sends TTS requests.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
        api_base_url (str): Base URL for the Qwen TTS API server, without trailing slashes.
        email (str): Email for authentication.
        password (str): Password for authentication.
        token (str): Bearer token obtained after login.
        available_voices (list): List of supported Qwen TTS voices.
    """

    # Available Qwen TTS speakers
    AVAILABLE_SPEAKERS = [
        "Chelsie",
        "Ethan",
        "Vivian",
        "Asher",
        "Aria",
        "Oliver",
        "Emma",
        "Noah",
        "Sophia",
    ]

    # Available languages
    AVAILABLE_LANGUAGES = [
        "English",
        "Chinese",
        "Spanish",
        "French",
        "German",
        "Japanese",
        "Korean",
        "Portuguese",
        "Russian",
        "Italian",
        "Arabic",
        "Hindi",
    ]

    def __init__(self):
        """
        Read the Qwen TTS settings, validate the credentials and log in.

        Raises:
            ValueError: If 'qwen_email' or 'qwen_password' is missing from the settings.
            RuntimeError: If the login performed by `_authenticate` fails.
        """
        self.max_chars = 5000
        self.token = None

        # Get configuration
        tts_config = settings.config["settings"]["tts"]

        # Strip every trailing slash so endpoint paths can be appended with a single "/".
        self.api_base_url = tts_config.get("qwen_api_url", "http://localhost:8080").rstrip("/")

        self.email = tts_config.get("qwen_email")
        self.password = tts_config.get("qwen_password")
        if not self.email or not self.password:
            raise ValueError(
                "Qwen TTS requires 'qwen_email' and 'qwen_password' in settings! "
                "Please configure these in your config.toml file."
            )

        # Per-instance copy: mutating it must not alter the class-level list.
        self.available_voices = list(self.AVAILABLE_SPEAKERS)
        self._authenticate()

    @staticmethod
    def _extract_token(data):
        """
        Pull the bearer token out of a decoded login response.

        Args:
            data: The decoded JSON body of the login response.

        Returns:
            str: The value stored under 'access_token'.

        Raises:
            RuntimeError: If the body is not a JSON object or carries no usable token.
        """
        token = data.get("access_token") if isinstance(data, dict) else None
        if not token:
            raise RuntimeError("Qwen TTS authentication failed: No access_token in response")
        return token

    def _authenticate(self):
        """
        Authenticate with the Qwen TTS server and obtain a bearer token.

        On success the token is stored in `self.token`.

        Raises:
            RuntimeError: If the server cannot be reached, answers with a non-200
                status, returns a body that is not valid JSON, or returns no token.
        """
        login_url = f"{self.api_base_url}/api/agent/api/auth/login"
        payload = {"email": self.email, "password": self.password}
        headers = {"Content-Type": "application/json"}

        # Only the network call belongs in this try block, so that transport
        # failures are the only thing reported as "failed to connect".
        try:
            response = requests.post(login_url, json=payload, headers=headers, timeout=30)
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to connect to Qwen TTS server: {str(e)}") from e

        if response.status_code != 200:
            raise RuntimeError(
                f"Qwen TTS authentication failed: {response.status_code} {response.text}"
            )

        # Catching ValueError covers JSON decoding errors on both older and
        # newer releases of requests.
        try:
            data = response.json()
        except ValueError as e:
            raise RuntimeError(
                "Qwen TTS authentication failed: response was not valid JSON"
            ) from e

        self.token = self._extract_token(data)

    def get_available_voices(self):
        """
        Return a list of supported voices for Qwen TTS.

        A new list is returned on every call, so callers may modify it freely.
        """
        return list(self.AVAILABLE_SPEAKERS)

    def randomvoice(self):
        """
        Select and return a random voice from the available voices.
        """
        return random.choice(self.available_voices)

    def run(self, text: str, filepath: str, random_voice: bool = False):
        """
        Convert the provided text to speech and save the resulting audio to the specified filepath.

        If the server answers 401, the instance logs in again and retries once.

        Args:
            text (str): The input text to convert.
            filepath (str): The file path where the generated audio will be saved.
            random_voice (bool): If True, select a random voice from the available voices.

        Raises:
            RuntimeError: If the request times out, the connection fails,
                re-authentication fails, or the server answers with a non-200 status.
        """
        tts_config = settings.config["settings"]["tts"]

        # Choose voice based on configuration or randomly if requested
        if random_voice:
            speaker = self.randomvoice()
        else:
            speaker = tts_config.get("qwen_speaker", "Vivian")

        # Get language and instruct settings
        language = tts_config.get("qwen_language", "English")
        instruct = tts_config.get("qwen_instruct", "Warm, friendly, conversational.")

        # Build TTS request
        tts_url = f"{self.api_base_url}/api/qwen-tts"
        payload = {
            "text": text,
            "language": language,
            "speaker": speaker,
            "instruct": instruct,
        }
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(tts_url, json=payload, headers=headers, timeout=120)
            # Handle token expiration - re-authenticate and retry
            if response.status_code == 401:
                self._authenticate()
                headers["Authorization"] = f"Bearer {self.token}"
                response = requests.post(tts_url, json=payload, headers=headers, timeout=120)
        except requests.exceptions.Timeout as e:
            raise RuntimeError(
                "Qwen TTS request timed out. The server may be overloaded."
            ) from e
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to generate audio with Qwen TTS: {str(e)}") from e

        if response.status_code != 200:
            raise RuntimeError(
                f"Qwen TTS generation failed: {response.status_code} {response.text}"
            )

        # Write the audio response to file
        with open(filepath, "wb") as f:
            f.write(response.content)