You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
166 lines
5.5 KiB
166 lines
5.5 KiB
import random
|
|
import requests
|
|
|
|
from utils import settings
|
|
|
|
|
|
class QwenTTS:
    """
    Text-to-Speech engine that generates audio through a Qwen3 TTS API server.

    The provider logs in with an email/password pair to obtain a bearer token,
    then posts TTS requests to the server and writes the returned bytes to disk.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
            Not enforced by this class; presumably consumed by the caller.
        api_base_url (str): Base URL of the Qwen TTS server, without trailing slashes.
        email (str): Email for authentication.
        password (str): Password for authentication.
        token (str): Bearer token obtained after login.
        available_voices (list): Per-instance copy of the supported speakers.
    """

    # Available Qwen TTS speakers
    AVAILABLE_SPEAKERS = [
        "Chelsie",
        "Ethan",
        "Vivian",
        "Asher",
        "Aria",
        "Oliver",
        "Emma",
        "Noah",
        "Sophia",
    ]

    # Available languages
    AVAILABLE_LANGUAGES = [
        "English",
        "Chinese",
        "Spanish",
        "French",
        "German",
        "Japanese",
        "Korean",
        "Portuguese",
        "Russian",
        "Italian",
        "Arabic",
        "Hindi",
    ]

    def __init__(self):
        """
        Read the TTS settings, validate the credentials and log in.

        Raises:
            ValueError: If 'qwen_email' or 'qwen_password' is missing or empty.
            RuntimeError: If the initial authentication fails.
        """
        self.max_chars = 5000
        self.token = None

        # Get configuration
        tts_config = settings.config["settings"]["tts"]

        # Fall back to the default when the key is absent, None or empty, and
        # strip every trailing slash so URL joins never produce "//".
        base_url = tts_config.get("qwen_api_url") or "http://localhost:8080"
        self.api_base_url = base_url.rstrip("/")

        self.email = tts_config.get("qwen_email")
        self.password = tts_config.get("qwen_password")

        if not self.email or not self.password:
            raise ValueError(
                "Qwen TTS requires 'qwen_email' and 'qwen_password' in settings! "
                "Please configure these in your config.toml file."
            )

        # Copy so that mutating the instance list never alters the class constant.
        self.available_voices = list(self.AVAILABLE_SPEAKERS)
        self._authenticate()

    def _authenticate(self):
        """
        Authenticate with the Qwen TTS server and store the bearer token.

        Raises:
            RuntimeError: If the server is unreachable, answers with a non-200
                status, returns a body that is not valid JSON, or returns no
                'access_token'.
        """
        login_url = f"{self.api_base_url}/api/agent/api/auth/login"
        payload = {"email": self.email, "password": self.password}
        headers = {"Content-Type": "application/json"}

        # Only the network call is guarded here, so that response-handling
        # errors below are not misreported as connection failures.
        try:
            response = requests.post(login_url, json=payload, headers=headers, timeout=30)
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to connect to Qwen TTS server: {e}") from e

        if response.status_code != 200:
            raise RuntimeError(
                f"Qwen TTS authentication failed: {response.status_code} {response.text}"
            )

        # JSON decode errors are ValueError subclasses in every requests version.
        try:
            data = response.json()
        except ValueError as e:
            raise RuntimeError(
                "Qwen TTS authentication failed: login response is not valid JSON"
            ) from e

        # A JSON body that is not an object cannot carry a token.
        self.token = data.get("access_token") if isinstance(data, dict) else None
        if not self.token:
            raise RuntimeError("Qwen TTS authentication failed: No access_token in response")

    def get_available_voices(self):
        """
        Return a list of supported voices for Qwen TTS.

        A fresh copy is returned, so callers may modify it freely.
        """
        return list(self.AVAILABLE_SPEAKERS)

    def randomvoice(self):
        """
        Select and return a random voice from the available voices.
        """
        return random.choice(self.available_voices)

    def _post_tts(self, tts_url, payload):
        """
        Send one TTS request authorised with the current bearer token.
        """
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
        }
        return requests.post(tts_url, json=payload, headers=headers, timeout=120)

    def run(self, text: str, filepath: str, random_voice: bool = False):
        """
        Convert the provided text to speech and save the audio to the given filepath.

        Args:
            text (str): The input text to convert.
            filepath (str): The file path where the generated audio will be saved.
            random_voice (bool): If True, select a random voice from the available voices.

        Raises:
            RuntimeError: If the request times out or fails, re-authentication
                fails, the server answers with a non-200 status, or the server
                returns an empty body.
        """
        tts_config = settings.config["settings"]["tts"]

        # Choose voice based on configuration or randomly if requested
        if random_voice:
            speaker = self.randomvoice()
        else:
            speaker = tts_config.get("qwen_speaker", "Vivian")

        # Build TTS request
        tts_url = f"{self.api_base_url}/api/qwen-tts"
        payload = {
            "text": text,
            "language": tts_config.get("qwen_language", "English"),
            "speaker": speaker,
            "instruct": tts_config.get("qwen_instruct", "Warm, friendly, conversational."),
        }

        try:
            response = self._post_tts(tts_url, payload)

            # Handle token expiration - re-authenticate and retry once
            if response.status_code == 401:
                self._authenticate()
                response = self._post_tts(tts_url, payload)
        except requests.exceptions.Timeout as e:
            raise RuntimeError(
                "Qwen TTS request timed out. The server may be overloaded."
            ) from e
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to generate audio with Qwen TTS: {e}") from e

        if response.status_code != 200:
            raise RuntimeError(
                f"Qwen TTS generation failed: {response.status_code} {response.text}"
            )

        # Refuse to write a zero-byte file that would only fail later in the pipeline.
        if not response.content:
            raise RuntimeError("Qwen TTS generation failed: server returned an empty audio response")

        # Write the audio response to file
        with open(filepath, "wb") as f:
            f.write(response.content)
|