You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
RedditVideoMakerBot/TTS/Zall.py

144 lines
5.1 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

import base64
import json
import os
import random
import time
from pathlib import Path
import requests
from dotenv import load_dotenv
from utils import settings
from utils.console import print_substep
# Pull variables from a local .env file into the process environment before
# the ZALL_* values below are read.
load_dotenv()

# Endpoint and credentials; each falls back to an empty string when unset.
ZALL_API_URL = os.environ.get("ZALL_API_URL", "")
ZALL_BASE_URL = os.environ.get("ZALL_BASE_URL", "")
ZALL_JWT_TOKEN = os.environ.get("ZALL_JWT_TOKEN", "")

# Voice catalogue: <directory two levels above this file>/config/zall_voices.json
VOICES_FILE = Path(__file__).resolve().parents[1] / "config" / "zall_voices.json"

# Retry policy for the TTS request: number of attempts, and the pause
# (in seconds) taken after the endpoint reports an error.
MAX_RETRIES = 3
RATE_LIMIT_WAIT = 20
def _load_voices() -> list[dict]:
    """Read and return the voice catalogue stored in ``VOICES_FILE``.

    Returns:
        The parsed JSON content of the file. If the file does not exist or
        does not contain valid JSON, a yellow warning is printed and an
        empty list is returned instead.
    """
    try:
        with open(VOICES_FILE, encoding="utf-8") as handle:
            catalogue = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        # Best effort: a missing or corrupt catalogue must not break import.
        print_substep(f"Warning: Could not load voices from {VOICES_FILE}: {e}", style="yellow")
        return []
    return catalogue
class Zall:
    """Text-to-speech engine backed by the Zall streaming HTTP endpoint.

    The endpoint (``ZALL_API_URL``) is called with a JSON payload and answers
    with a stream of newline-separated JSON events. Each event carries a
    ``status`` field: ``audio_chunk`` events hold base64 audio in ``chunk``,
    ``done`` marks a complete response and ``error`` is treated as rate
    limiting (the error payload itself is not inspected).
    """

    # Cached list of User-Agent strings; ``None`` means "not loaded yet".
    _user_agents = None

    # (connect, read) timeout in seconds handed to ``requests``. Without a
    # timeout a stalled endpoint would block the caller forever.
    # NOTE(review): values are a conservative guess - tune if the service
    # legitimately pauses longer between streamed events.
    _REQUEST_TIMEOUT = (10, 120)

    def __init__(self):
        # NOTE(review): presumably the maximum text length per request that
        # the calling TTS wrapper honours - it is not read inside this class.
        self.max_chars = 2500
        self.voices = _load_voices()

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def run(self, text, filepath, random_voice: bool = False):
        """Synthesize *text* and write the resulting audio to *filepath*.

        Args:
            text: Text to convert to speech.
            filepath: Destination path; opened in binary write mode.
            random_voice: When true, ignore the configured gender and pick
                any voice of the configured language.

        Raises:
            IndexError: If no voices are loaded.
            RuntimeError: If the endpoint fails (see ``_call_api``).
        """
        voice = self._pick_voice(random_voice)
        # Fetch the audio first so a failed request never leaves an empty
        # or truncated file behind.
        audio_bytes = self._call_api(text, voice["id"])
        with open(filepath, "wb") as f:
            f.write(audio_bytes)

    def randomvoice(self) -> dict:
        """Return a random voice of the configured language.

        The language comes from the ``zall_lang`` TTS setting (default
        ``"vi"``). If no voice matches it, any loaded voice may be returned.

        Raises:
            IndexError: If no voices are loaded.
        """
        return self._choose_voice("random")

    # ------------------------------------------------------------------
    # Voice selection
    # ------------------------------------------------------------------
    def _pick_voice(self, random_voice: bool) -> dict:
        """Return a voice matching the configured language and gender.

        With *random_voice* set this is identical to ``randomvoice``.
        Otherwise the ``zall_gender`` setting (default ``"random"``) is
        applied on top of the language filter.

        Raises:
            IndexError: If no voices are loaded.
        """
        if random_voice:
            return self.randomvoice()
        gender = settings.config["settings"]["tts"].get("zall_gender", "random")
        return self._choose_voice(gender)

    def _choose_voice(self, gender: str) -> dict:
        """Pick a random voice for the configured language and *gender*."""
        if not self.voices:
            # Same exception type random.choice would raise on an empty
            # list, but with a message that points at the real cause.
            raise IndexError(
                "No Zall voices available: the voices file is missing, unreadable or empty"
            )
        lang = settings.config["settings"]["tts"].get("zall_lang", "vi")
        return random.choice(self._narrow_voices(self.voices, lang, gender))

    @staticmethod
    def _narrow_voices(voices, lang, gender="random") -> list:
        """Return the narrowest non-empty pool of *voices* for the request.

        Preference order: language and gender, then language only, then
        every voice. A *gender* of ``"random"`` skips the gender filter.
        Falling back to the language pool first avoids picking a voice of
        another language merely because the requested gender is missing.
        """
        same_lang = [v for v in voices if v.get("lang") == lang]
        if gender != "random":
            same_gender = [v for v in same_lang if v.get("gender") == gender]
            if same_gender:
                return same_gender
        return same_lang or list(voices)

    # ------------------------------------------------------------------
    # User-Agent handling
    # ------------------------------------------------------------------
    def _load_user_agents(self) -> list[str]:
        """Read ``config/user_agents.json`` once and cache the result.

        Returns an empty list (after printing a warning) when the file is
        unreadable or is not valid JSON. Entries that are not non-empty
        strings are dropped; a non-list document yields an empty list.
        """
        if self._user_agents is not None:
            return self._user_agents
        agents_path = Path(__file__).resolve().parent.parent / "config" / "user_agents.json"
        try:
            with open(agents_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except (OSError, ValueError) as e:
            # OSError: missing/unreadable file. ValueError: malformed JSON
            # (JSONDecodeError) or wrongly encoded bytes (UnicodeDecodeError).
            print_substep(f"Warning: Could not load user agents: {e}", style="yellow")
            loaded = []
        if not isinstance(loaded, list):
            loaded = []
        self._user_agents = [ua for ua in loaded if isinstance(ua, str) and ua]
        return self._user_agents

    def _pick_user_agent(self) -> str:
        """Return a random User-Agent string, or a generic one if none load."""
        agents = self._load_user_agents()
        if agents:
            return random.choice(agents)
        return "Mozilla/5.0"

    # ------------------------------------------------------------------
    # HTTP / streaming
    # ------------------------------------------------------------------
    def _call_api(self, text: str, voice_id: int) -> bytes:
        """Request speech for *text* and return the decoded audio bytes.

        The request is attempted up to ``MAX_RETRIES`` times. After an
        ``error`` event the method waits ``RATE_LIMIT_WAIT`` seconds before
        the next attempt; it does not wait after the final one.

        Raises:
            RuntimeError: If every attempt ends in an ``error`` event, if
                the stream ends without a ``done`` event, or if ``done``
                arrives without any audio.
            requests.RequestException: On connection problems or when the
                request times out.
        """
        tts_settings = settings.config["settings"]["tts"]
        payload = {
            "segments": [
                {
                    "voiceId": voice_id,
                    "text": text,
                }
            ],
            "useNaturalVoice": tts_settings.get("zall_natural_voice", False),
            "enableBrandKeywords": tts_settings.get("zall_enable_brand_keywords", False),
        }
        # The original author flagged the last five headers as important.
        headers = {
            "cache-control": "no-cache",
            "content-type": "application/json",
            "accept": "*/*",
            "accept-language": "en-GB,en-US;q=0.9,en;q=0.8,vi;q=0.7",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
            "Cookie": f"auth_token={ZALL_JWT_TOKEN}",
            "user-agent": self._pick_user_agent(),
        }

        for attempt in range(1, MAX_RETRIES + 1):
            # The context manager releases the streamed connection on every
            # exit path (success, retry or exception).
            with requests.post(
                ZALL_API_URL,
                json=payload,
                headers=headers,
                stream=True,
                timeout=self._REQUEST_TIMEOUT,
            ) as resp:
                audio_bytes = self._collect_audio(resp.iter_lines())

            if audio_bytes is not None:
                return audio_bytes

            # ``None`` means the stream reported an error event. Only wait
            # when another attempt will actually follow.
            if attempt < MAX_RETRIES:
                print_substep(
                    f" Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt}/{MAX_RETRIES})",
                    style="yellow",
                )
                time.sleep(RATE_LIMIT_WAIT)

        raise RuntimeError(f"Zall TTS failed after {MAX_RETRIES} retries (rate limited)")

    @staticmethod
    def _collect_audio(lines) -> "bytes | None":
        """Assemble audio from an iterable of raw event lines (``bytes``).

        Blank lines, lines that are not valid UTF-8 JSON and JSON values
        that are not objects are skipped, as are events with an unknown
        ``status``.

        Returns:
            The concatenated audio once a ``done`` event is seen, or
            ``None`` as soon as an ``error`` event is seen.

        Raises:
            RuntimeError: If ``done`` arrives without audio, or if the
                lines run out before a ``done`` or ``error`` event.
        """
        chunks = []
        for raw in lines:
            if not raw:
                continue
            try:
                event = json.loads(raw.decode("utf-8"))
            except (UnicodeDecodeError, json.JSONDecodeError):
                continue
            if not isinstance(event, dict):
                continue

            status = event.get("status")
            if status == "audio_chunk":
                chunks.append(base64.b64decode(event["chunk"]))
            elif status == "error":
                return None
            elif status == "done":
                # Join once at the end; repeated bytes += is quadratic.
                audio = b"".join(chunks)
                if not audio:
                    raise RuntimeError("Zall TTS completed without audio chunks")
                return audio

        raise RuntimeError("Zall TTS response ended before completion")