feat(TTS): implement Zall TTS integration with voice selection and error handling

pull/2558/head
MinhVu2711 3 weeks ago
parent 6a69319ced
commit 6e833be8a2

@ -14,10 +14,10 @@ from utils.console import print_substep
# Load environment variables from .env file
load_dotenv()
OHFREEME_API_URL = os.getenv("OHFREEME_API_URL", "")
OHFREEME_BASE_URL = os.getenv("OHFREEME_BASE_URL", "")
OHFREEME_JWT_TOKEN = os.getenv("OHFREEME_JWT_TOKEN", "")
VOICES_FILE = Path(__file__).resolve().parent.parent / "config" / "ohfreeme_voices.json"
ZALL_API_URL = os.getenv("ZALL_API_URL", "")
ZALL_BASE_URL = os.getenv("ZALL_BASE_URL", "")
ZALL_JWT_TOKEN = os.getenv("ZALL_JWT_TOKEN", "")
VOICES_FILE = Path(__file__).resolve().parent.parent / "config" / "zall_voices.json"
MAX_RETRIES = 3
RATE_LIMIT_WAIT = 20
@ -31,7 +31,7 @@ def _load_voices() -> list[dict]:
return []
class OhFreeMe:
class Zall:
# Load list of UserAgent strings for random header
_user_agents = None
@ -70,7 +70,7 @@ class OhFreeMe:
f.write(audio_bytes)
def randomvoice(self) -> dict:
lang = settings.config["settings"]["tts"].get("ohfreeme_lang", "vi")
lang = settings.config["settings"]["tts"].get("zall_lang", "vi")
filtered = [v for v in self.voices if v["lang"] == lang]
if not filtered:
filtered = self.voices
@ -79,8 +79,8 @@ class OhFreeMe:
def _pick_voice(self, random_voice: bool) -> dict:
if random_voice:
return self.randomvoice()
lang = settings.config["settings"]["tts"].get("ohfreeme_lang", "vi")
gender = settings.config["settings"]["tts"].get("ohfreeme_gender", "random")
lang = settings.config["settings"]["tts"].get("zall_lang", "vi")
gender = settings.config["settings"]["tts"].get("zall_gender", "random")
candidates = [v for v in self.voices if v["lang"] == lang]
if gender != "random":
candidates = [v for v in candidates if v["gender"] == gender]
@ -90,11 +90,14 @@ class OhFreeMe:
def _call_api(self, text: str, voice_id: int) -> bytes:
payload = {
"text": text,
"id": voice_id,
"useEnhance": settings.config["settings"]["tts"].get("ohfreeme_enhance", False),
"rate": settings.config["settings"]["tts"].get("ohfreeme_rate", 1),
"pitch": settings.config["settings"]["tts"].get("ohfreeme_pitch", 0),
"segments": [
{
"voiceId": voice_id,
"text": text
}
],
"useNaturalVoice": settings.config["settings"]["tts"].get("zall_natural_voice", False),
"enableBrandKeywords": settings.config["settings"]["tts"].get("zall_enable_brand_keywords", False),
}
headers = {
"cache-control": "no-cache",
@ -103,52 +106,38 @@ class OhFreeMe:
"accept-language": "en-GB,en-US;q=0.9,en;q=0.8,vi;q=0.7", # important
"sec-fetch-mode": "cors", # important
"sec-fetch-site": "same-origin", # important
"Cookie": f"auth_token={OHFREEME_JWT_TOKEN}", # important
"Cookie": f"auth_token={ZALL_JWT_TOKEN}", # important
"user-agent": self._pick_user_agent(), # important
# "origin": OHFREEME_BASE_URL,
# "referer": f"{OHFREEME_BASE_URL}/",
}
# streaming NDJSON response; the request is retried when the stream reports an error
for attempt in range(MAX_RETRIES):
resp = requests.post(OHFREEME_API_URL, json=payload, headers=headers, stream=True)
# Ratelimit handling first line may contain error object
try:
first_line = next(resp.iter_lines())
parsed = json.loads(first_line.decode('utf-8'))
print_substep(f"[OhFreeMe debug] First line parsed: {parsed}", style="blue")
if parsed.get("status") == "error":
print_substep(
f" Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt + 1}/{MAX_RETRIES})",
style="yellow",
)
time.sleep(RATE_LIMIT_WAIT)
continue
except (StopIteration, json.JSONDecodeError):
pass
resp = requests.post(ZALL_API_URL, json=payload, headers=headers, stream=True)
audio_bytes = b""
# iterate over the streamed events, collecting audio chunks until a "done" event arrives
for line in resp.iter_lines():
if not line:
continue
try:
data = json.loads(line.decode('utf-8'))
event = json.loads(line.decode("utf-8"))
except json.JSONDecodeError:
continue
# debug: print raw line (decoded) to terminal
if data.get("status") == "done":
print_substep(f"[OhFreeMe debug] Received")
return self._extract_audio(data)
raise RuntimeError(f"OhFreeMe TTS failed after {MAX_RETRIES} retries (rate limited)")
def _extract_audio(self, data: dict) -> bytes:
# Expecting a dict with a "url" field containing a data URI
url = data.get("url")
if not url:
raise RuntimeError("Missing 'url' in API response data")
# url format: "data:audio/mpeg;base64,<base64data>"
if not (url.startswith("data:") and ";base64," in url):
raise RuntimeError(f"Unexpected URL format in API response: {url}")
b64_part = url.split(";base64,", 1)[1]
return base64.b64decode(b64_part)
status = event.get("status")
if status == "audio_chunk":
audio_bytes += base64.b64decode(event["chunk"])
elif status == "error":
print_substep(
f" Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt + 1}/{MAX_RETRIES})",
style="yellow",
)
time.sleep(RATE_LIMIT_WAIT)
break
elif status == "done":
if not audio_bytes:
raise RuntimeError("Zall TTS completed without audio chunks")
return audio_bytes
else:
raise RuntimeError("Zall TTS response ended before completion")
raise RuntimeError(f"Zall TTS failed after {MAX_RETRIES} retries (rate limited)")

@ -1,73 +1,67 @@
[
{
"id": 27,
"id": "GFFzHH1GgnlSoBXpmeYS",
"name": "Trần Sơn",
"gender": "male",
"lang": "vi"
},
{
"id": 962,
"name": "Lê Quốc Khánh",
"id": "emVXmpOD9cWPjuNIV1vb",
"name": "Tùng Duy",
"gender": "male",
"lang": "vi"
},
{
"id": 1543,
"name": "Nguyễn Lam Anh",
"id": "u1GK69d224tVltnhZaD9",
"name": "Minh Quân",
"gender": "male",
"lang": "vi"
},
{
"id": 510,
"name": "Nguyễn Ngân",
"id": "Rj3ur2PrLr3JvwYhGAxT",
"name": "Khánh Ly",
"gender": "female",
"lang": "vi"
},
{
"id": 524,
"id": "gl5jjR8ul3WEOIkk7aOc",
"name": "Nguyễn Huyền Trang",
"gender": "female",
"lang": "vi"
},
{
"id": 1601,
"name": "Nguyễn Thu Huyền",
"gender": "female",
"lang": "vi"
},
{
"id": 713,
"name": "Harper Lee",
"id": "pofi4Uk4l5pDRzr9wxvt",
"name": "Ivy Le",
"gender": "female",
"lang": "en"
},
{
"id": 551,
"name": "Diana Prince",
"id": "zba1eCUoRMYf97gVI3Zd",
"name": "Cassie J",
"gender": "female",
"lang": "en"
},
{
"id": 942,
"name": "Sophie Blake",
"id": "QbFQ0nxCenuuHtiASppf",
"name": "Hope",
"gender": "female",
"lang": "en"
},
{
"id": 597,
"name": "Tom Holland",
"id": "P9HN0ybfh8Ny3A6jJH7v",
"name": "Archer Kingsley",
"gender": "male",
"lang": "en"
},
{
"id": 1371,
"name": "Jack Sparrow",
"id": "xabWAiYfcCaEBoWjJv3d",
"name": "Webb",
"gender": "male",
"lang": "en"
},
{
"id": 7,
"name": "Patrick O'Cornor",
"id": "Q0kRD2oNYVHJoswQ4IEs",
"name": "Patty Wells",
"gender": "male",
"lang": "en"
}

@ -200,7 +200,7 @@ class ManualTTSProcessor:
Reuses the TTS engines from video_creation/voices.py
"""
from TTS.GTTS import GTTS
from TTS.OhFreeMe import OhFreeMe
from TTS.Zall import Zall
from TTS.Crikk import Crikk
from TTS.TikTok import TikTok
from TTS.aws_polly import AWSPolly
@ -211,7 +211,7 @@ class ManualTTSProcessor:
providers = {
"googletranslate": GTTS,
"ohfreeme": OhFreeMe,
"zall": Zall,
"crikk": Crikk,
"awspolly": AWSPolly,
"streamlabspolly": StreamlabsPolly,

@ -104,7 +104,7 @@ _BASE_SETTINGS_DEFAULTS = {
"background_thumbnail_font_color": "255,255,255",
},
"tts": {
"voice_choice": "ohfreeme",
"voice_choice": "zall",
"random_voice": False,
"elevenlabs_voice_name": "Bella",
"elevenlabs_api_key": "",
@ -120,11 +120,12 @@ _BASE_SETTINGS_DEFAULTS = {
"openai_api_key": "",
"openai_voice_name": "alloy",
"openai_model": "tts-1",
"ohfreeme_lang": "vi",
"ohfreeme_gender": "random",
"ohfreeme_rate": 1,
"ohfreeme_pitch": 0,
"ohfreeme_enhance": False,
"zall_lang": "vi",
"zall_gender": "random",
"zall_rate": 1,
"zall_pitch": 0,
"zall_natural_voice": True,
"zall_enable_brand_keywords": False,
},
},
}

Loading…
Cancel
Save