From 6e833be8a2f446ff4a8428a51286ba69aed9e49a Mon Sep 17 00:00:00 2001 From: MinhVu2711 Date: Tue, 2 Jun 2026 13:36:43 +0000 Subject: [PATCH] feat(TTS): implement Zall TTS integration with voice selection and error handling --- TTS/{OhFreeMe.py => Zall.py} | 91 ++++++++----------- ...{ohfreeme_voices.json => zall_voices.json} | 46 ++++------ manual/tts_processor.py | 4 +- manual_main.py | 13 +-- 4 files changed, 69 insertions(+), 85 deletions(-) rename TTS/{OhFreeMe.py => Zall.py} (58%) rename config/{ohfreeme_voices.json => zall_voices.json} (50%) diff --git a/TTS/OhFreeMe.py b/TTS/Zall.py similarity index 58% rename from TTS/OhFreeMe.py rename to TTS/Zall.py index 48a41bb..dc042fd 100644 --- a/TTS/OhFreeMe.py +++ b/TTS/Zall.py @@ -14,10 +14,10 @@ from utils.console import print_substep # Load environment variables from .env file load_dotenv() -OHFREEME_API_URL = os.getenv("OHFREEME_API_URL", "") -OHFREEME_BASE_URL = os.getenv("OHFREEME_BASE_URL", "") -OHFREEME_JWT_TOKEN = os.getenv("OHFREEME_JWT_TOKEN", "") -VOICES_FILE = Path(__file__).resolve().parent.parent / "config" / "ohfreeme_voices.json" +ZALL_API_URL = os.getenv("ZALL_API_URL", "") +ZALL_BASE_URL = os.getenv("ZALL_BASE_URL", "") +ZALL_JWT_TOKEN = os.getenv("ZALL_JWT_TOKEN", "") +VOICES_FILE = Path(__file__).resolve().parent.parent / "config" / "zall_voices.json" MAX_RETRIES = 3 RATE_LIMIT_WAIT = 20 @@ -31,7 +31,7 @@ def _load_voices() -> list[dict]: return [] -class OhFreeMe: +class Zall: # Load list of User‑Agent strings for random header _user_agents = None @@ -70,7 +70,7 @@ class OhFreeMe: f.write(audio_bytes) def randomvoice(self) -> dict: - lang = settings.config["settings"]["tts"].get("ohfreeme_lang", "vi") + lang = settings.config["settings"]["tts"].get("zall_lang", "vi") filtered = [v for v in self.voices if v["lang"] == lang] if not filtered: filtered = self.voices @@ -79,8 +79,8 @@ class OhFreeMe: def _pick_voice(self, random_voice: bool) -> dict: if random_voice: return self.randomvoice() - lang = settings.config["settings"]["tts"].get("ohfreeme_lang", "vi") - gender = settings.config["settings"]["tts"].get("ohfreeme_gender", "random") + lang = settings.config["settings"]["tts"].get("zall_lang", "vi") + gender = settings.config["settings"]["tts"].get("zall_gender", "random") candidates = [v for v in self.voices if v["lang"] == lang] if gender != "random": candidates = [v for v in candidates if v["gender"] == gender] @@ -90,11 +90,14 @@ class OhFreeMe: def _call_api(self, text: str, voice_id: int) -> bytes: payload = { - "text": text, - "id": voice_id, - "useEnhance": settings.config["settings"]["tts"].get("ohfreeme_enhance", False), - "rate": settings.config["settings"]["tts"].get("ohfreeme_rate", 1), - "pitch": settings.config["settings"]["tts"].get("ohfreeme_pitch", 0), + "segments": [ + { + "voiceId": voice_id, + "text": text + } + ], + "useNaturalVoice": settings.config["settings"]["tts"].get("zall_natural_voice", False), + "enableBrandKeywords": settings.config["settings"]["tts"].get("zall_enable_brand_keywords", False), } headers = { "cache-control": "no-cache", @@ -103,52 +106,38 @@ class OhFreeMe: "accept-language": "en-GB,en-US;q=0.9,en;q=0.8,vi;q=0.7", # important "sec-fetch-mode": "cors", # important "sec-fetch-site": "same-origin", # important - "Cookie": f"auth_token={OHFREEME_JWT_TOKEN}", # important + "Cookie": f"auth_token={ZALL_JWT_TOKEN}", # important "user-agent": self._pick_user_agent(), # important - # "origin": OHFREEME_BASE_URL, - # "referer": f"{OHFREEME_BASE_URL}/", } - # streaming NDJSON response with debug logging for attempt in range(MAX_RETRIES): - resp = requests.post(OHFREEME_API_URL, json=payload, headers=headers, stream=True) - # Rate‑limit handling – first line may contain error object - try: - first_line = next(resp.iter_lines()) - parsed = json.loads(first_line.decode('utf-8')) - print_substep(f"[OhFreeMe debug] First line parsed: {parsed}", style="blue") - if parsed.get("status") == "error": - print_substep( - f" Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt + 1}/{MAX_RETRIES})", - style="yellow", - ) - time.sleep(RATE_LIMIT_WAIT) - continue - except (StopIteration, json.JSONDecodeError): - pass + resp = requests.post(ZALL_API_URL, json=payload, headers=headers, stream=True) + audio_bytes = b"" - # iterate remaining chunks until done, keeping only the final line for processing for line in resp.iter_lines(): if not line: continue + try: - data = json.loads(line.decode('utf-8')) + event = json.loads(line.decode("utf-8")) except json.JSONDecodeError: continue - # debug: print raw line (decoded) to terminal - if data.get("status") == "done": - print_substep(f"[OhFreeMe debug] Received") - return self._extract_audio(data) - - raise RuntimeError(f"OhFreeMe TTS failed after {MAX_RETRIES} retries (rate limited)") - - def _extract_audio(self, data: dict) -> bytes: - # Expecting a dict with a "url" field containing a data URI - url = data.get("url") - if not url: - raise RuntimeError("Missing 'url' in API response data") - # url format: "data:audio/mpeg;base64," - if not (url.startswith("data:") and ";base64," in url): - raise RuntimeError(f"Unexpected URL format in API response: {url}") - b64_part = url.split(";base64,", 1)[1] - return base64.b64decode(b64_part) + + status = event.get("status") + if status == "audio_chunk": + audio_bytes += base64.b64decode(event["chunk"]) + elif status == "error": + print_substep( + f" Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt + 1}/{MAX_RETRIES})", + style="yellow", + ) + time.sleep(RATE_LIMIT_WAIT) + break + elif status == "done": + if not audio_bytes: + raise RuntimeError("Zall TTS completed without audio chunks") + return audio_bytes + else: + raise RuntimeError("Zall TTS response ended before completion") + + raise RuntimeError(f"Zall TTS failed after {MAX_RETRIES} retries (rate limited)") diff --git a/config/ohfreeme_voices.json b/config/zall_voices.json similarity index 50% rename from config/ohfreeme_voices.json rename to config/zall_voices.json index ef14753..083db8b 100644 --- a/config/ohfreeme_voices.json +++ b/config/zall_voices.json @@ -1,73 +1,67 @@ [ { - "id": 27, + "id": "GFFzHH1GgnlSoBXpmeYS", "name": "Trần Sơn", "gender": "male", "lang": "vi" }, { - "id": 962, - "name": "Lê Quốc Khánh", + "id": "emVXmpOD9cWPjuNIV1vb", + "name": "Tùng Duy", "gender": "male", "lang": "vi" }, { - "id": 1543, - "name": "Nguyễn Lam Anh", + "id": "u1GK69d224tVltnhZaD9", + "name": "Minh Quân", "gender": "male", "lang": "vi" }, { - "id": 510, - "name": "Nguyễn Ngân", + "id": "Rj3ur2PrLr3JvwYhGAxT", + "name": "Khánh Ly", "gender": "female", "lang": "vi" }, { - "id": 524, + "id": "gl5jjR8ul3WEOIkk7aOc", "name": "Nguyễn Huyền Trang", "gender": "female", "lang": "vi" }, { - "id": 1601, - "name": "Nguyễn Thu Huyền", - "gender": "female", - "lang": "vi" - }, - { - "id": 713, - "name": "Harper Lee", + "id": "pofi4Uk4l5pDRzr9wxvt", + "name": "Ivy Le", "gender": "female", "lang": "en" }, { - "id": 551, - "name": "Diana Prince", + "id": "zba1eCUoRMYf97gVI3Zd", + "name": "Cassie J", "gender": "female", "lang": "en" }, { - "id": 942, - "name": "Sophie Blake", + "id": "QbFQ0nxCenuuHtiASppf", + "name": "Hope", "gender": "female", "lang": "en" }, { - "id": 597, - "name": "Tom Holland", + "id": "P9HN0ybfh8Ny3A6jJH7v", + "name": "Archer Kingsley", "gender": "male", "lang": "en" }, { - "id": 1371, - "name": "Jack Sparrow", + "id": "xabWAiYfcCaEBoWjJv3d", + "name": "Webb", "gender": "male", "lang": "en" }, { - "id": 7, - "name": "Patrick O'Cornor", + "id": "Q0kRD2oNYVHJoswQ4IEs", + "name": "Patty Wells", "gender": "male", "lang": "en" } diff --git a/manual/tts_processor.py b/manual/tts_processor.py index 0b55d0c..7626af8 100644 --- a/manual/tts_processor.py +++ b/manual/tts_processor.py @@ -200,7 +200,7 @@ class ManualTTSProcessor: Reuses the TTS engines from video_creation/voices.py """ from TTS.GTTS import GTTS - from TTS.OhFreeMe import OhFreeMe + from TTS.Zall import Zall from TTS.Crikk import Crikk from TTS.TikTok import TikTok from TTS.aws_polly import AWSPolly @@ -211,7 +211,7 @@ class ManualTTSProcessor: providers = { "googletranslate": GTTS, - "ohfreeme": OhFreeMe, + "zall": Zall, "crikk": Crikk, "awspolly": AWSPolly, "streamlabspolly": StreamlabsPolly, diff --git a/manual_main.py b/manual_main.py index 3c70080..ca71f73 100644 --- a/manual_main.py +++ b/manual_main.py @@ -104,7 +104,7 @@ _BASE_SETTINGS_DEFAULTS = { "background_thumbnail_font_color": "255,255,255", }, "tts": { - "voice_choice": "ohfreeme", + "voice_choice": "zall", "random_voice": False, "elevenlabs_voice_name": "Bella", "elevenlabs_api_key": "", @@ -120,11 +120,12 @@ _BASE_SETTINGS_DEFAULTS = { "openai_api_key": "", "openai_voice_name": "alloy", "openai_model": "tts-1", - "ohfreeme_lang": "vi", - "ohfreeme_gender": "random", - "ohfreeme_rate": 1, - "ohfreeme_pitch": 0, - "ohfreeme_enhance": False, + "zall_lang": "vi", + "zall_gender": "random", + "zall_rate": 1, + "zall_pitch": 0, + "zall_natural_voice": True, + "zall_enable_brand_keywords": False, }, }, }