# RedditVideoMakerBot/TTS/Zall.py

import base64
import json
import os
import random
import time
from pathlib import Path

import requests
from dotenv import load_dotenv

from utils import settings
from utils.console import print_substep

# Load environment variables from .env file so the os.getenv() calls below
# can see values defined there.
load_dotenv()

# Endpoint that receives the synthesis POST request.
ZALL_API_URL = os.getenv("ZALL_API_URL", "")
# NOTE(review): not referenced anywhere in this module's visible code —
# confirm whether another module imports it before removing.
ZALL_BASE_URL = os.getenv("ZALL_BASE_URL", "")
# Token sent to the API as the `auth_token` cookie.
ZALL_JWT_TOKEN = os.getenv("ZALL_JWT_TOKEN", "")
# JSON file with the voice catalogue, located at <repo>/config/zall_voices.json
# relative to this file's parent package.
VOICES_FILE = Path(__file__).resolve().parent.parent / "config" / "zall_voices.json"
# Number of synthesis attempts before giving up.
MAX_RETRIES = 3
# Seconds to sleep after the API reports an error event before retrying.
RATE_LIMIT_WAIT = 20


def _load_voices() -> list[dict]:
    """Load the voice catalogue from ``VOICES_FILE``.

    Loading is best-effort: any failure is reported as a yellow warning via
    ``print_substep`` and an empty list is returned instead of raising.

    Returns:
        The voice entries found in the file, or ``[]`` when the file is
        missing, unreadable, not valid UTF-8/JSON, or does not contain a
        JSON list. Entries that are not JSON objects are dropped.
    """
    try:
        with open(VOICES_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    # OSError covers missing files, permission problems and directories;
    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
    except (OSError, ValueError) as e:
        print_substep(f"Warning: Could not load voices from {VOICES_FILE}: {e}", style="yellow")
        return []

    if not isinstance(data, list):
        # Callers iterate the result and index each entry by key, so any
        # other top-level JSON type would fail later with a confusing error.
        print_substep(
            f"Warning: Could not load voices from {VOICES_FILE}: expected a JSON list",
            style="yellow",
        )
        return []
    return [voice for voice in data if isinstance(voice, dict)]


class Zall:
    """Text-to-speech engine that synthesizes audio through the Zall HTTP API.

    The API is called with a streaming POST request and answers with one JSON
    event per line; audio arrives as base64-encoded chunks that are joined and
    written to the requested output file.
    """

    # Cache of User-Agent strings shared by all instances; ``None`` means the
    # config file has not been read yet.
    _user_agents = None

    # (connect, read) timeouts in seconds for the synthesis request. Without
    # a timeout ``requests`` waits forever on a stalled connection.
    _REQUEST_TIMEOUT = (10, 120)

    def __init__(self):
        # Presumably read by the caller to split long texts — TODO confirm.
        self.max_chars = 2500
        self.voices = _load_voices()

    def _load_user_agents(self) -> list[str]:
        """Read ``config/user_agents.json`` once and cache the result.

        Returns:
            The non-empty strings found in the file. An empty list is
            returned (and a warning printed) when the file cannot be read,
            is not valid JSON, or does not contain a JSON list.
        """
        if self._user_agents is not None:
            return self._user_agents

        agents: list[str] = []
        agents_path = Path(__file__).resolve().parent.parent / "config" / "user_agents.json"
        try:
            with open(agents_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        except (OSError, ValueError) as e:
            print_substep(f"Warning: Could not load user agents: {e}", style="yellow")
        else:
            if isinstance(loaded, list):
                # Header values must be strings, so drop anything else.
                agents = [ua for ua in loaded if isinstance(ua, str) and ua.strip()]
            else:
                print_substep(
                    "Warning: Could not load user agents: expected a JSON list",
                    style="yellow",
                )

        # Store on the class so the file is read once per process rather
        # than once per instance.
        type(self)._user_agents = agents
        return agents

    def _pick_user_agent(self) -> str:
        """Return a random User-Agent string, or a generic one if none are loaded."""
        agents = self._load_user_agents()
        if agents:
            return random.choice(agents)
        return "Mozilla/5.0"

    def run(self, text, filepath, random_voice: bool = False):
        """Synthesize *text* and write the resulting audio bytes to *filepath*.

        Args:
            text: Text to synthesize.
            filepath: Destination path; opened in binary write mode.
            random_voice: When true, pick a random voice for the configured
                language and ignore the configured gender.

        Raises:
            RuntimeError: If no voices are available or synthesis fails.
        """
        voice = self._pick_voice(random_voice)
        audio_bytes = self._call_api(text, voice["id"])
        with open(filepath, "wb") as f:
            f.write(audio_bytes)

    def _voices_for_language(self) -> list:
        """Return the voices whose ``lang`` matches the ``zall_lang`` setting."""
        lang = settings.config["settings"]["tts"].get("zall_lang", "vi")
        return [v for v in self.voices if v.get("lang") == lang]

    def _choose_voice(self, candidates: list) -> dict:
        """Pick a random voice from *candidates*, or from all voices if empty.

        Raises:
            RuntimeError: If there are no voices at all.
        """
        pool = candidates or self.voices
        if not pool:
            # random.choice([]) would raise an unexplained IndexError.
            raise RuntimeError("Zall TTS has no voices available; check config/zall_voices.json")
        return random.choice(pool)

    def randomvoice(self) -> dict:
        """Return a random voice for the configured language.

        Falls back to the full voice list when no voice matches the language.
        """
        return self._choose_voice(self._voices_for_language())

    def _pick_voice(self, random_voice: bool) -> dict:
        """Return a voice matching the configured language and gender.

        Args:
            random_voice: When true, delegate to :meth:`randomvoice`.

        The ``zall_gender`` setting narrows the language matches unless it is
        ``"random"``. If nothing matches, any available voice may be chosen.
        """
        if random_voice:
            return self.randomvoice()
        candidates = self._voices_for_language()
        gender = settings.config["settings"]["tts"].get("zall_gender", "random")
        if gender != "random":
            candidates = [v for v in candidates if v.get("gender") == gender]
        return self._choose_voice(candidates)

    @staticmethod
    def _consume_stream(lines) -> "bytes | None":
        """Fold a stream of JSON event lines into the synthesized audio.

        Args:
            lines: Iterable of raw ``bytes`` lines, one JSON event per line.
                Blank lines and lines that are not JSON objects are ignored.

        Returns:
            The concatenated audio once a ``done`` event is seen, or ``None``
            as soon as an ``error`` event is seen.

        Raises:
            RuntimeError: If ``done`` arrives before any audio chunk, or the
                stream ends without a ``done`` or ``error`` event.
        """
        audio = bytearray()
        for line in lines:
            if not line:
                continue
            try:
                event = json.loads(line.decode("utf-8"))
            except ValueError:  # invalid JSON or invalid UTF-8
                continue
            if not isinstance(event, dict):
                continue

            status = event.get("status")
            if status == "audio_chunk":
                audio.extend(base64.b64decode(event["chunk"]))
            elif status == "error":
                return None
            elif status == "done":
                if not audio:
                    raise RuntimeError("Zall TTS completed without audio chunks")
                return bytes(audio)
        raise RuntimeError("Zall TTS response ended before completion")

    def _call_api(self, text: str, voice_id: int) -> bytes:
        """Request synthesis of *text* with *voice_id* and return the audio.

        Makes up to ``MAX_RETRIES`` attempts. An ``error`` event from the API
        is treated as rate limiting: the method sleeps ``RATE_LIMIT_WAIT``
        seconds and tries again.

        Raises:
            RuntimeError: If the stream is incomplete, contains no audio, or
                every attempt ends with an error event.
            requests.exceptions.RequestException: On connection failures or
                timeouts (not retried).
        """
        tts_settings = settings.config["settings"]["tts"]
        payload = {
            "segments": [
                {
                    "voiceId": voice_id,
                    "text": text,
                }
            ],
            "useNaturalVoice": tts_settings.get("zall_natural_voice", False),
            "enableBrandKeywords": tts_settings.get("zall_enable_brand_keywords", False),
        }
        headers = {
            "cache-control": "no-cache",
            "content-type": "application/json",
            "accept": "*/*",
            "accept-language": "en-GB,en-US;q=0.9,en;q=0.8,vi;q=0.7",  # important
            "sec-fetch-mode": "cors",  # important
            "sec-fetch-site": "same-origin",  # important
            "Cookie": f"auth_token={ZALL_JWT_TOKEN}",  # important
            "user-agent": self._pick_user_agent(),  # important
        }

        for attempt in range(1, MAX_RETRIES + 1):
            # The context manager closes the streamed response on every exit
            # path, so the connection is released even when we stop early.
            with requests.post(
                ZALL_API_URL,
                json=payload,
                headers=headers,
                stream=True,
                timeout=self._REQUEST_TIMEOUT,
            ) as resp:
                audio_bytes = self._consume_stream(resp.iter_lines())

            if audio_bytes is not None:
                return audio_bytes

            # Only wait when another attempt will follow; sleeping before the
            # final failure just delays the error.
            if attempt < MAX_RETRIES:
                print_substep(
                    f"  Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt}/{MAX_RETRIES})",
                    style="yellow",
                )
                time.sleep(RATE_LIMIT_WAIT)

        raise RuntimeError(f"Zall TTS failed after {MAX_RETRIES} retries (rate limited)")