diff --git a/TTS/OhFreeMe.py b/TTS/OhFreeMe.py
new file mode 100644
--- /dev/null
+++ b/TTS/OhFreeMe.py
@@ -0,0 +1,237 @@
+import base64
+import json
+import os
+import random
+import time
+from pathlib import Path
+from typing import Optional
+
+import requests
+
+from utils import settings
+from utils.console import print_substep
+
+API_URL = "https://tts.ohfree.me/api/tts"
+# The JWT sent as the ``auth_token`` cookie is looked up at request time: first
+# the ``ohfreeme_token`` TTS setting, then this environment variable. Keeping it
+# out of the source means no credential is committed to the repository.
+TOKEN_ENV_VAR = "OHFREEME_TOKEN"
+CONFIG_DIR = Path(__file__).resolve().parent.parent / "config"
+VOICES_FILE = CONFIG_DIR / "ohfreeme_voices.json"
+USER_AGENTS_FILE = CONFIG_DIR / "user_agents.json"
+MAX_RETRIES = 3
+RATE_LIMIT_WAIT = 10  # seconds to pause after a "Too many requests" reply
+REQUEST_TIMEOUT = (10, 120)  # (connect, read) seconds; a stalled server cannot hang us
+DEFAULT_USER_AGENT = "Mozilla/5.0"
+
+
+def _load_voices() -> list[dict]:
+    """Return the voice catalogue stored in ``VOICES_FILE``.
+
+    Prints a warning and returns an empty list when the file is missing,
+    unreadable, not valid JSON, or not a JSON array. Entries that are not
+    objects with an ``id`` are dropped because they cannot be sent to the API.
+    """
+    try:
+        with open(VOICES_FILE, "r", encoding="utf-8") as f:
+            voices = json.load(f)
+    except (OSError, json.JSONDecodeError) as e:
+        print_substep(f"Warning: Could not load voices from {VOICES_FILE}: {e}", style="yellow")
+        return []
+    if not isinstance(voices, list):
+        print_substep(
+            f"Warning: Could not load voices from {VOICES_FILE}: expected a JSON array",
+            style="yellow",
+        )
+        return []
+    return [v for v in voices if isinstance(v, dict) and "id" in v]
+
+
+class OhFreeMe:
+    """Text-to-speech engine backed by the tts.ohfree.me streaming API."""
+
+    # User-Agent strings shared by every instance; ``None`` means "not loaded yet".
+    _user_agents = None
+
+    def __init__(self):
+        self.max_chars = 2500
+        self.voices = _load_voices()
+
+    def _load_user_agents(self) -> list[str]:
+        """Return the User-Agent strings from ``USER_AGENTS_FILE``.
+
+        The list is cached on the class, so the file is read at most once per
+        process. Prints a warning and caches an empty list when the file cannot
+        be read or parsed.
+        """
+        cls = type(self)
+        if cls._user_agents is None:
+            try:
+                with open(USER_AGENTS_FILE, "r", encoding="utf-8") as f:
+                    loaded = json.load(f)
+            except (OSError, json.JSONDecodeError) as e:
+                print_substep(f"Warning: Could not load user agents: {e}", style="yellow")
+                loaded = []
+            if not isinstance(loaded, list):
+                loaded = []
+            # Keep only non-empty strings so the result is always a valid header value.
+            cls._user_agents = [ua for ua in loaded if isinstance(ua, str) and ua]
+        return cls._user_agents
+
+    def _pick_user_agent(self) -> str:
+        """Return a random User-Agent, or a generic one when none are configured."""
+        agents = self._load_user_agents()
+        return random.choice(agents) if agents else DEFAULT_USER_AGENT
+
+    def run(self, text, filepath, random_voice: bool = False):
+        """Synthesize ``text`` and write the audio bytes to ``filepath``."""
+        voice = self._pick_voice(random_voice)
+        audio_bytes = self._call_api(text, voice["id"])
+        with open(filepath, "wb") as f:
+            f.write(audio_bytes)
+
+    def _candidates(self, gender: str = "random") -> list[dict]:
+        """Return the voices to choose from for the configured language.
+
+        Preference order: language and ``gender`` match, then language only,
+        then every loaded voice, so a pick is possible whenever any voice exists.
+
+        Raises:
+            RuntimeError: if no voices were loaded at all.
+        """
+        if not self.voices:
+            raise RuntimeError(f"OhFreeMe: no voices available (check {VOICES_FILE})")
+        lang = settings.config["settings"]["tts"].get("ohfreeme_lang", "vi")
+        same_lang = [v for v in self.voices if v.get("lang") == lang]
+        if gender != "random":
+            same_gender = [v for v in same_lang if v.get("gender") == gender]
+            if same_gender:
+                return same_gender
+        return same_lang or self.voices
+
+    def randomvoice(self) -> dict:
+        """Return a random voice for the configured language, ignoring gender."""
+        return random.choice(self._candidates())
+
+    def _pick_voice(self, random_voice: bool) -> dict:
+        """Return a voice honouring the language and gender settings.
+
+        When ``random_voice`` is true the gender setting is ignored.
+        """
+        if random_voice:
+            return self.randomvoice()
+        gender = settings.config["settings"]["tts"].get("ohfreeme_gender", "random")
+        return random.choice(self._candidates(gender))
+
+    def _call_api(self, text: str, voice_id: int) -> bytes:
+        """Send ``text`` to the API and return the decoded audio bytes.
+
+        Makes up to ``MAX_RETRIES`` attempts. A rate-limited attempt is followed
+        by a ``RATE_LIMIT_WAIT`` second pause; a stream that ends early is
+        retried immediately.
+
+        Raises:
+            RuntimeError: if the API reports an error other than rate limiting,
+                or if no attempt produced audio.
+        """
+        tts = settings.config["settings"]["tts"]
+        payload = {
+            "text": text,
+            "id": voice_id,
+            "useEnhance": tts.get("ohfreeme_enhance", False),
+            "rate": tts.get("ohfreeme_rate", 1),
+            "pitch": tts.get("ohfreeme_pitch", 0),
+        }
+        headers = {
+            "accept": "*/*",
+            "cache-control": "no-cache",
+            "content-type": "application/json",
+            "origin": "https://tts.ohfree.me",
+            "referer": "https://tts.ohfree.me/",
+            "user-agent": self._pick_user_agent(),
+        }
+        token = str(tts.get("ohfreeme_token") or os.environ.get(TOKEN_ENV_VAR, "")).strip()
+        if token:
+            headers["cookie"] = f"auth_token={token}"
+
+        failure = "no attempt was made"
+        for attempt in range(1, MAX_RETRIES + 1):
+            # The context manager releases the streamed connection even when
+            # reading stops early or an exception propagates.
+            with requests.post(
+                API_URL, json=payload, headers=headers, stream=True, timeout=REQUEST_TIMEOUT
+            ) as resp:
+                raw, failure = self._read_stream(resp)
+            if raw is not None:
+                return self._extract_audio(raw)
+            if failure == "rate limited" and attempt < MAX_RETRIES:
+                print_substep(
+                    f" Rate limited, waiting {RATE_LIMIT_WAIT}s... "
+                    f"(attempt {attempt}/{MAX_RETRIES})",
+                    style="yellow",
+                )
+                time.sleep(RATE_LIMIT_WAIT)
+
+        raise RuntimeError(f"OhFreeMe TTS failed after {MAX_RETRIES} attempts ({failure})")
+
+    def _read_stream(self, resp) -> tuple[Optional[str], str]:
+        """Read the streamed reply up to and including its ``done`` object.
+
+        Returns ``(text, "")`` on success, where ``text`` is every received
+        line concatenated, or ``(None, reason)`` when the attempt may be
+        retried (``reason`` is ``"rate limited"`` or ``"incomplete response"``).
+
+        Raises:
+            RuntimeError: if the API reports any error other than rate limiting.
+        """
+        parts = []
+        # iter_lines() is not reentrant safe, so the whole stream is consumed
+        # through this single iterator.
+        for line in resp.iter_lines():
+            if not line:
+                continue
+            chunk = line.decode("utf-8")
+            parts.append(chunk)
+            try:
+                obj = json.loads(chunk)
+            except json.JSONDecodeError:
+                # A line may hold several objects back to back; those are
+                # split later by _extract_audio.
+                obj = None
+            if isinstance(obj, dict) and obj.get("status") == "error":
+                message = str(obj.get("message", "unknown"))
+                if "Too many requests" in message:
+                    return None, "rate limited"
+                raise RuntimeError(f"OhFreeMe API error: {message}")
+            if '"status":"done"' in chunk:
+                return "".join(parts), ""
+        return None, "incomplete response"
+
+    def _extract_audio(self, raw: str) -> bytes:
+        """Return the audio bytes carried by the ``done`` object in ``raw``.
+
+        ``raw`` holds one or more JSON objects back to back, for example an
+        ``audio_chunk`` object followed by
+        ``{"status":"done","url":"data:audio/mpeg;base64,..."}``.
+
+        Raises:
+            RuntimeError: if ``raw`` is malformed or has no base64 data URL.
+        """
+        decoder = json.JSONDecoder()
+        pos = 0
+        while pos < len(raw):
+            if raw[pos].isspace():
+                pos += 1
+                continue
+            try:
+                obj, pos = decoder.raw_decode(raw, pos)
+            except json.JSONDecodeError as err:
+                raise RuntimeError("Could not extract audio from API response") from err
+            if not isinstance(obj, dict) or obj.get("status") != "done":
+                continue
+            url = obj.get("url")
+            if isinstance(url, str) and url.startswith("data:") and ";base64," in url:
+                b64_part = url.split(";base64,", 1)[1]
+                if b64_part:
+                    return base64.b64decode(b64_part)
+        raise RuntimeError("Could not extract audio from API response")
diff --git a/config/ohfreeme_voices.json b/config/ohfreeme_voices.json
new file mode 100644
index 0000000..49d3af2
--- /dev/null
+++ b/config/ohfreeme_voices.json
@@ -0,0 +1,74 @@
+[
+  {
+    "id": 573,
+    "name": "Lý Hải",
+    "gender": "male",
+    "lang": "vi"
+  },
+  {
+    "id": 962,
+    "name": "Lê Quốc Khánh",
+    "gender": "male",
+    "lang": "vi"
+  },
+  {
+    "id": 1543,
+    "name": "Nguyễn Lam Anh",
+    "gender": "male",
+    "lang": "vi"
+  },
+  {
+    "id": 510,
+    "name": "Nguyễn Ngân",
+    "gender": "female",
+    "lang": "vi"
+  },
+  {
+    "id": 524,
+    "name": "Nguyễn Huyền Trang",
+    "gender": "female",
+    "lang": "vi"
+  },
+  {
+    "id": 1601,
+    "name": "Nguyễn Thu Huyền",
+    "gender": "female",
+    "lang": "vi"
+  },
+  {
+    "id": 713,
+    "name": "Harper Lee",
+    "gender": "female",
+    "lang": "en"
+  },
+  {
+    "id": 551,
+    "name": "Diana Prince",
+    "gender": "female",
+    "lang": "en"
+  },
+  {
+    "id": 942,
+    "name": "Sophie Blake",
+    "gender": "female",
+    "lang": "en"
+  },
+  {
+    "id": 597,
+    "name": "Tom Holland",
+    "gender": "male",
+    "lang": "en"
+  },
+  {
+    "id": 1371,
+    "name": "Jack Sparrow",
+    "gender": "male",
+    "lang": "en"
+  },
+  {
+    "id": 7,
+    "name": "Patrick O'Cornor",
+    "gender": "male",
+    "lang": "en"
+  }
+]
\ No newline at end of file
diff --git a/config/user_agents.json b/config/user_agents.json
new file mode 100644
index 0000000..f0b0ec0
--- /dev/null
+++ b/config/user_agents.json
@@ -0,0 +1,7 @@
+[
+  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
+  "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
+  "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
+  "Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1"
+]
\ No newline at end of file
diff --git a/manual/tts_processor.py b/manual/tts_processor.py
index 0ee0272..8d7281e 100644
--- a/manual/tts_processor.py
+++ b/manual/tts_processor.py
@@ -9,6 +9,7 @@ Reuses TTS engines from TTS/ module — no code duplication.
 """
 
 import re
+import time
 from pathlib import Path
 from typing import Tuple
 
@@ -121,6 +122,10 @@ class ManualTTSProcessor:
             total_duration += duration
             processed_count += 1
 
+            # Sleep 10s between TTS generation
+            print_substep(" 💤 Sleeping 10s...", style="dim")
+            time.sleep(10)
+
             print_substep(
                 f" ✓ #{idx} → {duration:.1f}s (TTS generated, {len(clean_text)} chars)",
                 style="green",
@@ -149,6 +154,7 @@ class ManualTTSProcessor:
         Reuses the TTS engines from video_creation/voices.py
         """
         from TTS.GTTS import GTTS
+        from TTS.OhFreeMe import OhFreeMe
         from TTS.TikTok import TikTok
         from TTS.aws_polly import AWSPolly
         from TTS.elevenlabs import elevenlabs
@@ -158,6 +164,7 @@ class ManualTTSProcessor:
 
         providers = {
             "googletranslate": GTTS,
+            "ohfreeme": OhFreeMe,
             "awspolly": AWSPolly,
             "streamlabspolly": StreamlabsPolly,
             "tiktok": TikTok,
diff --git a/manual_main.py b/manual_main.py
index 31c0023..4e351e1 100644
--- a/manual_main.py
+++ b/manual_main.py
@@ -101,7 +101,7 @@ _BASE_SETTINGS_DEFAULTS = {
             "background_thumbnail_font_color": "255,255,255",
         },
         "tts": {
-            "voice_choice": "googletranslate",
+            "voice_choice": "ohfreeme",
             "random_voice": False,
             "elevenlabs_voice_name": "Bella",
             "elevenlabs_api_key": "",
@@ -117,6 +117,11 @@ _BASE_SETTINGS_DEFAULTS = {
             "openai_api_key": "",
             "openai_voice_name": "alloy",
             "openai_model": "tts-1",
+            "ohfreeme_lang": "vi",
+            "ohfreeme_gender": "random",
+            "ohfreeme_rate": 1,
+            "ohfreeme_pitch": 0,
+            "ohfreeme_enhance": False,
         },
     },
 }
@@ -375,6 +380,9 @@ def build_parser() -> argparse.ArgumentParser:
     render_parser.add_argument(
         "--force", action="store_true", help="Re-render even if already done"
     )
+    render_parser.add_argument(
+        "--lang", type=str, default="vi", help="Override TTS language (e.g. vi, en)"
+    )
 
     # list command
     subparsers.add_parser("list", help="List all posts and their status")