feat(tts): implement OhFreeMe TTS integration and add voice/user agent configurations

pull/2558/head
MinhVu2711 2 months ago
parent 967f86bcc3
commit 4598819ec5

@ -0,0 +1,183 @@
import base64
import json
import os
import random
import time
from pathlib import Path

import requests

from utils import settings
from utils.console import print_substep
# Endpoint that accepts the TTS request and streams the result back.
API_URL = "https://tts.ohfree.me/api/tts"
# JWT token for authentication, sent as the ``auth_token`` cookie.
# It is read from the OHFREEME_JWT_TOKEN environment variable so the secret is
# never committed; when the variable is unset the cookie value is empty.
JWT_TOKEN = os.environ.get("OHFREEME_JWT_TOKEN", "")
# JSON catalogue of voices, located in the sibling ``config`` directory.
VOICES_FILE = Path(__file__).resolve().parent.parent / "config" / "ohfreeme_voices.json"
# Number of request attempts before giving up.
MAX_RETRIES = 3
# Seconds to wait after the API reports "Too many requests".
RATE_LIMIT_WAIT = 10
def _load_voices() -> list[dict]:
    """Return the voice catalogue parsed from ``VOICES_FILE``.

    Returns:
        The decoded JSON content of the file. When the file is missing or
        contains invalid JSON, a warning is printed and an empty list is
        returned instead.
    """
    try:
        with VOICES_FILE.open("r", encoding="utf-8") as handle:
            voices = json.load(handle)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        # Best-effort loader: report the problem and carry on with no voices.
        print_substep(f"Warning: Could not load voices from {VOICES_FILE}: {e}", style="yellow")
        voices = []
    return voices
class OhFreeMe:
    """Text-to-speech engine backed by the tts.ohfree.me streaming HTTP API.

    ``run`` picks a voice from the loaded catalogue, posts the text to
    ``API_URL`` and writes the decoded audio bytes to the requested file.
    """

    # Cached list of User-Agent strings; ``None`` means "not loaded yet".
    _user_agents = None
    # (connect, read) timeouts in seconds so a stalled server cannot hang a run.
    _request_timeout = (10, 120)
    # Maximum number of bytes of a streamed line echoed to the console; audio
    # lines carry large base64 payloads that would otherwise flood the output.
    _debug_preview_bytes = 200

    def __init__(self):
        # Maximum characters per request; read by the code that drives this engine.
        self.max_chars = 2500
        self.voices = _load_voices()

    def _load_user_agents(self) -> list[str]:
        """Read ``config/user_agents.json`` once and cache the result.

        Returns:
            The list of User-Agent strings. An empty list is returned (and a
            warning printed) when the file cannot be read or is not a JSON list.
        """
        if self._user_agents is not None:
            return self._user_agents
        try:
            agents_path = Path(__file__).resolve().parent.parent / "config" / "user_agents.json"
            with open(agents_path, "r", encoding="utf-8") as f:
                loaded = json.load(f)
            if not isinstance(loaded, list):
                raise ValueError("expected a JSON list of strings")
        except Exception as e:
            # Deliberately best-effort: header rotation is optional, so any
            # failure is reported and the generic fallback UA is used instead.
            print_substep(f"Warning: Could not load user agents: {e}", style="yellow")
            loaded = []
        agents = [ua for ua in loaded if isinstance(ua, str) and ua]
        # Store on the class so every instance shares one read of the file.
        type(self)._user_agents = agents
        return agents

    def _pick_user_agent(self) -> str:
        """Return a random User-Agent string, or a generic one if none are loaded."""
        agents = self._load_user_agents()
        if agents:
            return random.choice(agents)
        return "Mozilla/5.0"

    def run(self, text, filepath, random_voice: bool = False):
        """Synthesize ``text`` and write the resulting audio bytes to ``filepath``.

        Args:
            text: Text to convert to speech.
            filepath: Destination path; opened in binary write mode.
            random_voice: When true, ignore the configured gender and pick any
                voice of the configured language.

        Raises:
            RuntimeError: If no voice is available or the API call fails.
        """
        voice = self._pick_voice(random_voice)
        audio_bytes = self._call_api(text, voice["id"])
        with open(filepath, "wb") as f:
            f.write(audio_bytes)

    def _choose_voice(self, pool: list) -> dict:
        """Return a random voice from ``pool``, failing clearly when it is empty."""
        if not pool:
            raise RuntimeError(
                "No OhFreeMe voices available; check config/ohfreeme_voices.json"
            )
        return random.choice(pool)

    def randomvoice(self) -> dict:
        """Return a random voice of the configured language.

        Falls back to the whole catalogue when no voice matches the language.

        Raises:
            RuntimeError: If the voice catalogue is empty.
        """
        lang = settings.config["settings"]["tts"].get("ohfreeme_lang", "vi")
        same_lang = [v for v in self.voices if v.get("lang") == lang]
        return self._choose_voice(same_lang or self.voices)

    def _pick_voice(self, random_voice: bool) -> dict:
        """Select a voice according to the language and gender settings.

        Preference order: voices matching language and gender, then voices
        matching the language only, then the whole catalogue.

        Raises:
            RuntimeError: If the voice catalogue is empty.
        """
        if random_voice:
            return self.randomvoice()
        tts_cfg = settings.config["settings"]["tts"]
        lang = tts_cfg.get("ohfreeme_lang", "vi")
        gender = tts_cfg.get("ohfreeme_gender", "random")
        same_lang = [v for v in self.voices if v.get("lang") == lang]
        candidates = same_lang
        if gender != "random":
            candidates = [v for v in same_lang if v.get("gender") == gender]
        # Prefer a wrong gender in the right language over a wrong language.
        return self._choose_voice(candidates or same_lang or self.voices)

    def _read_stream(self, payload: dict, headers: dict) -> tuple:
        """Send one request and collect the line-delimited JSON stream.

        Returns:
            ``(raw, rate_limited)``. ``raw`` is the concatenated response bytes
            when a line with status "done" was received, otherwise ``None``.
            ``rate_limited`` is true when the API answered "Too many requests".

        Raises:
            RuntimeError: If the API reports any other error object.
        """
        chunks = []
        # The context manager releases the streamed connection on every exit path.
        with requests.post(
            API_URL,
            json=payload,
            headers=headers,
            stream=True,
            timeout=self._request_timeout,
        ) as resp:
            # A single iterator is used for the whole body: calling
            # iter_lines() more than once on a response can lose data.
            for line in resp.iter_lines():
                if not line:
                    continue
                limit = self._debug_preview_bytes
                preview = line[:limit].decode("utf-8", errors="replace")
                suffix = "..." if len(line) > limit else ""
                print_substep(f"[OhFreeMe debug] Received line: {preview}{suffix}", style="blue")
                try:
                    obj = json.loads(line)
                except ValueError:
                    # Not a standalone JSON document; keep the bytes for _extract_audio.
                    obj = None
                if isinstance(obj, dict) and obj.get("status") == "error":
                    message = str(obj.get("message") or "unknown")
                    if "Too many requests" in message:
                        return None, True
                    raise RuntimeError(f"OhFreeMe API error: {message}")
                chunks.append(line)
                finished = isinstance(obj, dict) and obj.get("status") == "done"
                if finished or b'"status":"done"' in line:
                    return b"".join(chunks), False
        return None, False

    def _call_api(self, text: str, voice_id: int) -> bytes:
        """Request speech for ``text`` and return the decoded audio bytes.

        The request is attempted up to ``MAX_RETRIES`` times; each attempt
        starts from an empty buffer. After a rate-limit reply the method waits
        ``RATE_LIMIT_WAIT`` seconds before the next attempt.

        Raises:
            RuntimeError: If the API reports an error, or no complete response
                was received within ``MAX_RETRIES`` attempts.
        """
        tts_cfg = settings.config["settings"]["tts"]
        payload = {
            "text": text,
            "id": voice_id,
            "useEnhance": tts_cfg.get("ohfreeme_enhance", False),
            "rate": tts_cfg.get("ohfreeme_rate", 1),
            "pitch": tts_cfg.get("ohfreeme_pitch", 0),
        }
        headers = {
            "accept": "*/*",
            "cache-control": "no-cache",
            "content-type": "application/json",
            "origin": "https://tts.ohfree.me",
            "cookie": f"auth_token={JWT_TOKEN}",
            "referer": "https://tts.ohfree.me/",
            "user-agent": self._pick_user_agent(),
        }
        rate_limited = False
        for attempt in range(MAX_RETRIES):
            raw_response, rate_limited = self._read_stream(payload, headers)
            if raw_response is not None:
                return self._extract_audio(raw_response.decode("utf-8"))
            # Waiting is pointless after the final attempt, so skip it there.
            if rate_limited and attempt + 1 < MAX_RETRIES:
                print_substep(
                    f" Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt + 1}/{MAX_RETRIES})",
                    style="yellow",
                )
                time.sleep(RATE_LIMIT_WAIT)
        reason = "rate limited" if rate_limited else "incomplete response"
        raise RuntimeError(f"OhFreeMe TTS failed after {MAX_RETRIES} retries ({reason})")

    def _extract_audio(self, raw: str) -> bytes:
        """Decode the audio carried by the "done" object of an API response.

        ``raw`` holds one or more JSON values back to back, optionally
        separated by whitespace, e.g.
        ``{"status":"audio_chunk","chunk":"..."}{"status":"done","url":"data:audio/mpeg;base64,..."}``.
        Values that are not JSON objects are skipped.

        Returns:
            The bytes decoded from the base64 part of the "done" object's
            ``data:`` URL.

        Raises:
            RuntimeError: If the response is malformed, has no usable "done"
                object, or the payload is not valid base64.
        """
        decoder = json.JSONDecoder()
        pos = 0
        size = len(raw)
        audio_b64 = None
        while pos < size:
            if raw[pos].isspace():
                pos += 1
                continue
            try:
                obj, pos = decoder.raw_decode(raw, pos)
            except json.JSONDecodeError as err:
                raise RuntimeError("Could not extract audio from API response") from err
            if not isinstance(obj, dict):
                continue
            if obj.get("status") == "done" and "url" in obj:
                url = obj["url"]
                # url format: "data:audio/mpeg;base64,<base64data>"
                if isinstance(url, str) and url.startswith("data:") and ";base64," in url:
                    audio_b64 = url.split(";base64,", 1)[1]
                break
        if not audio_b64:
            raise RuntimeError("Could not extract audio from API response")
        try:
            return base64.b64decode(audio_b64)
        except ValueError as err:
            # binascii.Error (invalid base64) is a ValueError subclass.
            raise RuntimeError("Could not decode audio from API response") from err

@ -0,0 +1,74 @@
[
{
"id": 573,
"name": "Lý Hải",
"gender": "male",
"lang": "vi"
},
{
"id": 962,
"name": "Lê Quốc Khánh",
"gender": "male",
"lang": "vi"
},
{
"id": 1543,
"name": "Nguyễn Lam Anh",
"gender": "male",
"lang": "vi"
},
{
"id": 510,
"name": "Nguyễn Ngân",
"gender": "female",
"lang": "vi"
},
{
"id": 524,
"name": "Nguyễn Huyền Trang",
"gender": "female",
"lang": "vi"
},
{
"id": 1601,
"name": "Nguyễn Thu Huyền",
"gender": "female",
"lang": "vi"
},
{
"id": 713,
"name": "Harper Lee",
"gender": "female",
"lang": "en"
},
{
"id": 551,
"name": "Diana Prince",
"gender": "female",
"lang": "en"
},
{
"id": 942,
"name": "Sophie Blake",
"gender": "female",
"lang": "en"
},
{
"id": 597,
"name": "Tom Holland",
"gender": "male",
"lang": "en"
},
{
"id": 1371,
"name": "Jack Sparrow",
"gender": "male",
"lang": "en"
},
{
"id": 7,
"name": "Patrick O'Cornor",
"gender": "male",
"lang": "en"
}
]

@ -0,0 +1,7 @@
[
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
"Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
"Mozilla/5.0 (iPad; CPU OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1"
]

@ -9,6 +9,7 @@ Reuses TTS engines from TTS/ module — no code duplication.
"""
import re
import time
from pathlib import Path
from typing import Tuple
@ -121,6 +122,10 @@ class ManualTTSProcessor:
total_duration += duration
processed_count += 1
# Sleep 10s between TTS generation
print_substep(" 💤 Sleeping 10s...", style="dim")
time.sleep(10)
print_substep(
f" ✓ #{idx}{duration:.1f}s (TTS generated, {len(clean_text)} chars)",
style="green",
@ -149,6 +154,7 @@ class ManualTTSProcessor:
Reuses the TTS engines from video_creation/voices.py
"""
from TTS.GTTS import GTTS
from TTS.OhFreeMe import OhFreeMe
from TTS.TikTok import TikTok
from TTS.aws_polly import AWSPolly
from TTS.elevenlabs import elevenlabs
@ -158,6 +164,7 @@ class ManualTTSProcessor:
providers = {
"googletranslate": GTTS,
"ohfreeme": OhFreeMe,
"awspolly": AWSPolly,
"streamlabspolly": StreamlabsPolly,
"tiktok": TikTok,

@ -101,7 +101,7 @@ _BASE_SETTINGS_DEFAULTS = {
"background_thumbnail_font_color": "255,255,255",
},
"tts": {
"voice_choice": "googletranslate",
"voice_choice": "ohfreeme",
"random_voice": False,
"elevenlabs_voice_name": "Bella",
"elevenlabs_api_key": "",
@ -117,6 +117,11 @@ _BASE_SETTINGS_DEFAULTS = {
"openai_api_key": "",
"openai_voice_name": "alloy",
"openai_model": "tts-1",
"ohfreeme_lang": "vi",
"ohfreeme_gender": "random",
"ohfreeme_rate": 1,
"ohfreeme_pitch": 0,
"ohfreeme_enhance": False,
},
},
}
@ -375,6 +380,9 @@ def build_parser() -> argparse.ArgumentParser:
render_parser.add_argument(
"--force", action="store_true", help="Re-render even if already done"
)
render_parser.add_argument(
"--lang", type=str, default="vi", help="Override TTS language (e.g. vi, en)"
)
# list command
subparsers.add_parser("list", help="List all posts and their status")

Loading…
Cancel
Save