feat(TTS): refine OhFreeMe API response handling and update voice configuration

2 months ago · de093fd1d2
parent 4598819ec5
commit de093fd1d2
4 changed files with 27 additions and 54 deletions
--- a/.gitignore
+++ b/.gitignore
@ -248,6 +248,8 @@ config.toml
 *.exe
 .agents

+docs
+
 # Manual pipeline
 manual_posts/
 manual_results/
--- a/TTS/OhFreeMe.py
+++ b/TTS/OhFreeMe.py
@ -105,79 +105,45 @@ class OhFreeMe:
        }

        # streaming NDJSON response with debug logging
-        raw_response = b""
        for attempt in range(MAX_RETRIES):
            resp = requests.post(API_URL, json=payload, headers=headers, stream=True)
            # Rate‑limit handling – first line may contain error object
            try:
                first_line = next(resp.iter_lines())
-                # first_line is bytes; decode for JSON parsing
                parsed = json.loads(first_line.decode('utf-8'))
-                # debug: show parsed first line
                print_substep(f"[OhFreeMe debug] First line parsed: {parsed}", style="blue")
-                if parsed.get("status") == "error" and "Too many requests" in parsed.get("message", ""):
+                if parsed.get("status") == "error":
                    print_substep(
                        f"  Rate limited, waiting {RATE_LIMIT_WAIT}s... (attempt {attempt + 1}/{MAX_RETRIES})",
                        style="yellow",
                    )
                    time.sleep(RATE_LIMIT_WAIT)
                    continue
-                raw_response += first_line
            except (StopIteration, json.JSONDecodeError):
                pass

-            # iterate remaining chunks until done
+            # iterate remaining chunks until done, keeping only the final line for processing
            for line in resp.iter_lines():
                if not line:
                    continue
-                raw_response += line
-                # debug: print raw line (decoded) to terminal
-                try:
-                    decoded_line = line.decode('utf-8')
-                    print_substep(f"[OhFreeMe debug] Received line: {decoded_line}", style="blue")
-                except Exception:
-                    pass
-                # check for error object (e.g., server overload)
                try:
-                    obj = json.loads(line.decode('utf-8'))
-                    if obj.get('status') == 'error':
-                        raise RuntimeError(f"OhFreeMe API error: {obj.get('message', 'unknown')}")
+                    data = json.loads(line.decode('utf-8'))
                except json.JSONDecodeError:
-                    pass
-                if b'"status":"done"' in line:
-                    break
-            if b'"status":"done"' in raw_response:
-                # decode to str for _extract_audio
-                return self._extract_audio(raw_response.decode('utf-8'))
+                    continue
+                # debug: print raw line (decoded) to terminal
+                if data.get("status") == "done":
+                    print_substep(f"[OhFreeMe debug] Received line: {data}", style="blue")
+                    return self._extract_audio(data)

        raise RuntimeError(f"OhFreeMe TTS failed after {MAX_RETRIES} retries (rate limited)")

-    def _extract_audio(self, raw: str) -> bytes:
-        # API returns multiple JSON objects concatenated, e.g.:
-        # {"status":"audio_chunk","chunk":"..."}{"status":"done","url":"data:audio/mpeg;base64,..."}
-        decoder = json.JSONDecoder()
-        pos = 0
-        audio_b64 = None
-
-        while pos < len(raw):
-            # Skip whitespace
-            while pos < len(raw) and raw[pos].isspace():
-                pos += 1
-            if pos >= len(raw):
-                break
-
-            obj, end = decoder.raw_decode(raw, pos)
-            pos = end
-
-            if obj.get("status") == "done" and "url" in obj:
-                url = obj["url"]
-                # url format: "data:audio/mpeg;base64,<base64data>"
-                if url.startswith("data:") and ";base64," in url:
-                    b64_part = url.split(";base64,", 1)[1]
-                    audio_b64 = b64_part
-                break
-
-        if not audio_b64:
-            raise RuntimeError(f"Could not extract audio from API response")
-
-        return base64.b64decode(audio_b64)
+    def _extract_audio(self, data: dict) -> bytes:
+        # Expecting a dict with a "url" field containing a data URI
+        url = data.get("url")
+        if not url:
+            raise RuntimeError("Missing 'url' in API response data")
+        # url format: "data:audio/mpeg;base64,<base64data>"
+        if not (url.startswith("data:") and ";base64," in url):
+            raise RuntimeError(f"Unexpected URL format in API response: {url}")
+        b64_part = url.split(";base64,", 1)[1]
+        return base64.b64decode(b64_part)
--- a/config/ohfreeme_voices.json
+++ b/config/ohfreeme_voices.json
@ -1,7 +1,7 @@
 [
  {
-    "id": 573,
-    "name": "Lý Hải",
+    "id": 27,
+    "name": "Trần Sơn",
    "gender": "male",
    "lang": "vi"
  },
--- a/utils/background_audios.json
+++ b/utils/background_audios.json
@ -14,5 +14,10 @@
        "https://www.youtube.com/watch?v=4sFVeqvJu-0",
        "lofi-3.mp3",
        "Chill - Copyright Free Music"
+    ],
+    "lofi-4":[
+        "https://www.youtube.com/watch?v=BHIPSUlCaWk",
+        "lofi-4.mp3",
+        "Adrián Berenguer"
    ]
 }