From 1186efbe4d15df83c16b57701d3a8ecc2002d868 Mon Sep 17 00:00:00 2001
From: Hong Phuc <hongphuc.dthp@gmail.com>
Date: Mon, 25 May 2026 16:20:49 +0700
Subject: [PATCH] chore: cleanup code

---
 .gitignore                               |   1 +
 .gitmodules                              |   3 -
 CLAUDE.md                                | 129 +++++++++++------------
 TTS/supertonic_tts.py                    |  49 +++++++--
 platforms/threads/scraper.py             |  15 +++
 utils/.config.template.toml              |   1 +
 vendor/FullyAutomatedRedditVideoMakerBot |   1 -
 7 files changed, 121 insertions(+), 78 deletions(-)
 delete mode 100644 .gitmodules
 delete mode 160000 vendor/FullyAutomatedRedditVideoMakerBot
diff --git a/.gitignore b/.gitignore
index fa8e08c..675fe1b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -274,3 +274,4 @@ pipeline-ui-*.png
 .agents
 .config
 .local
+.gstack/
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index c5bf4d4..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "vendor/FullyAutomatedRedditVideoMakerBot"]
-	path = vendor/FullyAutomatedRedditVideoMakerBot
-	url = https://github.com/raga70/FullyAutomatedRedditVideoMakerBot.git
diff --git a/CLAUDE.md b/CLAUDE.md
index 4baa332..c968927 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -4,19 +4,19 @@
 
 **VideoMakerBot** — Automated short-form video creator from social media content.
 
-**Status:** Production-ready, actively maintained (v3.4.0)
-**Language:** Python 3.14+ (host + Docker image)
-**Runtime:** **Docker only** — all CLI, GUI, and test invocations go through `docker compose`. Do not invoke `python` on the host.
+**Status:** Production (v3.4.0)
+**Language:** Python 3.14+ (host + Docker)
+**Runtime:** **Docker only** — CLI, GUI, test go through `docker compose`. Never `python` on host.
 **Platforms:** Reddit (PRAW API), Threads (Graph API + Web Scraping)
 
 ### Core Mission
-Transforms social media threads (post + comments/replies) into complete short-form videos with:
+Transforms social media threads (post + comments/replies) into short-form videos:
 - AI-generated speech (7+ TTS providers)
-- UI screenshots (Playwright, headless Chromium pre-installed in image)
+- UI screenshots (Playwright, headless Chromium in image)
 - Background video/audio overlays
-- FFmpeg composition & output (Linux ffmpeg with full filter set, including `drawtext`)
+- FFmpeg composition & output (Linux ffmpeg, full filter set + `drawtext`)
 - Optional YouTube upload
-- Modern web UI (Tailwind CSS + DaisyUI + Lucide + vanilla ES6) on `localhost:4000`
+- Web UI (Tailwind CSS + DaisyUI + Lucide + vanilla ES6) on `localhost:4000`
 
 ---
 
@@ -195,7 +195,7 @@ client_secret_path = ""        # Path to youtube_client_secret.json
 ### Threads — Web Scraping (discovery_method = "scrape")
 
 **DOM Structure:**
-- Threads.net uses **div-based card layout** — NO `<article>` elements anywhere
+- Threads.net uses **div-based card layout** — NO `<article>` elements
 - Feed posts: `a[href*="/post/"]` links inside `<div>` cards (class contains `x1a2a7pz`)
 - Post pages: same structure; main post link appears first, replies follow
 - Screenshots: Use `a[href*="/post/"]` → ancestor div card, NOT `page.locator("article")`
@@ -212,7 +212,7 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes)
 - Numbers can be plain ("266") or abbreviated ("1K", "2.5M")
 - `likes` = first trailing number, `replies` = second, `reposts` = third
 - `min_engagement` filters by `likes + reposts` total
-- Posts are sorted by engagement descending before selection
+- Posts sorted by engagement descending before selection
 
 **Login Flow:**
 - Threads uses Instagram auth (`threads.net/login`)
@@ -220,11 +220,11 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes)
 - Button: `get_by_role("button", name="Log in", exact=True).first`
 - After click: `page.wait_for_url("https://www.threads.net/", timeout=15000)` — event-wait, not fixed delay
 - Cookies cached at `video_creation/data/cookie-threads.json`
-- Login logic is shared via `platforms/threads/auth.py`
+- Login logic shared via `platforms/threads/auth.py`
 
 **API Limitation:**
 - Graph API v1.0 only accesses YOUR OWN posts — no trending/discovery
-- Scraping bypasses this entirely — no API token needed
+- Scraping bypasses this — no API token needed
 
 ### Threads — Graph API (discovery_method = "api")
 
@@ -252,48 +252,47 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes)
 5. **Default to `googletranslate` TTS** for headless containers — no API key, fast, free
 6. **Use `libx264` encoder** — `h264_nvenc` is NVIDIA-only and not available in the slim image
 7. **Test both Threads discovery methods:** `api` and `scrape`
-8. **Bind-mount preserves state** — edits to `config.toml`, `results/`, `assets/temp/`, `video_creation/data/`, and the `utils/background_*.json` catalogs persist across container runs
-9. **GUI must bind to `0.0.0.0`** in Docker (already enforced via `GUI_HOST=0.0.0.0` env)
-10. **Use `/video/<id>` to serve renders** — the route looks up the file by id in `videos.json`, sanitizes the `Content-Disposition` filename, and avoids 404s caused by literal newlines in titles
+8. **Bind-mount preserves state** — edits to `config.toml`, `results/`, `assets/temp/`, `video_creation/data/`, and `utils/background_*.json` catalogs persist across container runs
+9. **GUI must bind to `0.0.0.0`** in Docker (enforced via `GUI_HOST=0.0.0.0` env)
+10. **Use `/video/<id>` to serve renders** — the route looks up the file by id in `videos.json`, sanitizes `Content-Disposition` filename, avoids 404s from literal newlines in titles
 
 ### ❌ DON'T:
 
 1. **Don't run `python GUI.py` or `python main.py` on the host** — Docker is the only supported path
-2. **Don't use `<article>` selectors** on Threads.net — the DOM is div-based
+2. **Don't use `<article>` selectors** on Threads.net — DOM is div-based
 3. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility
 4. **Don't import platform modules directly** in main.py/utils
 5. **Don't assume config keys exist** without `.get()` fallback
-6. **Don't reintroduce jQuery, Bootstrap, or ClipboardJS** — the UI is vanilla ES6 + Tailwind + DaisyUI + Lucide
-7. **Don't write to `utils/backgrounds.json`** — it is a legacy empty file. Use `utils/background_videos.json` and `utils/background_audios.json`
+6. **Don't reintroduce jQuery, Bootstrap, or ClipboardJS** — UI is vanilla ES6 + Tailwind + DaisyUI + Lucide
+7. **Don't write to `utils/backgrounds.json`** — legacy empty file. Use `utils/background_videos.json` and `utils/background_audios.json`
 
 ### 🔒 Security (hardened May 2026)
 
-1. **No `eval()`** — use `{"int": int, "float": float, "bool": bool, "str": str}` dict dispatch for type coercion. `utils/settings.py` has module-level `_TYPE_COERCION`.
-2. **No `os.system()`** — use `subprocess.run([...])` with argument lists. No shell interpretation of paths.
+1. **No `eval()`** — use `{"int": int, "float": float, "bool": bool, "str": str}` dict dispatch. `utils/settings.py` has module-level `_TYPE_COERCION`.
+2. **No `os.system()`** — use `subprocess.run([...])` with argument lists. No shell interpretation.
 3. **No `shell=True`** — removed from all `subprocess.run()` and `Popen()` calls.
-4. **No bare `except:`** — always catch specific exception types. Bare excepts swallow `KeyboardInterrupt` and `SystemExit`.
-5. **Redact secrets before printing** — `main.py` error handler deep-copies config and masks all credential fields before logging.
-6. **Settings page secrets** — `GUI.py` redacts API keys/passwords from the data dict passed to `settings.html`. Sensitive fields show as `********`.
-7. **CSRF protection** — `GUI.py` has `@app.before_request` that checks `Origin` header on all mutating requests.
-8. **Security headers** — `X-Content-Type-Options: nosniff`, `X-Frame-Options: DENY` on every response.
-9. **Flask secret key** — loaded from `FLASK_SECRET_KEY` env var, falls back to `os.urandom(32)` per startup.
-10. **Docker non-root** — container runs as `appuser`, not root.
-11. **Path traversal** — `/video/<id>` uses `Path.resolve().relative_to()` guard; `add_background()` sanitizes citation with `re.sub(r"[./\\\\]", "_", citation)`.
-12. **No hardcoded credentials** in source — all secrets loaded from `config.toml` (gitignored). Rotate passwords regularly.
+4. **No bare `except:`** — catch specific exception types. Bare excepts swallow `KeyboardInterrupt` and `SystemExit`.
+5. **Redact secrets before printing** — `main.py` error handler deep-copies config and masks all credential fields. `GUI.py` redacts API keys/passwords from settings page data. Sensitive fields show as `********`.
+6. **CSRF protection** — `GUI.py` `@app.before_request` checks `Origin` header on all mutating requests.
+7. **Security headers** — `X-Content-Type-Options: nosniff`, `X-Frame-Options: DENY` on every response.
+8. **Flask secret key** — `FLASK_SECRET_KEY` env var, fallback `os.urandom(32)` per startup.
+9. **Docker non-root** — container runs as `appuser`, not root.
+10. **Path traversal** — `/video/<id>` uses `Path.resolve().relative_to()` guard; `add_background()` sanitizes citation with `re.sub(r"[./\\\\]", "_", citation)`.
+11. **No hardcoded credentials** in source — all secrets from `config.toml` (gitignored). Rotate passwords regularly.
 
 ---
 
 ## Web UI (Flask, served by `gui` service)
 
-- **Stack:** Tailwind CSS, DaisyUI, Lucide Icons, vanilla ES6 (no jQuery, no Bootstrap, no ClipboardJS)
+- **Stack:** Tailwind CSS, DaisyUI, Lucide Icons, vanilla ES6 (no jQuery, Bootstrap, ClipboardJS)
 - **Routes:**
-  - `/` — Video Library; cards show source-post link, download, and copy-link buttons
-  - `/video/<id>` — serves the rendered mp4 by id (lookup via `videos.json`); guards path-traversal and sanitizes the filename for `Content-Disposition`
+  - `/` — Video Library; cards show source-post link, download, copy-link buttons
+  - `/video/<id>` — serves rendered mp4 by id (lookup via `videos.json`); path-traversal guard, sanitized `Content-Disposition`
   - `/backgrounds` — Background Manager UI
-  - `/backgrounds.json` — serves `utils/background_videos.json` (the videos catalog)
-  - `/background/add`, `/background/delete` — POST endpoints; mutate **both** `utils/background_videos.json` and the `settings.background.background_video.options` array in `utils/.config.template.toml`
+  - `/backgrounds.json` — serves `utils/background_videos.json` (videos catalog)
+  - `/background/add`, `/background/delete` — POST; mutate **both** `utils/background_videos.json` and `settings.background.background_video.options` in `utils/.config.template.toml`
   - `/settings` — config editor; loads from `config.toml`, validates against `utils/.config.template.toml`, persists via `utils/gui_utils.modify_settings` (preserves comments/formatting via `tomlkit`)
-- **HTML escaping:** the `h()` helper in `index.html` escapes `& " < >` for any user-controlled string embedded in attributes — use it for any new dynamic data on the Library page
+- **HTML escaping:** `h()` helper in `index.html` escapes `& " < >` for user-controlled strings in attributes
 
 ---
 
@@ -303,19 +302,19 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes)
 |------|---------|
 | `main.py` | CLI entry; pipeline orchestration via factory |
 | `platforms/__init__.py` | Factory dispatch (platform + discovery_method) |
-| `platforms/threads/scraper.py` | **NEW** — Web scraping fetcher with engagement parsing |
-| `platforms/threads/auth.py` | **NEW** — Shared Playwright login + cookie management |
+| `platforms/threads/scraper.py` | Web scraping fetcher with engagement parsing |
+| `platforms/threads/auth.py` | Shared Playwright login + cookie management |
 | `platforms/threads/fetcher.py` | Graph API client (own posts only) |
 | `platforms/threads/screenshot.py` | Div-based Threads screenshotter |
-| `video_creation/final_video.py` | FFmpeg composition (libx264, platform-aware output); exports `get_output_path()` for shared path computation |
+| `video_creation/final_video.py` | FFmpeg composition (libx264, platform-aware output); exports `get_output_path()` |
 | `video_creation/background.py` | Background downloader (local files + yt-dlp); prefers already-downloaded videos |
 | `video_creation/youtube_uploader.py` | OAuth2 YouTube upload |
 | `TTS/engine_wrapper.py` | TTS provider abstraction + TikTok→pyttsx3 fallback; single-pass ffmpeg concat |
 | `TTS/TikTok.py` | Hardened TikTok TTS with graceful error handling |
-| `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA; retry-depth limit (50) on submission search |
+| `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA; retry-depth limit (50) |
 | `utils/settings.py` | Config loading + interactive validation; uses `_TYPE_COERCION` dict (no eval) |
 | `utils/videos.py` | Video dedup tracking (`check_done`, `check_done_by_id`, `save_data` with truncate) |
-| `utils/.config.template.toml` | Config schema (also drives Settings page validation) |
+| `utils/.config.template.toml` | Config schema (drives Settings page validation) |
 | `utils/background_videos.json` | Background video manifest (served at `/backgrounds.json`) |
 | `utils/background_audios.json` | Background audio manifest |
 | `utils/gui_utils.py` | `add_background`, `delete_background`, `modify_settings`, `get_checks` (no eval) |
@@ -329,52 +328,52 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes)
 ## Debugging Tips
 
 ### FFmpeg "Unknown encoder 'h264_nvenc'"
-→ Use `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. The slim image does not ship with NVIDIA encoders.
+→ Use `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. Slim image doesn't ship NVIDIA encoders.
 
 ### yt-dlp "Requested format is not available"
-→ Bump the pinned version in `requirements.txt` and rebuild (`docker compose build`). Also prefer `best[height<=1080]` over `bestvideo` in `video_creation/background.py` — many videos lack video-only streams.
+→ Bump pinned version in `requirements.txt` and rebuild (`docker compose build`). Prefer `best[height<=1080]` over `bestvideo` in `video_creation/background.py` — many videos lack video-only streams.
 
 ### Threads screenshots fail ("Main post article not found")
-→ Threads.net uses div cards, not `<article>`. Ensure screenshot code uses `a[href*="/post/"]` → ancestor div approach.
+→ Threads.net uses div cards, not `<article>`. Use `a[href*="/post/"]` → ancestor div approach.
 
 ### Config validator EOFError in non-interactive mode
-→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Either fill all required fields, edit through `/settings`, or pre-populate `config.toml` before `docker compose run cli`.
+→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Fill all required fields, edit through `/settings`, or pre-populate `config.toml` before `docker compose run cli`.
 
 ### Playwright timeout on Threads login
-→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login (the file is bind-mounted, so deleting on host clears the container too). Also confirm selectors: button uses `exact=True` due to multiple "Log in" buttons.
+→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login (file is bind-mounted, host delete clears container too). Confirm selectors: button uses `exact=True` for multiple "Log in" buttons.
 
 ### No viral posts found
 → Lower `min_engagement` in config. Most Threads feed posts have <100 likes — 10000 filters almost everything.
 
 ### Background Manager grid is empty
-→ `/backgrounds.json` must serve `utils/background_videos.json` (split catalog), **not** the legacy `utils/backgrounds.json` (empty `{}`). Verify in `GUI.py:backgrounds_json`.
+→ `/backgrounds.json` must serve `utils/background_videos.json` (split catalog), **not** legacy `utils/backgrounds.json` (empty `{}`). Verify in `GUI.py:backgrounds_json`.
 
 ### `/video/<id>` returns 404
-→ The route looks up the entry in `video_creation/data/videos.json` by `id` and resolves the file under `results/<thread_category>/<filename>.mp4`. Confirm both the JSON entry and the file exist; the file may have been pruned.
+→ Route looks up entry in `video_creation/data/videos.json` by `id`, resolves file under `results/<thread_category>/<filename>.mp4`. Confirm both JSON entry and file exist; file may have been pruned.
 
 ### JS "Unexpected end of input" on Library page
-→ Any user-controlled string interpolated into an HTML attribute must go through the `h()` helper in `index.html`. Avoid inline `onclick=` with `${JSON.stringify(...)}`.
+→ User-controlled strings in HTML attributes must go through `h()` helper in `index.html`. Avoid inline `onclick=` with `${JSON.stringify(...)}`.
 
 ### Stale image after editing `requirements.txt` or `Dockerfile`
-→ `docker compose build` to rebuild. Code changes alone do NOT need a rebuild because the repo root is bind-mounted to `/app`.
+→ `docker compose build` to rebuild. Code-only changes don't need rebuild — repo root is bind-mounted to `/app`.
 
 ### Python bytecode caching in long-running GUI container
-→ The GUI process caches imported modules in `sys.modules`. After editing pipeline code (`final_video.py`, `background.py`, `screenshot.py`), restart the GUI (`docker compose restart gui`) or trigger a pipeline run which now calls `importlib.reload()` on all pipeline modules automatically.
+→ GUI caches imported modules in `sys.modules`. After editing pipeline code, restart GUI (`docker compose restart gui`) or trigger pipeline run which calls `importlib.reload()` on pipeline modules.
 
 ### Reddit image template appearing in Threads videos
-→ Verify `platform` in config.toml is `"threads"` (not `"reddit"`). The `if platform == "reddit"` guard in `final_video.py` blocks the Reddit template. If it still appears, restart the GUI container to flush Python bytecode cache.
+→ Verify `platform` in config.toml is `"threads"` (not `"reddit"`). The `if platform == "reddit"` guard in `final_video.py` blocks Reddit template. Restart GUI container to flush Python bytecode cache.
 
 ### Background video download fails (yt-dlp HTTP 403)
-→ `get_background_config()` now prefers already-downloaded videos. Set `background_video` in config.toml to a downloaded video name (check `assets/backgrounds/video/`). If empty, it randomly picks from downloaded videos first.
+→ `get_background_config()` prefers already-downloaded videos. Set `background_video` in config.toml to a downloaded video name (check `assets/backgrounds/video/`). If empty, randomly picks from downloaded videos first.
 
 ### TTS output has wrong number of audio clips
-→ `engine_wrapper.run()` returns `idx + 1` (count, not last index). If you're getting one fewer clip than expected, check the return value consumers — they should treat it as a count.
+→ `engine_wrapper.run()` returns `idx + 1` (count, not last index). If getting one fewer clip than expected, check return value consumers — treat as count.
 
 ### videos.json corruption (trailing garbage after save)
-→ Fixed: `save_data()` now calls `raw_vids.truncate()` after `json.dump()`. If you have an existing corrupted file, delete `video_creation/data/videos.json` and it will be recreated.
+→ Fixed: `save_data()` calls `raw_vids.truncate()` after `json.dump()`. Delete `video_creation/data/videos.json` if existing file is corrupted.
 
 ### Infinite recursion in Reddit post discovery
-→ Fixed: `get_subreddit_threads()` has a retry-depth limit of 50. If you hit this, your subreddit may have no undone posts — try a different subreddit or clear `videos.json`.
+→ Fixed: `get_subreddit_threads()` has retry-depth limit of 50. If hit, subreddit may have no undone posts — try different subreddit or clear `videos.json`.
 
 ---
 
@@ -388,43 +387,43 @@ docker compose build
 docker compose up gui
 # → http://localhost:4000
 
-# Run the GUI in the background
+# Run the GUI in background
 docker compose up -d gui
 docker compose logs -f gui
 docker compose down
 
-# Run the CLI pipeline (one-off, removed on exit)
+# Run CLI pipeline (one-off, removed on exit)
 docker compose run --rm cli
 docker compose run --rm cli python main.py <post_id>
 
-# Run the test suite
+# Run test suite
 docker compose run --rm test
 
-# Open a shell in a fresh container for ad-hoc commands
+# Shell in fresh container for ad-hoc commands
 docker compose run --rm --entrypoint /bin/bash gui
-# inside:  python -m py_compile main.py platforms/threads/scraper.py
+# inside: python -m py_compile main.py platforms/threads/scraper.py
 
-# Tail a running GUI container
+# Tail running GUI container
 docker compose exec gui ls /app/results/threads/
 ```
 
-> Anything that needs `pip install`, `playwright install`, or `apt-get` belongs in `Dockerfile` followed by `docker compose build` — never run those on the host.
+> Anything needing `pip install`, `playwright install`, or `apt-get` belongs in `Dockerfile` + `docker compose build` — never on host.
 
 ---
 
 ## Recent Changes (May 2026 Security Hardening)
 
-**eval() removal:** All `eval(checks["type"])(value)` patterns replaced with `{"int": int, "float": float, "bool": bool, "str": str}` dict dispatch in `utils/settings.py`, `utils/console.py`, `utils/gui_utils.py`.
+**eval() removal:** `eval(checks["type"])(value)` replaced with `{"int": int, "float": float, "bool": bool, "str": str}` dict dispatch in `utils/settings.py`, `utils/console.py`, `utils/gui_utils.py`.
 
-**os.system() removal:** `TTS/engine_wrapper.py:split_post` now uses `subprocess.run([...])` with argument lists. `utils/posttextparser.py` spacy download uses `subprocess.run([sys.executable, "-m", "spacy", ...])`.
+**os.system() removal:** `TTS/engine_wrapper.py:split_post` uses `subprocess.run([...])` with argument lists. `utils/posttextparser.py` spacy download uses `subprocess.run([sys.executable, "-m", "spacy", ...])`.
 
 **shell=True removal:** All `subprocess.run(..., shell=True)` and `Popen(..., shell=True)` replaced with argument lists in `main.py` and `utils/ffmpeg_install.py`.
 
-**Credential leak prevention:** `main.py` error handler deep-copies config and redacts all secrets before printing. `GUI.py` masks sensitive keys as `********` in settings page data.
+**Credential leak prevention:** `main.py` error handler deep-copies config and redacts all secrets. `GUI.py` masks sensitive keys as `********` in settings page data.
 
 **CSRF + security headers:** `GUI.py` checks `Origin` header on POST/PUT/DELETE. `X-Content-Type-Options`, `X-Frame-Options` headers added.
 
-**Docker hardening:** Container runs as `appuser` (non-root). Digest pinning + pip version comments added for production.
+**Docker hardening:** Container runs as `appuser` (non-root). Digest pinning + pip version comments added.
 
 **Bug fixes (18 total):**
 - Config overwrite crash (config=None after empty file write)
diff --git a/TTS/supertonic_tts.py b/TTS/supertonic_tts.py
index 3264b58..78a0172 100644
--- a/TTS/supertonic_tts.py
+++ b/TTS/supertonic_tts.py
@@ -1,4 +1,5 @@
 import random
+import re
 import subprocess
 import tempfile
 from pathlib import Path
@@ -7,6 +8,11 @@ from supertonic import TTS
 
 from utils import settings
 
+# Zero-width and invisible formatting characters rejected by supertonic.
+# ​ ZWSP, ‌ ZWNJ, ‍ ZWJ, ‎ LTR, ‏ RTL,
+# ⁠ word joiner, ﻿ BOM
+_ZW_RE = re.compile("[​-‏⁠﻿]")
+
 
 class SupertonicTTS:
     def __init__(self):
@@ -17,18 +23,43 @@ class SupertonicTTS:
         output_path = Path(filepath)
         output_path.parent.mkdir(parents=True, exist_ok=True)
 
+        # Strip known invisible characters first, then retry on any others
+        # that supertonic rejects — social media text has unpredictable Unicode.
+        text = _ZW_RE.sub("", text)
+
         tts_settings = settings.config["settings"].get("tts", {})
         voice_style = self._voice_style(tts_settings, random_voice)
+        synth_kwargs = {
+            "voice_style": voice_style,
+            "lang": tts_settings.get("supertonic_lang", "na"),
+            "total_steps": int(tts_settings.get("supertonic_steps", 8)),
+            "speed": float(tts_settings.get("supertonic_speed", 1.05)),
+            "max_chunk_length": self.max_chars,
+            "verbose": False,
+        }
 
-        wav, _duration = self.tts.synthesize(
-            text,
-            voice_style=voice_style,
-            lang=tts_settings.get("supertonic_lang", "na"),
-            total_steps=int(tts_settings.get("supertonic_steps", 8)),
-            speed=float(tts_settings.get("supertonic_speed", 1.05)),
-            max_chunk_length=self.max_chars,
-            verbose=False,
-        )
+        for _ in range(3):
+            try:
+                wav, _duration = self.tts.synthesize(text, **synth_kwargs)
+                break
+            except ValueError as e:
+                msg = str(e)
+                if "unsupported character" not in msg:
+                    raise
+                # Extract and strip the reported characters
+                chars = re.findall(r"'(.+?)'", msg)
+                if not chars:
+                    raise
+                for c in chars:
+                    text = text.replace(c, "")
+                if not text.strip():
+                    raise ValueError(
+                        "Text is empty after stripping unsupported characters"
+                    ) from e
+        else:
+            raise RuntimeError(
+                "Supertonic still rejecting characters after 3 retries"
+            )
 
         with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
             wav_path = Path(temp_wav.name)
diff --git a/platforms/threads/scraper.py b/platforms/threads/scraper.py
index 3cfc715..ec6ee28 100644
--- a/platforms/threads/scraper.py
+++ b/platforms/threads/scraper.py
@@ -317,6 +317,17 @@ def _contains_blocked(text: str, blocked_raw: str) -> bool:
     return any(word in text_lower for word in blocked)
 
 
+def _is_english_text(text: str) -> bool:
+    """Return True if text is predominantly English (Latin alphabet)."""
+    if not text or not text.strip():
+        return False
+    alpha_chars = [c for c in text if c.isalpha()]
+    if not alpha_chars:
+        return False
+    latin = sum(1 for c in alpha_chars if "a" <= c.lower() <= "z")
+    return latin / len(alpha_chars) >= 0.70
+
+
 def _filter_candidates(posts: list[dict]) -> list[dict]:
     """Filter feed posts by engagement, blocked words, and duplicates.
 
@@ -337,6 +348,8 @@ def _filter_candidates(posts: list[dict]) -> list[dict]:
             continue
         if not post["body"] or len(post["body"].strip()) < 10:
             continue
+        if t_config.get("english_only", False) and not _is_english_text(post["body"]):
+            continue
         # Age filter
         if max_age_hours is not None:
             post_hours = _parse_timestamp_to_hours(post.get("timestamp", ""))
@@ -502,6 +515,8 @@ def _build_content_object(post: dict, replies: list[dict]) -> dict:
             continue
         if _contains_blocked(body, blocked_raw):
             continue
+        if t_config.get("english_only", False) and not _is_english_text(body):
+            continue
         if not (min_len <= len(body) <= max_len):
             continue
         sanitised = sanitize_text(body)
diff --git a/utils/.config.template.toml b/utils/.config.template.toml
index 849f895..392cb8e 100644
--- a/utils/.config.template.toml
+++ b/utils/.config.template.toml
@@ -40,6 +40,7 @@ min_engagement = { default = 0, optional = true, nmin = 0, type = "int", explana
 max_post_age = { optional = true, default = "", options = ["", "1h", "6h", "24h", "3d", "7d", "30d"], type = "str", explanation = "Maximum age of posts to consider. Empty = no limit.", example = "7d" }
 search_queries = { optional = true, default = "news,politics,trending", type = "str", explanation = "Comma-separated search queries to find trending content on Threads. Combined with main feed results.", example = "news,politics,viral" }
 blocked_words = { optional = true, default = "", type = "str", explanation = "Comma-separated list of blocked words/phrases. Posts and replies containing any of these will be skipped.", example = "nsfw, spoiler, politics" }
+english_only = { optional = true, default = false, options = [true, false], type = "bool", explanation = "Only include English-language posts and replies (Latin alphabet heuristic). Set to true for English-only content." }
 
 [youtube]
 enabled = { optional = true, type = "bool", default = false, options = [true, false], explanation = "Enable automatic YouTube upload after video creation" }
diff --git a/vendor/FullyAutomatedRedditVideoMakerBot b/vendor/FullyAutomatedRedditVideoMakerBot
deleted file mode 160000
index 6e5c9ff..0000000
--- a/vendor/FullyAutomatedRedditVideoMakerBot
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 6e5c9ffacc5ac601cb596cfaa72fc947824a2ca3