From 5e183d8e2c1993fc045f617a010c2c16d8c0ba9f Mon Sep 17 00:00:00 2001
From: Hong Phuc <hongphuc.dthp@gmail.com>
Date: Tue, 5 May 2026 01:44:07 +0700
Subject: [PATCH] =?UTF-8?q?feat:=20Threads=20trending=20scraper=20?=
 =?UTF-8?q?=E2=80=94=20web=20scraping,=20engagement=20filtering,=20av=20mi?=
 =?UTF-8?q?gration?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Web scraper (platforms/threads/scraper.py) with div-based card parsing
- Multi-source discovery: For You feed + configurable search queries
- Engagement filtering (min_engagement) and post age filter (max_post_age)
- Shared Playwright auth module (platforms/threads/auth.py)
- Migrated ffmpeg-python to av (PyAV) for in-process media probing
- Video composition uses subprocess ffmpeg (av filter graph segfault workaround)
- Updated CLAUDE.md with Threads scraping and macOS-specific notes

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 CLAUDE.md                       | 513 +++++++++++-----------------
 platforms/__init__.py           |   9 +-
 platforms/threads/auth.py       |  95 ++++++
 platforms/threads/scraper.py    | 587 ++++++++++++++++++++++++++++++++
 platforms/threads/screenshot.py | 112 ++----
 requirements.txt                |   2 +-
 utils/.config.template.toml     |   8 +-
 utils/background_audios.json    |   5 +
 utils/background_videos.json    |   6 +
 video_creation/background.py    |   2 +-
 video_creation/final_video.py   | 538 +++++++++++++++--------------
 11 files changed, 1206 insertions(+), 671 deletions(-)
 create mode 100644 platforms/threads/auth.py
 create mode 100644 platforms/threads/scraper.py

diff --git a/CLAUDE.md b/CLAUDE.md
index ed446b8..91222fd 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -6,7 +6,7 @@
 
 **Status:** Production-ready, actively maintained (v3.4.0)
 **Language:** Python 3.10+
-**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned)
+**Platforms:** Reddit (PRAW API), Threads (Graph API + Web Scraping)
 
 ### Core Mission
 Transforms social media threads (post + comments/replies) into complete short-form videos with:
@@ -14,6 +14,7 @@ Transforms social media threads (post + comments/replies) into complete short-fo
 - UI screenshots (Playwright)
 - Background video/audio overlays
 - FFmpeg composition & output
+- Optional YouTube upload
 
 ---
 
@@ -23,162 +24,120 @@ Transforms social media threads (post + comments/replies) into complete short-fo
 main.py (CLI)
     ↓ [platform factory]
     ├─→ reddit/subreddit.py [PRAW API]
-    └─→ platforms/threads/fetcher.py [Graph API]
-        ↓ [standard data dict]
-        ├─→ TTS/engine_wrapper.py [7+ providers]
-        ├─→ screenshot_downloader.py (Reddit)
-        │   or platforms/threads/screenshot.py (Threads)
-        ├─→ video_creation/background.py
-        └─→ video_creation/final_video.py [FFmpeg]
-            ↓
-            results/{category}/{video.mp4}
+    └─→ platforms/threads/
+        ├─→ fetcher.py [Graph API — your own posts]
+        ├─→ scraper.py [Web scraping — trending For You feed]
+        └─→ auth.py [Shared Playwright login + cookies]
+            ↓ [standard data dict]
+            ├─→ TTS/engine_wrapper.py [7+ providers, auto-fallback]
+            ├─→ screenshot_downloader.py (Reddit)
+            │   or platforms/threads/screenshot.py (Threads)
+            ├─→ video_creation/background.py [local or yt-dlp]
+            ├─→ video_creation/youtube_uploader.py [optional auto-upload]
+            └─→ video_creation/final_video.py [FFmpeg with libx264]
+                ↓
+                results/{category}/{video.mp4}
 ```
 
-### Key Design: Platform Abstraction via Factory Pattern
-
-**Why:** Single codebase supports multiple platforms without tight coupling.
-
-**How:** `platforms/__init__.py` exports:
-- `get_content_object(POST_ID=None)` — routes to right fetcher
-- `get_screenshot_fn()` — routes to right screenshotter
-
-**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches.
-
 ---
 
 ## Data Contract: The "content_object" Dict
 
-All fetchers return this shape (defined in `platforms/__init__.py`):
+All fetchers return this shape:
 
 ```python
 {
-    # Unique identifiers
     "thread_id":       str,           # Used for temp folder: assets/temp/{id}/
-    "thread_category": str,           # "reddit", "threads", etc. → output folder
-
-    # Content
-    "thread_title":    str,           # TTS as title + output filename
+    "thread_category": str,           # "reddit", "threads" → output folder
+    "thread_title":    str,           # TTS + output filename (clean, no metadata)
     "thread_url":      str,           # Playwright navigates here for screenshot
-    "is_nsfw":         bool,          # Content filter flag
-
-    # Replies/Comments (mutually exclusive with thread_post)
+    "is_nsfw":         bool,
     "comments": [
         {
-            "comment_body": str,      # TTS per reply
+            "comment_body": str,      # TTS per reply (clean body text)
             "comment_url":  str,      # Playwright navigates here
-            "comment_id":   str,      # CSS selector ID or unique identifier
+            "comment_id":   str,      # Unique identifier (URL-based for scraper)
         }
     ],
-
-    # OR Story mode:
-    "thread_post":     str | list,    # Long-form text (no comments)
+    "thread_post":     str | list,    # Story mode (no comments)
 }
 ```
 
-**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic.
-
 ---
 
 ## File Organization
 
 ```
 VideoMakerBot/
-├── platforms/                      # Multi-platform abstraction
-│   ├── __init__.py                # Factory: get_content_object(), get_screenshot_fn()
-│   └── threads/                   # Threads (Meta) implementation
-│       ├── fetcher.py             # Graph API → content_object
-│       └── screenshot.py          # Playwright Threads screenshotter
+├── platforms/
+│   ├── __init__.py                    # Factory: get_content_object(), get_screenshot_fn()
+│   └── threads/
+│       ├── auth.py                    # Shared Playwright login + cookie management
+│       ├── fetcher.py                 # Graph API → content_object (your own posts)
+│       ├── scraper.py                 # Web scraping → content_object (trending feed)
+│       └── screenshot.py             # Playwright Threads screenshotter (div-based)
 │
-├── reddit/                        # Reddit implementation (kept as-is)
-│   └── subreddit.py              # PRAW API → content_object + thread_category
+├── reddit/
+│   └── subreddit.py                  # PRAW API → content_object
 │
 ├── video_creation/
-│   ├── final_video.py            # FFmpeg composition (platform-aware folder naming)
-│   ├── screenshot_downloader.py  # Playwright Reddit UI capturer
-│   ├── voices.py                 # TTS orchestrator (platform-agnostic)
-│   ├── background.py             # Video/audio downloader (platform-agnostic)
-│   └── data/
-│       ├── videos.json           # Dedup tracker
-│       ├── cookie-dark-mode.json # Reddit theme cookie
-│       └── cookie-threads.json   # Threads session cookie (auto-created)
+│   ├── final_video.py                # FFmpeg composition (libx264, no drawtext on macOS)
+│   ├── background.py                 # Video/audio downloader (local files or yt-dlp)
+│   ├── screenshot_downloader.py      # Playwright Reddit UI capturer
+│   ├── voices.py                     # TTS orchestrator
+│   └── youtube_uploader.py           # YouTube OAuth2 upload (post-render hook)
 │
-├── TTS/                          # Text-to-Speech
-│   ├── engine_wrapper.py         # Provider abstraction + post_lang fallback
-│   ├── elevenlabs.py, aws_polly.py, etc. # 7+ provider implementations
+├── TTS/
+│   ├── engine_wrapper.py             # Provider abstraction + TikTok→pyttsx3 fallback
+│   ├── TikTok.py                     # TikTok TTS (hardened error handling)
+│   └── ...                           # 7+ provider implementations
 │
 ├── utils/
-│   ├── settings.py               # Config loading + validation
-│   ├── videos.py                 # check_done() + check_done_by_id()
-│   ├── console.py                # Rich terminal output
-│   ├── .config.template.toml     # Config schema (platform sections)
-│   └── ... (id, voice, cleanup, etc.)
+│   ├── settings.py                   # Config loading + interactive validation
+│   ├── videos.py                     # check_done() + check_done_by_id()
+│   ├── console.py                    # Rich terminal output
+│   ├── .config.template.toml         # Config schema
+│   ├── background_videos.json        # Background video manifest
+│   ├── background_audios.json        # Background audio manifest
+│   └── ...
 │
-├── main.py                       # CLI entry (platform-routed via factory)
-├── GUI.py                        # Flask web UI (localhost:4000)
-├── requirements.txt              # Dependencies
-└── CLAUDE.md / AGENT.md          # This file + agent guidelines
+├── main.py                           # CLI entry (platform-routed via factory)
+├── GUI.py                            # Flask web UI (localhost:4000)
+├── requirements.txt
+└── CLAUDE.md
 ```
 
 ---
 
 ## Configuration
 
-**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config)
+### Threads (full config)
 
-### Platform Selection
 ```toml
 [settings]
-platform = "reddit"     # or "threads"
-post_lang = "es-cr"     # Optional: translation language (all platforms)
-```
-
-### Reddit Config
-```toml
-[reddit.creds]
-client_id = "..."       # OAuth app
-client_secret = "..."
-username = "..."
-password = "..."
-2fa = true/false
+platform = "threads"
 
-[reddit.thread]
-subreddit = "AskReddit"
-post_id = ""            # Leave blank for auto-pick
-max_comment_length = 500
-min_comment_length = 1
-min_comments = 20
-blocked_words = "..."
-```
+[threads]
+discovery_method = "scrape"    # "api" (Graph API, own posts) or "scrape" (trending feed)
 
-### Threads Config (NEW)
-```toml
 [threads.creds]
-access_token = "EAABsbCS..."  # Meta Graph API token (60-day expiry)
-user_id = "12345678901234567"
-username = "your_insta"       # For Playwright login
+username = "your_insta"        # For Playwright login (always needed)
 password = "your_password"
+access_token = ""              # Only for discovery_method="api"
+user_id = ""                   # Only for discovery_method="api"
 
 [threads.thread]
-post_id = ""            # Leave blank for auto-pick
+post_id = ""                   # Specific post ID; blank = auto-pick from feed
 max_reply_length = 500
 min_reply_length = 1
-min_replies = 5
-blocked_words = "..."
-```
-
-### Generic Settings
-```toml
-[settings]
-theme = "dark"
-resolution_w = 1080
-resolution_h = 1920
-storymode = false
-times_to_run = 1
+min_replies = 5                # Minimum replies for post eligibility
+min_engagement = 0             # Minimum likes+reposts for viral filter (0=disabled, 10000=viral)
+blocked_words = ""
 
 [settings.tts]
-voice_choice = "tiktok"     # or "elevenlabs", "awspolly", "googletranslate", etc.
-random_voice = true
-silence_duration = 0.3
+voice_choice = "googletranslate"  # Best for macOS: no API key, fast, free
+# voice_choice = "tiktok"         # Needs tiktok_sessionid; auto-falls back to pyttsx3
+# voice_choice = "OpenAI"         # Needs openai_api_key
 
 [settings.background]
 background_video = "minecraft"
@@ -186,167 +145,117 @@ background_audio = "lofi"
 background_audio_volume = 0.15
 ```
 
----
+### Reddit (reference)
 
-## Development Guidelines
+```toml
+[settings]
+platform = "reddit"
 
-### ✅ DO:
+[reddit.creds]
+client_id = "..."
+client_secret = "..."
+username = "..."
+password = "..."
+2fa = false
+2fa_secret = ""               # TOTP base32 secret for auto-2FA
 
-1. **Use platform factory in main.py**
-   ```python
-   from platforms import get_content_object, get_screenshot_fn
-   reddit_object = get_content_object(POST_ID)
-   screenshot_fn = get_screenshot_fn()
-   screenshot_fn(reddit_object, number_of_comments)
-   ```
-
-2. **Return standard content dict** from all fetchers
-   ```python
-   return {
-       "thread_id": ...,
-       "thread_category": ...,  # NEW: replaces hardcoded subreddit
-       "comments": [...]
-   }
-   ```
-
-3. **Use config fallback chains** for cross-platform keys
-   ```python
-   lang = (settings.config["settings"].get("post_lang") or
-           settings.config.get("reddit", {}).get("thread", {}).get("post_lang", ""))
-   ```
-
-4. **Read thread_category from dict** instead of config
-   ```python
-   # WRONG:
-   subreddit = settings.config["reddit"]["thread"]["subreddit"]
-
-   # RIGHT:
-   platform = settings.config["settings"].get("platform", "reddit")
-   if platform == "reddit":
-       subreddit = settings.config["reddit"]["thread"]["subreddit"]
-   else:
-       subreddit = reddit_obj.get("thread_category", platform)
-   ```
-
-5. **Test both platforms** after core pipeline changes
-   ```bash
-   # Test Reddit (must not regress)
-   sed -i 's/platform = "threads"/platform = "reddit"/' config.toml
-   python3 main.py
-
-   # Test Threads
-   sed -i 's/platform = "reddit"/platform = "threads"/' config.toml
-   python3 main.py --post-id <threads-id>
-   ```
+[reddit.thread]
+subreddit = "AskReddit"
+min_comments = 20
+```
 
-### ❌ DON'T:
+### YouTube upload
 
-1. **Don't import platform modules directly** in main.py/utils
-   ```python
-   # WRONG: from reddit.subreddit import get_subreddit_threads
-   # RIGHT: from platforms import get_content_object
-   ```
+```toml
+[youtube]
+enabled = false                # Set true to auto-upload after render
+privacy = "public"             # or "private", "unlisted"
+client_secret_path = ""        # Path to youtube_client_secret.json
+```
 
-2. **Don't hardcode platform names** in generic modules
-   ```python
-   # WRONG in final_video.py:
-   subreddit = settings.config["reddit"]["thread"]["subreddit"]
+---
 
-   # RIGHT:
-   subreddit = reddit_obj.get("thread_category", "unknown")
-   ```
+## Platform-Specific Knowledge
 
-3. **Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py`
-   - Reddit selectors stay in `video_creation/screenshot_downloader.py`
-   - Threads selectors stay in `platforms/threads/screenshot.py`
+### Threads — Web Scraping (discovery_method = "scrape")
 
-4. **Don't assume config keys exist** without fallback
-   ```python
-   # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"]
-   # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "")
-   ```
+**DOM Structure:**
+- Threads.net uses **div-based card layout** — NO `<article>` elements anywhere
+- Feed posts: `a[href*="/post/"]` links inside `<div>` cards (class contains `x1a2a7pz`)
+- Post pages: same structure; main post link appears first, replies follow
+- Screenshots: Use `a[href*="/post/"]` → ancestor div card, NOT `page.locator("article")`
 
----
+**Card Text Format (used by `_parse_card_text()`):**
+```
+Line 0:   username
+Line 1:   timestamp (e.g., "14h", "1d")
+Line 2..N: post body text
+Last 1-4 lines: engagement metrics (likes, replies, reposts, quotes)
+```
 
-## Platform-Specific Knowledge
+**Engagement Parsing:**
+- Numbers can be plain ("266") or abbreviated ("1K", "2.5M")
+- `likes` = first trailing number, `replies` = second, `reposts` = third
+- `min_engagement` filters by `likes + reposts` total
+- Posts are sorted by engagement descending before selection
 
-### Reddit
-- **API:** PRAW (Python Reddit API Wrapper)
-- **Auth:** OAuth app (client_id, secret) + username/password
-- **Screenshot:** Playwright on reddit.com/new.reddit.com
-  - Login form: `input[name="username"]`, `input[name="password"]`
-  - Post selector: `[data-test-id="post-content"]`
-  - Comment selector: `#t1_{comment_id}`
-- **NSFW:** `submission.over_18`
-- **Output folder:** `results/{subreddit}/`
-
-### Threads
-- **API:** Meta Graph API (v18.0+)
-- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/
-- **Screenshot:** Playwright on threads.net
-  - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]`
-  - Post selector: `article` (universal, more stable than Reddit)
-  - Cookies saved to: `video_creation/data/cookie-threads.json`
-- **NSFW:** API doesn't provide; always False
-- **Output folder:** `results/threads/`
-
-### Future: X/Twitter
-Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section
-Update: `platforms/__init__.py` with `elif platform == "twitter"` branches
+**Login Flow:**
+- Threads uses Instagram auth (`threads.net/login`)
+- Selectors: `input[autocomplete="username"]`, `input[autocomplete="current-password"]`
+- Button: `get_by_role("button", name="Log in", exact=True).first`
+- Cookies cached at `video_creation/data/cookie-threads.json`
+- Login logic is shared via `platforms/threads/auth.py`
 
----
+**API Limitation:**
+- Graph API v1.0 only accesses YOUR OWN posts — no trending/discovery
+- Scraping bypasses this entirely — no API token needed
 
-## Extending the Project
+### Threads — Graph API (discovery_method = "api")
 
-### Adding a New TTS Provider
-1. Create `TTS/my_provider.py` with a class implementing the TTS interface
-2. Add config keys to `[settings.tts]` in `.config.template.toml`
-3. Update `TTS/engine_wrapper.py` to call your provider
-4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"`
+- Auth: Bearer token, 60-day expiry
+- Only accesses authenticated user's own threads + replies
+- Use when you have your own content with replies
 
-### Adding a New Platform (e.g., X/Twitter)
-1. **Create fetcher:** `platforms/twitter/fetcher.py`
-   - Implement `get_twitter_content(POST_ID=None)` returning standard dict
-2. **Create screenshotter:** `platforms/twitter/screenshot.py`
-   - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)`
-3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections
-4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py`
-5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py`
-6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end
+### Reddit
 
-**Zero changes needed to:** TTS, backgrounds, video composition, or utils.
+- **API:** PRAW (Python Reddit API Wrapper)
+- **Post discovery:** `subreddit.hot(limit=25)` → `get_subreddit_undone()` → fallback to `top(day/hour/month/week/year/all)`
+- **Screenshot:** Playwright on new.reddit.com
+- **2FA:** Auto-TOTP via `pyotp` when `2fa_secret` is configured in config.toml
 
 ---
 
-## Debugging Tips
-
-### "No matching distribution found for yt-dlp==2026.3.17"
-→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable).
+## Development Guidelines
 
-### "Threads API: Invalid or expired access_token"
-→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/
+### ✅ DO:
 
-### Playwright timeout on Threads screenshot
-→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run.
+1. **Use platform factory** — never import platform modules directly
+2. **Return standard content_object** from all fetchers
+3. **Use clean body text** for TTS — parse out username/timestamp metadata
+4. **Default to `googletranslate` TTS on macOS** — pyttsx3 hangs in headless environments
+5. **Use `libx264` encoder on macOS** — `h264_nvenc` is NVIDIA-only
+6. **Test both Threads discovery methods:** `api` and `scrape`
 
-### "No eligible Threads posts found"
-→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies.
+### ❌ DON'T:
 
-### Video dedup not working
-→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content.
+1. **Don't use `<article>` selectors** on Threads.net — the DOM is div-based
+2. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility
+3. **Don't rely on `drawtext` FFmpeg filter** — not available in Homebrew builds
+4. **Don't import platform modules directly** in main.py/utils
+5. **Don't assume config keys exist** without `.get()` fallback
 
 ---
 
-## Testing Checklist
+## macOS-Specific Notes
 
-- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/`
-- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/`
-- [ ] Video dedup: Running same post_id twice skips second run
-- [ ] Translation: `post_lang = "es"` translates filenames
-- [ ] TTS providers: Test with different voice_choice values
-- [ ] Background selection: Custom background video/audio works
-- [ ] Story mode: storymode=true only uses thread_post, not comments
-- [ ] Error handling: Invalid credentials show clear messages
+- **TTS:** `googletranslate` (gTTS) is the most reliable — free, fast, no API key
+  - `tiktok` auto-falls back to `pyttsx3` if sessionid missing, but pyttsx3 is very slow
+  - `pyttsx3` works but takes ~60s to initialize NSSpeechSynthesizer
+- **FFmpeg encoder:** MUST use `libx264` — `h264_nvenc` is NVIDIA GPU only
+- **FFmpeg filters:** `drawtext` missing from Homebrew bottle — credit text is disabled
+- **yt-dlp:** Keep it up to date — YouTube changes its APIs frequently
+  - Format selector: `best[height<=1080]` not `bestvideo` (many videos lack video-only streams)
+  - Upgrade path: `pip install --upgrade yt-dlp`
 
 ---
 
@@ -354,96 +263,78 @@ Update: `platforms/__init__.py` with `elif platform == "twitter"` branches
 
 | File | Purpose |
 |------|---------|
-| `main.py` | CLI entry; orchestrates pipeline via factory |
-| `platforms/__init__.py` | Factory dispatch for multi-platform support |
-| `platforms/threads/fetcher.py` | Threads Graph API client |
-| `platforms/threads/screenshot.py` | Threads.net Playwright screenshotter |
-| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming |
-| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback |
-| `utils/settings.py` | Config loading & validation |
+| `main.py` | CLI entry; pipeline orchestration via factory |
+| `platforms/__init__.py` | Factory dispatch (platform + discovery_method) |
+| `platforms/threads/scraper.py` | **NEW** — Web scraping fetcher with engagement parsing |
+| `platforms/threads/auth.py` | **NEW** — Shared Playwright login + cookie management |
+| `platforms/threads/fetcher.py` | Graph API client (own posts only) |
+| `platforms/threads/screenshot.py` | Div-based Threads screenshotter |
+| `video_creation/final_video.py` | FFmpeg composition (libx264, platform-aware output) |
+| `video_creation/background.py` | Background downloader (local files + yt-dlp) |
+| `video_creation/youtube_uploader.py` | **NEW** — OAuth2 YouTube upload |
+| `TTS/engine_wrapper.py` | TTS provider abstraction + TikTok fallback |
+| `TTS/TikTok.py` | Hardened TikTok TTS with graceful error handling |
+| `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA |
+| `utils/settings.py` | Config loading + interactive validation |
 | `utils/videos.py` | Video dedup tracking |
 | `utils/.config.template.toml` | Config schema |
-| `requirements.txt` | Dependencies |
+| `utils/background_videos.json` | Background video manifest |
+| `utils/background_audios.json` | Background audio manifest |
 
 ---
 
-## Useful Commands
-
-```bash
-# Install dependencies
-pip install -r requirements.txt
-
-# Run CLI
-python3 main.py
-
-# Run with specific post
-python3 main.py <post_id>
-
-# Run Flask GUI
-python3 GUI.py
-
-# Check syntax
-python3 -m py_compile main.py platforms/threads/fetcher.py
-
-# Format code
-black main.py platforms/ utils/
+## Debugging Tips
 
-# Lint
-pylint main.py
-```
+### FFmpeg "Unknown encoder 'h264_nvenc'"
+→ On macOS, change to `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`.
 
----
+### FFmpeg "No such filter: 'drawtext'"
+→ Homebrew FFmpeg lacks drawtext. The credit text overlay is automatically skipped.
 
-## When You Get Stuck
+### yt-dlp "Requested format is not available"
+→ Update yt-dlp: `pip install --upgrade yt-dlp`. Also change the format selector from `bestvideo` to `best[height<=1080]` in `video_creation/background.py`.
 
-1. **"What does this module do?"** → Check imports in `main.py` or docstrings
-2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above
-3. **"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema
-4. **"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice`
-5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser
+### pyttsx3 hang on macOS
+→ NSSpeechSynthesizer needs GUI session. Switch to `voice_choice = "googletranslate"` for headless use.
 
-Good luck! 🚀
+### Threads screenshots fail ("Main post article not found")
+→ Threads.net uses div cards, not `<article>`. Ensure screenshot code uses `a[href*="/post/"]` → ancestor div approach.
 
-<!-- gitnexus:start -->
-# GitNexus — Code Intelligence
+### Config validator EOFError in non-interactive mode
+→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Fill ALL required fields or load config directly with `toml.load()` + `settings.config = ...`.
 
-This project is indexed by GitNexus as **VideoMakerBot** (802 symbols, 1287 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely.
+### Playwright timeout on Threads login
+→ Saved cookies may be corrupted or expired (expiry is not detected automatically). Delete `video_creation/data/cookie-threads.json` to force a fresh login. Also check the button selector: it must use `exact=True` because the page has multiple "Log in" buttons.
 
-> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first.
+### No viral posts found
+→ Lower `min_engagement` in config. Most Threads feed posts have <100 likes — 10000 filters almost everything.
 
-## Always Do
-
-- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user.
-- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows.
-- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits.
-- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance.
-- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`.
+---
 
-## Never Do
+## Useful Commands
 
-- NEVER edit a function, class, or method without first running `gitnexus_impact` on it.
-- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis.
-- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph.
-- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope.
+```bash
+# Install dependencies
+pip install -r requirements.txt
 
-## Resources
+# Run CLI
+python3 main.py
 
-| Resource | Use for |
-|----------|---------|
-| `gitnexus://repo/VideoMakerBot/context` | Codebase overview, check index freshness |
-| `gitnexus://repo/VideoMakerBot/clusters` | All functional areas |
-| `gitnexus://repo/VideoMakerBot/processes` | All execution flows |
-| `gitnexus://repo/VideoMakerBot/process/{name}` | Step-by-step execution trace |
+# Run bypassing config validator (non-interactive)
+python3 -c "
+import sys, toml
+sys.path.insert(0, '.')
+from utils import settings
+settings.config = toml.load('config.toml')
+from main import main; main()
+"
 
-## CLI
+# Update yt-dlp (YouTube downloads fix)
+pip install --upgrade yt-dlp
 
-| Task | Read this skill file |
-|------|---------------------|
-| Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` |
-| Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` |
-| Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` |
-| Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` |
-| Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` |
-| Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` |
+# Check syntax
+python3 -m py_compile main.py platforms/threads/scraper.py
 
-<!-- gitnexus:end -->
+# Run Flask GUI
+python3 GUI.py
+```
diff --git a/platforms/__init__.py b/platforms/__init__.py
index 736163a..0b21d84 100644
--- a/platforms/__init__.py
+++ b/platforms/__init__.py
@@ -26,8 +26,13 @@ def get_content_object(POST_ID=None) -> dict:
         return get_subreddit_threads(POST_ID)
 
     elif platform == "threads":
-        from platforms.threads.fetcher import get_threads_content
-        return get_threads_content(POST_ID)
+        discovery = settings.config.get("threads", {}).get("discovery_method", "api")
+        if discovery == "scrape":
+            from platforms.threads.scraper import get_trending_threads_content
+            return get_trending_threads_content(POST_ID)
+        else:
+            from platforms.threads.fetcher import get_threads_content
+            return get_threads_content(POST_ID)
 
     else:
         raise ValueError(
diff --git a/platforms/threads/auth.py b/platforms/threads/auth.py
new file mode 100644
index 0000000..9f957ee
--- /dev/null
+++ b/platforms/threads/auth.py
@@ -0,0 +1,95 @@
+"""Shared Playwright authentication for Threads.net.
+
+Used by both the screenshotter (screenshot.py) and the web scraper (scraper.py).
+"""
+
+import json
+from pathlib import Path
+
+from playwright.sync_api import Browser, BrowserContext, Page, ViewportSize
+
+from utils import settings
+from utils.console import print_substep
+
+THREADS_LOGIN_URL = "https://www.threads.net/login"  # page opened by login_to_threads()
+THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json"  # cookie cache, relative to the CWD
+DEFAULT_USER_AGENT = (  # default "user_agent" for contexts created in this module
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
+    "AppleWebKit/537.36 (KHTML, like Gecko) "
+    "Chrome/120.0.0.0 Safari/537.36"
+)
+
+
+def login_to_threads(page: Page, _context: BrowserContext) -> None:
+    """Log into threads.net via Instagram credentials and persist session cookies.
+
+    Raises RuntimeError when threads.creds.username or password is missing/blank.
+    Cookies are dumped as UTF-8 JSON to THREADS_COOKIE_FILE (parent dir is created).
+    """
+    # .get() chain: a missing [threads.creds] section must not surface as KeyError.
+    creds = settings.config.get("threads", {}).get("creds", {})
+    username = (creds.get("username") or "").strip()
+    password = (creds.get("password") or "").strip()
+    if not username or not password:
+        raise RuntimeError(
+            "Threads login requires credentials. "
+            "Set threads.creds.username and threads.creds.password in config.toml"
+        )
+
+    print_substep("Logging into Threads (via Instagram)...")
+    page.goto(THREADS_LOGIN_URL, timeout=0)  # timeout=0 disables the navigation timeout
+    page.wait_for_load_state("networkidle")
+    page.locator('input[autocomplete="username"]').fill(username)
+    page.locator('input[autocomplete="current-password"]').fill(password)
+    page.get_by_role("button", name="Log in", exact=True).first.click()
+    page.wait_for_timeout(6000)  # fixed 6 s pause; login success is not verified here
+    Path(THREADS_COOKIE_FILE).parent.mkdir(parents=True, exist_ok=True)
+    with open(THREADS_COOKIE_FILE, "w", encoding="utf-8") as f:  # same encoding as the reader
+        json.dump(_context.cookies(), f)
+    print_substep("Logged into Threads and saved session cookies.", style="bold green")
+
+
+def ensure_authenticated_context(browser: Browser, **kwargs) -> BrowserContext:
+    """Create a Playwright browser context with Threads session cookies loaded.
+
+    Loads saved cookies from cookie-threads.json. If the file is missing, unreadable,
+    or not a non-empty JSON list, performs a fresh login and persists the cookies.
+    Cookie expiry is NOT checked; delete the file to force a new login.
+    Keyword arguments override defaults for locale, viewport, device_scale_factor,
+    color_scheme, and user_agent. The context is closed if setup raises.
+    """
+    theme = settings.config["settings"]["theme"]
+    W = int(settings.config["settings"]["resolution_w"])
+    H = int(settings.config["settings"]["resolution_h"])
+    defaults = {
+        "locale": "en-US",
+        "color_scheme": "dark" if theme == "dark" else "light",
+        "viewport": ViewportSize(width=W, height=H),
+        "device_scale_factor": (W // 600) + 1,
+        "user_agent": DEFAULT_USER_AGENT,
+    }
+    defaults.update(kwargs)
+    context = browser.new_context(**defaults)
+    try:
+        saved_cookies, reason = None, "No saved cookies found. Logging in..."
+        if Path(THREADS_COOKIE_FILE).exists():
+            reason = "Saved cookies corrupted. Logging in fresh..."
+            try:
+                with open(THREADS_COOKIE_FILE, encoding="utf-8") as f:
+                    saved_cookies = json.load(f)
+            except (ValueError, OSError):  # JSONDecodeError/UnicodeDecodeError are ValueErrors
+                saved_cookies = None
+        if isinstance(saved_cookies, list) and saved_cookies:
+            context.add_cookies(saved_cookies)
+            print_substep("Loaded saved Threads session cookies.")
+        else:
+            print_substep(reason)
+            page = context.new_page()
+            try:
+                login_to_threads(page, context)
+            finally:
+                page.close()  # release the login page even when login raises
+    except Exception:
+        context.close()  # do not leak the context when setup fails
+        raise
+    return context
diff --git a/platforms/threads/scraper.py b/platforms/threads/scraper.py
new file mode 100644
index 0000000..b1af673
--- /dev/null
+++ b/platforms/threads/scraper.py
@@ -0,0 +1,587 @@
+"""Web scraping-based trending post discovery for Threads.net.
+
+Bypasses the Meta Graph API (which only accesses your own posts) by using Playwright
+to scrape threads.net directly — the "For You" feed, post pages, and replies.
+Returns the standard content_object dict consumed by the rest of the pipeline.
+"""
+
+import re
+from typing import Optional
+
+from playwright.sync_api import BrowserContext, Locator, sync_playwright
+
+from platforms.threads.auth import ensure_authenticated_context
+from utils import settings
+from utils.console import print_step, print_substep
+from utils.voice import sanitize_text
+from utils.videos import check_done_by_id
+
+FEED_URL = "https://www.threads.net"  # landing page opened by _scrape_feed_posts
+SCROLL_DELAY_MS = 2000  # pause after each scroll in the feed/search loops
+MAX_FEED_SCROLLS = 36  # default scroll cap for _scrape_feed_posts
+POST_LINK_SELECTOR = 'a[href*="/post/"]'  # any anchor whose href contains /post/
+CARD_XPATH = 'xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]'  # nearest matching ancestor div
+
+
+def _post_id_from_url(url: str) -> str:  # last path segment; "?query"/"#fragment" are dropped
+    return url.split("#", 1)[0].split("?", 1)[0].rstrip("/").split("/")[-1]
+
+
+def _to_absolute_url(href: str) -> str:
+    """Return *href* as-is when it is already absolute, else prefix the Threads origin."""
+    already_absolute = href.startswith("http")
+    return href if already_absolute else "https://www.threads.net" + href
+
+
+def _parse_abbreviated_number(s: str) -> int:
+    """Parse abbreviated numbers like '1K', '2.5M' into integers (0 if unparseable)."""
+    s = s.strip().upper().replace(",", "")
+    if not s:
+        return 0
+    multipliers = {"K": 1_000, "M": 1_000_000}
+    if s[-1] in multipliers:
+        try:
+            return round(float(s[:-1]) * multipliers[s[-1]])  # round: product may fall just short
+        except (ValueError, OverflowError):  # bad digits, NaN, or an infinite product
+            return 0
+    try:
+        return int(s)
+    except ValueError:
+        return 0
+
+
+def _parse_card_text(text: str) -> dict:
+    """Parse a Threads card's raw text into structured data.
+
+    Assumed card layout (one item per line of the card's inner text):
+      line 0:   username
+      line 1:   timestamp (e.g. "14h", "1d")
+      lines 2..N: post body text
+      last 1-4 lines: engagement metrics (likes, replies, reposts, quotes)
+
+    Returns dict with keys: username, timestamp, body, likes, replies, reposts
+    """
+    if not text:
+        return {"username": "", "timestamp": "", "body": "", "likes": 0, "replies": 0, "reposts": 0}
+
+    lines = text.strip().split("\n")
+    if len(lines) < 3:  # too short to split: the whole text is returned as the body
+        return {"username": "", "timestamp": "", "body": text, "likes": 0, "replies": 0, "reposts": 0}
+
+    username = lines[0].strip()
+    timestamp = lines[1].strip()
+
+    # Find where engagement metrics start (trailing numeric/abbreviated lines)
+    metric_start = len(lines)
+    for i in range(len(lines) - 1, 1, -1):  # walk backwards, never into lines 0-1
+        line = lines[i].strip()
+        if re.match(r'^[\d,.]+[KkMm]?$', line):
+            metric_start = i
+        else:
+            break  # first non-numeric line ends the metric tail
+
+    # Body is everything between timestamp and metrics
+    body_lines = lines[2:metric_start]
+    body = "\n".join(body_lines).strip()
+
+    # Parse engagement metrics from the end
+    metrics = lines[metric_start:]
+    likes = 0
+    replies_count = 0
+    reposts = 0
+
+    if len(metrics) >= 1:  # metrics are read positionally: likes, replies, reposts
+        likes = _parse_abbreviated_number(metrics[0])
+    if len(metrics) >= 2:
+        replies_count = _parse_abbreviated_number(metrics[1])
+    if len(metrics) >= 3:  # a fourth trailing metric, if present, is ignored
+        reposts = _parse_abbreviated_number(metrics[2])
+
+    return {
+        "username": username,
+        "timestamp": timestamp,
+        "body": body,
+        "likes": likes,
+        "replies": replies_count,
+        "reposts": reposts,
+    }
+
+
+def _extract_text_from_card(link: Locator) -> str:
+    """Return the stripped inner text of the card enclosing *link*, or "" on any failure."""
+    try:
+        container = link.locator(CARD_XPATH)
+        if not container.count():
+            return ""
+        return container.first.inner_text(timeout=3000).strip()
+    except Exception:  # best-effort: a card that cannot be read just yields no text
+        return ""
+
+
+# --- Feed scraping ---
+
+
+def _scrape_feed_posts(context: BrowserContext, max_scrolls: int = MAX_FEED_SCROLLS) -> list[dict]:
+    """Open FEED_URL, scroll up to max_scrolls times, and return one dict per unique post link."""
+    print_step("Scraping Threads trending feed...")
+    page = context.new_page()
+    posts: list[dict] = []
+    seen_ids: set[str] = set()  # post IDs already collected during this call
+
+    try:
+        page.goto(FEED_URL, timeout=0)  # timeout=0 disables the navigation timeout
+        page.wait_for_timeout(4000)
+
+        last_height = 0
+
+        for i in range(max_scrolls):
+            links = page.locator(POST_LINK_SELECTOR).all()
+            new_found = 0
+
+            for link in links:
+                href = link.get_attribute("href")
+                if not href:
+                    continue
+                post_id = _post_id_from_url(href)
+                if post_id in seen_ids:
+                    continue
+                seen_ids.add(post_id)
+
+                raw_text = _extract_text_from_card(link)
+                parsed = _parse_card_text(raw_text)
+
+                posts.append({
+                    "url": _to_absolute_url(href),
+                    "text": raw_text,
+                    "body": parsed["body"],
+                    "username": parsed["username"],
+                    "timestamp": parsed["timestamp"],
+                    "likes": parsed["likes"],
+                    "replies_shown": parsed["replies"],
+                    "reposts": parsed["reposts"],
+                    "post_id": post_id,
+                })
+                new_found += 1
+
+            if new_found > 0:
+                top = posts[-1]  # the most recently appended post, not the highest-engagement one
+                print_substep(
+                    f"Scroll {i + 1}: +{new_found} posts | top: "
+                    f"♥{top['likes']:,} 💬{top['replies_shown']} 🔁{top['reposts']} "
+                    f"'{top['body'][:50]}...'",
+                    style="dim",
+                )
+
+            if new_found == 0 and i > 5:  # empty passes are tolerated for the first six scrolls
+                break
+
+            page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
+            page.wait_for_timeout(SCROLL_DELAY_MS)
+
+            new_height = page.evaluate("document.body.scrollHeight")
+            if new_height == last_height:  # page stopped growing, so stop scrolling
+                break
+            last_height = new_height
+
+    finally:
+        page.close()
+
+    print_substep(f"Scraped {len(posts)} posts from feed.", style="bold green")
+    return posts
+
+
+def _scrape_search_page(context: BrowserContext, query: str, max_scrolls: int = 5) -> list[dict]:
+    """Search Threads for *query* and scrape the result cards (same parsing as the feed).
+
+    The query is percent-encoded so spaces, '&' or '#' cannot corrupt the search URL.
+    """
+    from urllib.parse import quote  # function-scope import keeps this block self-contained
+    print_step(f"Scraping Threads search: '{query}'...")
+    page = context.new_page()
+    posts: list[dict] = []
+    seen_ids: set[str] = set()
+    search_url = f"https://www.threads.net/search?q={quote(query, safe='')}&serp_type=tags"
+    try:
+        page.goto(search_url, timeout=0)
+        page.wait_for_timeout(4000)
+
+        for i in range(max_scrolls):
+            links = page.locator(POST_LINK_SELECTOR).all()
+            new_found = 0
+
+            for link in links:
+                href = link.get_attribute("href")
+                if not href:
+                    continue
+                post_id = _post_id_from_url(href)
+                if post_id in seen_ids:
+                    continue
+                seen_ids.add(post_id)
+
+                raw_text = _extract_text_from_card(link)
+                parsed = _parse_card_text(raw_text)
+
+                posts.append({
+                    "url": _to_absolute_url(href),
+                    "text": raw_text,
+                    "body": parsed["body"],
+                    "username": parsed["username"],
+                    "timestamp": parsed["timestamp"],
+                    "likes": parsed["likes"],
+                    "replies_shown": parsed["replies"],
+                    "reposts": parsed["reposts"],
+                    "post_id": post_id,
+                })
+                new_found += 1
+
+            if new_found == 0:  # a pass that adds nothing ends the search early
+                break
+
+            page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
+            page.wait_for_timeout(SCROLL_DELAY_MS)
+
+    finally:
+        page.close()
+
+    print_substep(f"Search '{query}': {len(posts)} posts.", style="dim")
+    return posts
+
+
+# --- Candidate filtering ---
+
+
+def _parse_timestamp_to_hours(ts: str) -> float | None:
+    """Convert a Threads relative timestamp ('5m', '14h', '1d', '2w') to hours.
+
+    'm' means minutes on Threads. Returns None for empty or unrecognized input.
+    """
+    if not ts:
+        return None
+    ts = ts.strip().lower()
+    if ts.endswith("h"):
+        try:
+            return float(ts[:-1])
+        except ValueError:
+            return None
+    elif ts.endswith("d"):
+        try:
+            return float(ts[:-1]) * 24
+        except ValueError:
+            return None
+    elif ts.endswith("w"):
+        try:
+            return float(ts[:-1]) * 24 * 7
+        except ValueError:
+            return None
+    elif ts.endswith("m"):  # minutes, not months: '5m' must not look ~150 days old
+        try:
+            return float(ts[:-1]) / 60
+        except ValueError:
+            return None
+    return None
+
+
+def _age_from_config() -> float | None:
+    """Return the configured max_post_age in hours; None when unset or unparseable."""
+    configured = settings.config["threads"]["thread"].get("max_post_age", "")
+    if configured:
+        return _parse_timestamp_to_hours(configured)
+    return None
+
+
+def _contains_blocked(text: str, blocked_raw: str) -> bool:
+    """Case-insensitively check *text* for any comma-separated term in *blocked_raw*."""
+    if not blocked_raw:
+        return False
+    haystack = text.lower()
+    return any(t.strip().lower() in haystack for t in blocked_raw.split(",") if t.strip())
+
+
+def _filter_candidates(posts: list[dict]) -> list[dict]:
+    """Filter scraped posts by done-status, blocked words, body length, age and engagement.
+
+    Sorts by total engagement (likes + reposts) descending so the most
+    viral posts are tried first.
+    """
+    t_config = settings.config["threads"]["thread"]
+    blocked_raw = t_config.get("blocked_words", "")
+    min_engagement = int(t_config.get("min_engagement", 0))
+
+    max_age_hours = _age_from_config()  # None disables the age filter
+
+    candidates = []
+    for post in posts:
+        if check_done_by_id(post["post_id"]):  # presumably: already turned into a video
+            continue
+        if _contains_blocked(post["body"], blocked_raw):
+            continue
+        if not post["body"] or len(post["body"].strip()) < 10:  # skip empty/very short bodies
+            continue
+        # Age filter
+        if max_age_hours is not None:
+            post_hours = _parse_timestamp_to_hours(post.get("timestamp", ""))
+            if post_hours is not None and post_hours > max_age_hours:  # unparseable ages pass
+                continue
+        total_engagement = post.get("likes", 0) + post.get("reposts", 0)
+        if total_engagement < min_engagement:
+            continue
+        post["_total_engagement"] = total_engagement  # stored on the input dict for sorting/logging
+        candidates.append(post)
+
+    # Sort by engagement descending — most viral first
+    candidates.sort(key=lambda p: p.get("_total_engagement", 0), reverse=True)
+
+    age_str = f", max age ≤{max_age_hours}h" if max_age_hours else ""
+    if min_engagement > 0:
+        print_substep(
+            f"Filtered {len(posts)} posts -> {len(candidates)} viral candidates "
+            f"(min ♥+🔁 ≥ {min_engagement:,}{age_str})",
+            style="dim",
+        )
+    else:
+        print_substep(
+            f"Filtered {len(posts)} posts -> {len(candidates)} candidates"
+            f"{' (max age ≤' + str(max_age_hours) + 'h)' if max_age_hours else ''}",
+            style="dim",
+        )
+    return candidates
+
+
+# --- Reply scraping on post pages ---
+
+
+def _scrape_post_replies(context: BrowserContext, post_url: str, max_replies: int = 100) -> list[dict]:
+    """Navigate to a post page, scroll to load replies, extract reply data.
+
+    Returns dicts with comment_body, comment_url and comment_id (at most max_replies).
+    """
+    page = context.new_page()
+    replies: list[dict] = []
+    seen_ids: set[str] = set()
+    main_post_id = _post_id_from_url(post_url)  # used to skip links back to the post itself
+
+    try:
+        page.goto(post_url, timeout=0)  # timeout=0 disables the navigation timeout
+        page.wait_for_timeout(4000)
+
+        stable_count = 0  # consecutive passes that added no reply
+        last_count = 0
+
+        for _ in range(15):  # hard cap on scroll passes
+            links = page.locator(POST_LINK_SELECTOR).all()
+
+            for link in links:
+                href = link.get_attribute("href")
+                if not href:
+                    continue
+                reply_id = _post_id_from_url(href)
+                if reply_id == main_post_id:
+                    continue
+                if reply_id in seen_ids:
+                    continue
+                seen_ids.add(reply_id)  # marked before extraction, so a textless card is not retried
+
+                raw_text = _extract_text_from_card(link)
+                if not raw_text:
+                    continue
+
+                parsed = _parse_card_text(raw_text)
+                cleaned_body = parsed["body"]
+
+                replies.append({
+                    "comment_body": cleaned_body,
+                    "comment_url": _to_absolute_url(href),
+                    "comment_id": reply_id,
+                })
+
+                if len(replies) >= max_replies:
+                    break
+
+            if len(replies) >= max_replies:
+                break
+
+            if len(replies) == last_count:
+                stable_count += 1
+                if stable_count >= 3:  # three passes in a row with nothing new: stop
+                    break
+            else:
+                stable_count = 0
+            last_count = len(replies)
+
+            page.evaluate("window.scrollBy(0, document.body.scrollHeight)")
+            page.wait_for_timeout(1500)
+
+    finally:
+        page.close()
+
+    return replies
+
+
+def _scrape_main_post_text(context: BrowserContext, post_url: str) -> str:
+    """Open *post_url* and return the main post's body text ("" when it is not found)."""
+    page = context.new_page()
+    try:
+        page.goto(post_url, timeout=0)
+        page.wait_for_timeout(3000)
+
+        target_id = _post_id_from_url(post_url)
+        for anchor in page.locator(POST_LINK_SELECTOR).all():
+            href = anchor.get_attribute("href")
+            if not href or _post_id_from_url(href) != target_id:
+                continue
+            raw = _extract_text_from_card(anchor)
+            if raw:
+                return _parse_card_text(raw)["body"] or raw
+        return ""
+    finally:
+        page.close()
+
+
+# --- Content object builder ---
+
+
+def _build_content_object(post: dict, replies: list[dict]) -> dict:
+    """Build the standard content_object from scraped post + replies.
+
+    In storymode the replies are ignored and the post text becomes thread_post.
+    """
+    t_config = settings.config["threads"]["thread"]
+    max_len = int(t_config["max_reply_length"])
+    min_len = int(t_config["min_reply_length"])
+    blocked_raw = t_config.get("blocked_words", "")
+
+    storymode = settings.config["settings"].get("storymode", False)
+
+    # Use cleaned body text for the title, fall back to raw text
+    title = post.get("body") or post.get("text") or ""
+
+    content: dict = {
+        "thread_id": post["post_id"],
+        "thread_title": title[:280],  # title is capped at 280 characters
+        "thread_url": post["url"],
+        "is_nsfw": False,
+        "thread_category": "threads",
+        "comments": [],
+    }
+
+    if storymode:
+        content["thread_post"] = title  # full, untruncated text
+        print_substep("Storymode: using post text as thread_post.", style="dim")
+        return content
+
+    for reply in replies:
+        body = reply.get("comment_body", "").strip()
+        if not body:
+            continue
+        if _contains_blocked(body, blocked_raw):
+            continue
+        if not (min_len <= len(body) <= max_len):
+            continue
+        sanitised = sanitize_text(body)  # used only as a gate; the original body is stored
+        if not sanitised:
+            continue
+
+        content["comments"].append({
+            "comment_body": body,
+            "comment_url": reply["comment_url"],
+            "comment_id": reply["comment_id"],
+        })
+
+    return content
+
+
+# --- Main entry point ---
+
+
+def get_trending_threads_content(POST_ID: Optional[str] = None) -> dict:
+    """Discover trending Threads posts via web scraping and return a content_object.
+    Raises RuntimeError when POST_ID yields nothing or no scraped candidate qualifies."""
+    print_step("Discovering trending Threads content via web scraping...")
+    min_replies = int(settings.config["threads"]["thread"]["min_replies"])
+    min_engagement = int(settings.config["threads"]["thread"].get("min_engagement", 0))
+
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=True)
+        try:
+            context = ensure_authenticated_context(browser)
+
+            if POST_ID:  # explicit post: skip discovery and scrape that post only
+                post_url = f"https://www.threads.net/t/{POST_ID}"
+                post = {"url": post_url, "post_id": POST_ID, "text": "", "body": ""}
+                replies = _scrape_post_replies(context, post_url)
+                content = _build_content_object(post, replies)
+                if content["comments"] or content.get("thread_post"):
+                    return content
+                raise RuntimeError(
+                    f"No replies found for post {POST_ID}. "
+                    f"Minimum required: {min_replies}."
+                )
+
+            # Scrape from multiple sources: main feed + trending search queries
+            posts = _scrape_feed_posts(context)
+            # Also search for popular topics to find high-engagement content
+            trending_queries = settings.config["threads"]["thread"].get(
+                "search_queries", "news,politics,trending"
+            )
+            for query in trending_queries.split(","):
+                query = query.strip()
+                if query:
+                    try:
+                        search_posts = _scrape_search_page(context, query)
+                        # Merge avoiding duplicates
+                        existing_ids = {p["post_id"] for p in posts}
+                        for sp in search_posts:
+                            if sp["post_id"] not in existing_ids:
+                                posts.append(sp)
+                    except Exception as e:  # best-effort: one bad query must not abort the run
+                        print_substep(f"  Search '{query}' failed: {e}", style="yellow")
+
+            if not posts:
+                raise RuntimeError("No posts found in feed. Try again later.")
+
+            candidates = _filter_candidates(posts)
+            if not candidates:
+                raise RuntimeError(
+                    f"No eligible posts in feed after filtering. "
+                    f"Try lowering min_engagement (currently {min_engagement:,}) "
+                    f"or min_replies (currently {min_replies})."
+                )
+
+            for i, candidate in enumerate(candidates):
+                # Candidates arrive sorted by engagement, so the first viable one is returned.
+                print_substep(
+                    f"Trying #{i + 1}: ♥{candidate['likes']:,} "
+                    f"💬{candidate['replies_shown']} "
+                    f"'{candidate['body'][:60]}...'",
+                    style="dim",
+                )
+                try:
+                    replies = _scrape_post_replies(context, candidate["url"])
+                    if len(replies) >= min_replies:
+                        if not candidate.get("body") or len(candidate.get("body", "")) < 50:
+                            full_text = _scrape_main_post_text(context, candidate["url"])
+                            if full_text:
+                                candidate["body"] = full_text
+                        content = _build_content_object(candidate, replies)
+                        title_preview = content["thread_title"][:60]
+                        print_substep(
+                            f"Selected: '{title_preview}...' "
+                            f"♥{candidate['likes']:,} 💬{len(content['comments'])} replies",
+                            style="bold green",
+                        )
+                        return content
+                    print_substep(
+                        f"  Only {len(replies)} replies (need {min_replies}). Trying next...",
+                        style="yellow",
+                    )
+                except Exception as e:
+                    print_substep(f"  Failed: {e}. Trying next...", style="yellow")
+                    continue
+
+            raise RuntimeError(
+                f"No eligible posts with {min_replies}+ replies found "
+                f"after trying {len(candidates)} candidates."
+            )
+
+        finally:
+            browser.close()
diff --git a/platforms/threads/screenshot.py b/platforms/threads/screenshot.py
index cd371ea..03383fc 100644
--- a/platforms/threads/screenshot.py
+++ b/platforms/threads/screenshot.py
@@ -1,62 +1,16 @@
 """Captures screenshots of Threads posts via Playwright."""
 
-import json
 import re
 from pathlib import Path
 from typing import Final
 
 from playwright.sync_api import ViewportSize, sync_playwright
 
+from platforms.threads.auth import ensure_authenticated_context
 from utils import settings
 from utils.console import print_step, print_substep
 
 
-THREADS_LOGIN_URL = "https://www.threads.net/login"
-THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json"
-
-
-def _login_to_threads(page, context) -> None:
-    """
-    Performs Threads login via Instagram credentials (Threads uses Instagram auth).
-    Saves session cookies to cookie-threads.json for reuse on future runs.
-
-    Args:
-        page: Playwright page object
-        context: Playwright browser context
-
-    Raises:
-        RuntimeError: If login credentials are not configured.
-    """
-    username = settings.config["threads"]["creds"].get("username", "").strip()
-    password = settings.config["threads"]["creds"].get("password", "").strip()
-
-    if not username or not password:
-        raise RuntimeError(
-            "Threads screenshot login requires credentials. "
-            "Set threads.creds.username and threads.creds.password in config.toml"
-        )
-
-    print_substep("Logging into Threads (via Instagram)...")
-    page.goto(THREADS_LOGIN_URL, timeout=0)
-    page.wait_for_load_state("networkidle")
-
-    # Threads login form uses Instagram auth with these selectors
-    page.locator('input[autocomplete="username"]').fill(username)
-    page.locator('input[autocomplete="current-password"]').fill(password)
-    page.get_by_role("button", name="Log in").click()
-
-    # Wait for login to complete
-    page.wait_for_timeout(6000)
-
-    # Persist cookies for reuse
-    cookies = context.cookies()
-    Path(THREADS_COOKIE_FILE).parent.mkdir(parents=True, exist_ok=True)
-    with open(THREADS_COOKIE_FILE, "w") as f:
-        json.dump(cookies, f)
-
-    print_substep("Logged into Threads and saved session cookies.", style="bold green")
-
-
 def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) -> None:
     """
     Downloads screenshots of Threads posts via Playwright.
@@ -89,37 +43,13 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int)
     with sync_playwright() as p:
         print_substep("Launching headless browser...")
         browser = p.chromium.launch(headless=True)
-        context = browser.new_context(
-            locale="en-US",
+        context = ensure_authenticated_context(
+            browser,
             color_scheme="dark" if theme == "dark" else "light",
             viewport=ViewportSize(width=W, height=H),
             device_scale_factor=dsf,
-            user_agent=(
-                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
-                "AppleWebKit/537.36 (KHTML, like Gecko) "
-                "Chrome/120.0.0.0 Safari/537.36"
-            ),
         )
 
-        # Try to load saved cookies; if not found or invalid, do a fresh login
-        cookie_path = Path(THREADS_COOKIE_FILE)
-        if cookie_path.exists():
-            try:
-                with open(cookie_path, encoding="utf-8") as f:
-                    saved_cookies = json.load(f)
-                context.add_cookies(saved_cookies)
-                print_substep("Loaded saved Threads session cookies.")
-            except (json.JSONDecodeError, IOError):
-                print_substep("Saved cookies corrupted. Logging in fresh...")
-                page = context.new_page()
-                _login_to_threads(page, context)
-                page.close()
-        else:
-            print_substep("No saved cookies found. Logging in...")
-            page = context.new_page()
-            _login_to_threads(page, context)
-            page.close()
-
         # Screenshot the main post
         page = context.new_page()
         page.goto(content_object["thread_url"], timeout=0)
@@ -128,13 +58,21 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int)
 
         postcontentpath = f"assets/temp/{thread_id}/png/title.png"
         try:
-            # On Threads.net post permalink pages, the main post is the first article element
-            post_locator = page.locator("article").first
-            if not post_locator.is_visible():
-                raise RuntimeError(
-                    "Main post article not found on page. "
-                    "Check if you're logged in correctly or if the post is deleted."
-                )
+            # Threads.net uses div-based cards, not <article> elements.
+            # Find the first post link and screenshot its parent card.
+            post_link = page.locator('a[href*="/post/"]').first
+            if post_link.count() and post_link.is_visible():
+                # Screenshot the card container, or fall back to the link's parent
+                card = post_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]')
+                if card.count():
+                    post_locator = card.first
+                else:
+                    post_locator = post_link
+            else:
+                # Fallback: try article (older Threads layout) or full page
+                post_locator = page.locator("article").first
+                if not post_locator.count() or not post_locator.is_visible():
+                    post_locator = page.locator("body")
 
             if settings.config["settings"].get("zoom", 1) != 1:
                 zoom = settings.config["settings"]["zoom"]
@@ -163,10 +101,16 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int)
                     page.wait_for_load_state("networkidle")
                     page.wait_for_timeout(2000)
 
-                    # Each reply permalink page shows that reply as the first article
-                    reply_locator = page.locator("article").first
-                    if not reply_locator.is_visible():
-                        print_substep(f"Reply {idx} article not found. Skipping...", style="yellow")
+                    # Threads.net uses div-based cards for replies too.
+                    # Find the first post link and screenshot its card container.
+                    reply_link = page.locator('a[href*="/post/"]').first
+                    if reply_link.count() and reply_link.is_visible():
+                        card = reply_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]')
+                        reply_locator = card.first if card.count() else reply_link
+                    else:
+                        reply_locator = page.locator("article").first
+                    if not reply_locator.count() or not reply_locator.is_visible():
+                        print_substep(f"Reply {idx} not found. Skipping...", style="yellow")
                         continue
 
                     if settings.config["settings"].get("zoom", 1) != 1:
diff --git a/requirements.txt b/requirements.txt
index 6e115f2..49dfe69 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,7 +17,7 @@ unidecode==1.4.0
 torch==2.11.0
 transformers==4.57.6
 # spacy==3.8.7  # Optional: only for advanced text parsing (not yet Python 3.14 compatible)
-ffmpeg-python==0.2.0
+av>=14.0
 elevenlabs==2.44.0
 yt-dlp==2025.10.14
 google-auth-oauthlib==1.2.1
diff --git a/utils/.config.template.toml b/utils/.config.template.toml
index d3db198..042efde 100644
--- a/utils/.config.template.toml
+++ b/utils/.config.template.toml
@@ -21,6 +21,9 @@ blocked_words = { optional = true, default = "", type = "str", explanation = "Co
 ai_similarity_enabled = {optional = true, option = [true, false], default = false, type = "bool", explanation = "Threads read from Reddit are sorted based on their similarity to the keywords given below"}
 ai_similarity_keywords = {optional = true, type="str", example= 'Elon Musk, Twitter, Stocks', explanation = "Every keyword or even sentence, seperated with comma, is used to sort the reddit threads based on similarity"}
 
+[threads]
+discovery_method = { optional = true, default = "api", options = ["api", "scrape"], type = "str", explanation = "How to discover Threads content: 'api' uses Graph API (your own posts), 'scrape' uses web scraping (trending ForYou feed). Requires threads.creds.username/password for Playwright login." }
+
 [threads.creds]
 access_token = { optional = false, explanation = "Meta Threads long-lived user access token (User token from Graph API, valid for 60 days)", example = "EAABsbCS..." }
 user_id = { optional = false, explanation = "Numeric Threads user ID", example = "12345678901234567" }
@@ -32,6 +35,9 @@ post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z0-9])*$
 max_reply_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "Max characters per reply", example = 500, oob_error = "Max reply length should be between 10 and 10000" }
 min_reply_length = { default = 1, optional = true, nmin = 0, nmax = 10000, type = "int", explanation = "Min characters per reply", example = 1, oob_error = "Min reply length should be between 0 and 10000" }
 min_replies = { default = 5, optional = false, nmin = 1, type = "int", explanation = "Minimum number of replies for a post to be eligible", example = 5, oob_error = "Minimum replies should be at least 1" }
+min_engagement = { default = 0, optional = true, nmin = 0, type = "int", explanation = "Minimum total engagement (likes + reposts) to consider a post viral. Set to 0 to disable. Example: 10000 means only posts with 10K+ total likes+reposts.", example = 10000 }
+max_post_age = { optional = true, default = "", options = ["", "1h", "6h", "24h", "3d", "7d", "30d"], type = "str", explanation = "Maximum age of posts to consider. Empty = no limit.", example = "7d" }
+search_queries = { optional = true, default = "news,politics,trending", type = "str", explanation = "Comma-separated search queries to find trending content on Threads. Combined with main feed results.", example = "news,politics,viral" }
 blocked_words = { optional = true, default = "", type = "str", explanation = "Comma-separated list of blocked words/phrases. Posts and replies containing any of these will be skipped.", example = "nsfw, spoiler, politics" }
 
 [youtube]
@@ -58,7 +64,7 @@ zoom = { optional = true, default = 1, example = 1.1, explanation = "Sets the br
 channel_name = { optional = true, default = "Reddit Tales", example = "Reddit Stories", explanation = "Sets the channel name for the video" }
 
 [settings.background]
-background_video = { optional = true, default = "minecraft", example = "rocket-league", options = ["minecraft", "gta", "rocket-league", "motor-gta", "csgo-surf", "cluster-truck", "minecraft-2","multiversus","fall-guys","steep", ""], explanation = "Sets the background for the video based on game name" }
+background_video = { optional = true, default = "minecraft", example = "rocket-league", options = ["minecraft", "gta", "rocket-league", "motor-gta", "csgo-surf", "cluster-truck", "minecraft-2","multiversus","fall-guys","steep", "black", ""], explanation = "Sets the background for the video based on game name" }
 background_audio = { optional = true, default = "lofi", example = "chill-summer", options = ["lofi","lofi-2","chill-summer",""], explanation = "Sets the background audio for the video" }
 background_audio_volume = { optional = true, type = "float", nmin = 0, nmax = 1, default = 0.15, example = 0.05, explanation="Sets the volume of the background audio. If you don't want background audio, set it to 0.", oob_error = "The volume HAS to be between 0 and 1", input_error = "The volume HAS to be a float number between 0 and 1"}
 enable_extra_audio = { optional = true, type = "bool", default = false, example = false, explanation="Used if you want to render another video without background audio in a separate folder", input_error = "The value HAS to be true or false"}
diff --git a/utils/background_audios.json b/utils/background_audios.json
index 752436d..abcfc6d 100644
--- a/utils/background_audios.json
+++ b/utils/background_audios.json
@@ -14,5 +14,10 @@
         "https://www.youtube.com/watch?v=EZE8JagnBI8",
         "chill-summer.mp3",
         "Mellow Vibes Radio"
+    ],
+    "silent": [
+        "",
+        "silent.mp3",
+        "local"
     ]
 }
diff --git a/utils/background_videos.json b/utils/background_videos.json
index 6e00992..a36d227 100644
--- a/utils/background_videos.json
+++ b/utils/background_videos.json
@@ -59,5 +59,11 @@
         "steep.mp4",
         "joel",
         "center"
+    ],
+    "black": [
+        "",
+        "black-background.mp4",
+        "local",
+        "center"
     ]
 }
diff --git a/video_creation/background.py b/video_creation/background.py
index aad552d..e1c6416 100644
--- a/video_creation/background.py
+++ b/video_creation/background.py
@@ -86,7 +86,7 @@ def download_background_video(background_config: Tuple[str, str, str, Any]):
     print_substep("Downloading the backgrounds videos... please be patient 🙏 ")
     print_substep(f"Downloading {filename} from {uri}")
     ydl_opts = {
-        "format": "bestvideo[height<=1080][ext=mp4]",
+        "format": "best[height<=1080][ext=mp4]/best[height<=1080]",
         "outtmpl": f"assets/backgrounds/video/{credit}-{filename}",
         "retries": 10,
     }
diff --git a/video_creation/final_video.py b/video_creation/final_video.py
index ea82683..771c3a1 100644
--- a/video_creation/final_video.py
+++ b/video_creation/final_video.py
@@ -1,15 +1,17 @@
+import json
 import multiprocessing
 import os
 import re
+import subprocess
 import tempfile
 import textwrap
 import threading
 import time
-from os.path import exists  # Needs to be imported specifically
+from os.path import exists
 from pathlib import Path
 from typing import Dict, Final, Tuple
 
-import ffmpeg
+import av
 import translators
 from PIL import Image, ImageDraw, ImageFont
 from rich.console import Console
@@ -26,7 +28,27 @@ from utils.videos import save_data
 console = Console()
 
 
+def _probe_duration(path: str) -> float:
+    """Return the duration in seconds of the first stream of the media file at path, read via PyAV."""
+    with av.open(path) as container:
+        stream = container.streams[0]  # first stream only; NOTE(review): assumes stream.duration is not None - confirm
+        return float(stream.duration * stream.time_base)
+
+
+def _run_ffmpeg(args: list[str], description: str = "") -> None:
+    """Run ``ffmpeg -y`` followed by args; raise RuntimeError (tagged with description) on a non-zero exit."""
+    result = subprocess.run(
+        ["ffmpeg", "-y"] + args,
+        capture_output=True,
+    )
+    if result.returncode != 0:
+        stderr = result.stderr.decode("utf-8", errors="replace")
+        raise RuntimeError(f"ffmpeg {description} failed: {stderr[-500:]}")  # only the last 500 chars of stderr
+
+
 class ProgressFfmpeg(threading.Thread):
+    """Thread that reads ffmpeg progress via a named pipe during encoding."""
+
     def __init__(self, vid_duration_seconds, progress_update_callback):
         threading.Thread.__init__(self, name="ProgressFfmpeg")
         self.stop_event = threading.Event()
@@ -36,24 +58,24 @@ class ProgressFfmpeg(threading.Thread):
 
     def run(self):
         while not self.stop_event.is_set():
-            latest_progress = self.get_latest_ms_progress()
+            latest_progress = self._get_latest_ms_progress()
             if latest_progress is not None:
                 completed_percent = latest_progress / self.vid_duration_seconds
-                self.progress_update_callback(completed_percent)
+                self.progress_update_callback(min(completed_percent, 1.0))
             time.sleep(1)
 
-    def get_latest_ms_progress(self):
-        lines = self.output_file.readlines()
-
+    def _get_latest_ms_progress(self):
+        try:
+            with open(self.output_file.name) as f:
+                lines = f.readlines()[::-1]  # whole file is re-read each poll: scan newest-first
+        except (IOError, OSError):
+            return None
         if lines:
             for line in lines:
                 if "out_time_ms" in line:
-                    out_time_ms_str = line.split("=")[1].strip()
-                    if out_time_ms_str.isnumeric():
-                        return float(out_time_ms_str) / 1000000.0
-                    else:
-                        # Handle the case when "N/A" is encountered
-                        return None
+                    val = line.partition("=")[2].strip()  # "" (not IndexError) on a half-written line
+                    if val.isnumeric():
+                        return float(val) / 1000000.0
         return None
 
     def stop(self):
@@ -79,34 +101,22 @@ def name_normalize(name: str) -> str:
             settings.config.get("reddit", {}).get("thread", {}).get("post_lang", ""))
     if lang:
         print_substep("Translating filename...")
-        translated_name = translators.translate_text(name, translator="google", to_language=lang)
-        return translated_name
-    else:
-        return name
+        return translators.translate_text(name, translator="google", to_language=lang)
+    return name
 
 
 def prepare_background(reddit_id: str, W: int, H: int) -> str:
+    """Crop background video to match target aspect ratio, re-encode without audio."""
+    input_path = f"assets/temp/{reddit_id}/background.mp4"
     output_path = f"assets/temp/{reddit_id}/background_noaudio.mp4"
-    output = (
-        ffmpeg.input(f"assets/temp/{reddit_id}/background.mp4")
-        .filter("crop", f"ih*({W}/{H})", "ih")
-        .output(
-            output_path,
-            an=None,
-            **{
-                "c:v": "h264_nvenc",
-                "b:v": "20M",
-                "b:a": "192k",
-                "threads": multiprocessing.cpu_count(),
-            },
-        )
-        .overwrite_output()
-    )
-    try:
-        output.run(quiet=True)
-    except ffmpeg.Error as e:
-        print(e.stderr.decode("utf8"))
-        exit(1)
+    _run_ffmpeg([
+        "-i", input_path,
+        "-vf", f"crop=ih*({W}/{H}):ih,scale={W}:{H}",
+        "-c:v", "libx264", "-b:v", "20M",
+        "-an",
+        "-threads", str(multiprocessing.cpu_count()),
+        output_path,
+    ], "prepare_background")
     return output_path
 
 
@@ -120,51 +130,38 @@ def get_text_height(draw, text, font, max_width):
 
 
 def create_fancy_thumbnail(image, text, text_color, padding, wrap=35):
-    """
-    It will take the 1px from the middle of the template and will be resized (stretched) vertically to accommodate the extra height needed for the title.
-    """
     print_step(f"Creating fancy thumbnail for: {text}")
     font_title_size = 47
     font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size)
     image_width, image_height = image.size
 
-    # Calculate text height to determine new image height
     draw = ImageDraw.Draw(image)
     text_height = get_text_height(draw, text, font, wrap)
     lines = textwrap.wrap(text, width=wrap)
-    # This is -50 to reduce the empty space at the bottom of the image,
-    # change it as per your requirement if needed otherwise leave it.
     new_image_height = image_height + text_height + padding * (len(lines) - 1) - 50
 
-    # Separate the image into top, middle (1px), and bottom parts
     top_part_height = image_height // 2
-    middle_part_height = 1  # 1px height middle section
+    middle_part_height = 1
     bottom_part_height = image_height - top_part_height - middle_part_height
 
     top_part = image.crop((0, 0, image_width, top_part_height))
     middle_part = image.crop((0, top_part_height, image_width, top_part_height + middle_part_height))
     bottom_part = image.crop((0, top_part_height + middle_part_height, image_width, image_height))
 
-    # Stretch the middle part
     new_middle_height = new_image_height - top_part_height - bottom_part_height
     middle_part = middle_part.resize((image_width, new_middle_height))
 
-    # Create new image with the calculated height
     new_image = Image.new("RGBA", (image_width, new_image_height))
-
-    # Paste the top, stretched middle, and bottom parts into the new image
     new_image.paste(top_part, (0, 0))
     new_image.paste(middle_part, (0, top_part_height))
     new_image.paste(bottom_part, (0, top_part_height + new_middle_height))
 
-    # Draw the title text on the new image
     draw = ImageDraw.Draw(new_image)
     y = top_part_height + padding
     for line in lines:
         draw.text((120, y), line, font=font, fill=text_color, align="left")
         y += get_text_height(draw, line, font, wrap) + padding
 
-    # Draw the username "PlotPulse" at the specific position
     username_font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 30)
     draw.text(
         (205, 825),
@@ -173,28 +170,72 @@ def create_fancy_thumbnail(image, text, text_color, padding, wrap=35):
         fill=text_color,
         align="left",
     )
-
     return new_image
 
 
-def merge_background_audio(audio: ffmpeg, reddit_id: str):
-    """Gather an audio and merge with assets/backgrounds/background.mp3
-    Args:
-        audio (ffmpeg): The TTS final audio but without background.
-        reddit_id (str): The ID of subreddit
-    """
+def merge_background_audio(tts_audio_path: str, reddit_id: str) -> str:
+    """Mix background audio into the TTS audio. Returns path to the mixed file."""
     background_audio_volume = settings.config["settings"]["background"]["background_audio_volume"]
     if background_audio_volume == 0:
-        return audio  # Return the original audio
-    else:
-        # sets volume to config
-        bg_audio = ffmpeg.input(f"assets/temp/{reddit_id}/background.mp3").filter(
-            "volume",
-            background_audio_volume,
+        return tts_audio_path
+
+    output_path = f"assets/temp/{reddit_id}/audio_mixed.mp3"
+    bg_audio_path = f"assets/temp/{reddit_id}/background.mp3"
+    _run_ffmpeg([
+        "-i", tts_audio_path,
+        "-i", bg_audio_path,
+        "-filter_complex",
+        f"[1:a]volume={background_audio_volume}[bga];[0:a][bga]amix=inputs=2:duration=longest",
+        "-b:a", "192k",
+        output_path,
+    ], "audio_mix")
+    return output_path
+
+
+def _build_audio_concat_list(input_paths: list[str], list_path: str) -> None:
+    """Write an ffmpeg concat-demuxer list of absolute paths; a quote in a path is closed, escaped and reopened."""
+    with open(list_path, "w", encoding="utf-8") as f:
+        for p in input_paths:
+            f.write("file '{}'\n".format(os.path.abspath(p).replace("'", "'\\''")))
+
+
+def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) -> str:
+    """Build the ffmpeg filter_complex string that overlays timed, centered images on the background.
+
+    Each item needs start_time, duration, opacity, scale_w, scale_h; the graph ends in a [final] pad scaled to WxH.
+    """
+    parts = []
+    prev_label = "0:v"  # background is the first input
+
+    for i, item in enumerate(overlay_items):
+        # Input 0 is the background, so overlay image i is ffmpeg input i + 1.
+        scaled_label = f"sc{i}"
+        faded_label = f"fd{i}"
+
+        # Scale the overlay image
+        parts.append(
+            f"[{i + 1}:v]scale={item['scale_w']}:{item['scale_h']}[{scaled_label}];"
+        )
+        # Set opacity
+        parts.append(
+            f"[{scaled_label}]colorchannelmixer=aa={item['opacity']}[{faded_label}];"
         )
-        # Merges audio and background_audio
-        merged_audio = ffmpeg.filter([audio, bg_audio], "amix", duration="longest")
-        return merged_audio  # Return merged audio
+        # Overlay with timing
+        enable = f"between(t,{item['start_time']},{item['start_time'] + item['duration']})"
+        next_label = f"out{i}" if i < len(overlay_items) - 1 else "outv"
+        parts.append(
+            f"[{prev_label}][{faded_label}]overlay="
+            f"x=(main_w-overlay_w)/2:y=(main_h-overlay_h)/2:"
+            f"enable='{enable}'[{next_label}]"
+        )
+        if i < len(overlay_items) - 1:
+            parts.append(";")
+        # Feed this overlay's output into the next overlay as its base layer.
+        prev_label = next_label
+
+    # Final scale; emit the ';' separator only after a preceding chain (a leading ';' is an empty, invalid chain).
+    parts.append(f"{';' if parts else ''}[{prev_label}]scale={W}:{H}[final]")
+    return "".join(parts)
 
 
 def make_final_video(
@@ -203,19 +244,10 @@ def make_final_video(
     reddit_obj: dict,
     background_config: Dict[str, Tuple],
 ):
-    """Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp
-    Args:
-        number_of_clips (int): Index to end at when going through the screenshots'
-        length (int): Length of the video
-        reddit_obj (dict): The reddit object that contains the posts to read.
-        background_config (Tuple[str, str, str, Any]): The background config to use.
-    """
-    # settings values
+    """Gathers audio clips, stitches screenshots together, encodes final video."""
     W: Final[int] = int(settings.config["settings"]["resolution_w"])
     H: Final[int] = int(settings.config["settings"]["resolution_h"])
-
     opacity = settings.config["settings"]["opacity"]
-
     reddit_id = extract_id(reddit_obj)
 
     allowOnlyTTSFolder: bool = (
@@ -225,141 +257,125 @@ def make_final_video(
 
     print_step("Creating the final video 🎥")
 
-    background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H))
+    # --- Step 1: Prepare background ---
+    background_path = prepare_background(reddit_id, W=W, H=H)
 
-    # Gather all audio clips
-    audio_clips = list()
-    if number_of_clips == 0 and settings.config["settings"]["storymode"] == "false":
-        print(
-            "No audio clips to gather. Please use a different TTS or post."
-        )  # This is to fix the TypeError: unsupported operand type(s) for +: 'int' and 'NoneType'
+    # --- Step 2: Concatenate all TTS audio clips ---
+    audio_clip_paths = []
+    if number_of_clips == 0 and not settings.config["settings"]["storymode"]:
+        print("No audio clips to gather. Please use a different TTS or post.")
         exit()
+
     if settings.config["settings"]["storymode"]:
         if settings.config["settings"]["storymodemethod"] == 0:
-            audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")]
-            audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3"))
-        elif settings.config["settings"]["storymodemethod"] == 1:
-            audio_clips = [
-                ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
-                for i in track(range(number_of_clips + 1), "Collecting the audio files...")
+            audio_clip_paths = [
+                f"assets/temp/{reddit_id}/mp3/title.mp3",
+                f"assets/temp/{reddit_id}/mp3/postaudio.mp3",
             ]
-            audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3"))
-
+        else:
+            audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"]
+            for i in range(number_of_clips + 1):
+                audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
     else:
-        audio_clips = [
-            ffmpeg.input(f"assets/temp/{reddit_id}/mp3/{i}.mp3") for i in range(number_of_clips)
-        ]
-        audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3"))
-
-        audio_clips_durations = [
-            float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/{i}.mp3")["format"]["duration"])
-            for i in range(number_of_clips)
-        ]
-        audio_clips_durations.insert(
-            0,
-            float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
-        )
-    audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0)
-    ffmpeg.output(
-        audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"}
-    ).overwrite_output().run(quiet=True)
+        audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"]
+        for i in range(number_of_clips):
+            audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/{i}.mp3")
+
+    existing = [p for p in audio_clip_paths if os.path.exists(p)]
+    concat_audio_path = f"assets/temp/{reddit_id}/audio.mp3"
+    concat_list_path = concat_audio_path + ".concat.txt"
+    _build_audio_concat_list(existing, concat_list_path)
+    _run_ffmpeg([
+        "-f", "concat", "-safe", "0", "-i", concat_list_path,
+        "-b:a", "192k", concat_audio_path,
+    ], "audio_concat")
+    os.unlink(concat_list_path)
+
+    # Probe durations
+    audio_clips_durations = [_probe_duration(p) for p in existing]
+
+    # --- Step 3: Mix background audio ---
+    mixed_audio_path = merge_background_audio(concat_audio_path, reddit_id)
 
     console.log(f"[bold green] Video Will Be: {length} Seconds Long")
 
+    # --- Step 4: Build overlay items ---
     screenshot_width = int((W * 45) // 100)
-    audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3")
-    final_audio = merge_background_audio(audio, reddit_id)
-
-    image_clips = list()
-
     Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True)
 
-    # Credits to tim (beingbored)
-    # get the title_template image and draw a text in the middle part of it with the title of the thread
     title_template = Image.open("assets/title_template.png")
-
     title = reddit_obj["thread_title"]
-
     title = name_normalize(title)
+    title_img = create_fancy_thumbnail(title_template, title, "#000000", 5)
+    title_img.save(f"assets/temp/{reddit_id}/png/title.png")
 
-    font_color = "#000000"
-    padding = 5
-
-    # create_fancy_thumbnail(image, text, text_color, padding
-    title_img = create_fancy_thumbnail(title_template, title, font_color, padding)
+    overlay_items = []
+    current_time = 0.0
 
-    title_img.save(f"assets/temp/{reddit_id}/png/title.png")
-    image_clips.insert(
-        0,
-        ffmpeg.input(f"assets/temp/{reddit_id}/png/title.png")["v"].filter(
-            "scale", screenshot_width, -1
-        ),
-    )
+    overlay_items.append({
+        "path": f"assets/temp/{reddit_id}/png/title.png",
+        "start_time": current_time,
+        "duration": audio_clips_durations[0],
+        "opacity": opacity,
+        "scale_w": screenshot_width,
+        "scale_h": -1,
+    })
+    current_time += audio_clips_durations[0]
 
-    current_time = 0
     if settings.config["settings"]["storymode"]:
-        audio_clips_durations = [
-            float(
-                ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")["format"]["duration"]
-            )
-            for i in range(number_of_clips)
-        ]
-        audio_clips_durations.insert(
-            0,
-            float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
-        )
         if settings.config["settings"]["storymodemethod"] == 0:
-            image_clips.insert(
-                1,
-                ffmpeg.input(f"assets/temp/{reddit_id}/png/story_content.png").filter(
-                    "scale", screenshot_width, -1
-                ),
-            )
-            background_clip = background_clip.overlay(
-                image_clips[0],
-                enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})",
-                x="(main_w-overlay_w)/2",
-                y="(main_h-overlay_h)/2",
-            )
-            current_time += audio_clips_durations[0]
+            story_path = f"assets/temp/{reddit_id}/png/story_content.png"
+            if os.path.exists(story_path):
+                overlay_items.append({
+                    "path": story_path,
+                    "start_time": current_time,
+                    "duration": audio_clips_durations[1] if len(audio_clips_durations) > 1 else 5,
+                    "opacity": opacity,
+                    "scale_w": screenshot_width,
+                    "scale_h": -1,
+                })
         elif settings.config["settings"]["storymodemethod"] == 1:
-            for i in track(range(0, number_of_clips + 1), "Collecting the image files..."):
-                image_clips.append(
-                    ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i}.png")["v"].filter(
-                        "scale", screenshot_width, -1
-                    )
-                )
-                background_clip = background_clip.overlay(
-                    image_clips[i],
-                    enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})",
-                    x="(main_w-overlay_w)/2",
-                    y="(main_h-overlay_h)/2",
-                )
-                current_time += audio_clips_durations[i]
+            for i in range(number_of_clips + 1):
+                img_path = f"assets/temp/{reddit_id}/png/img{i}.png"
+                if not os.path.exists(img_path):
+                    continue
+                dur_idx = i + 1
+                if dur_idx >= len(audio_clips_durations):
+                    break
+                overlay_items.append({
+                    "path": img_path,
+                    "start_time": current_time,
+                    "duration": audio_clips_durations[dur_idx],
+                    "opacity": opacity,
+                    "scale_w": screenshot_width,
+                    "scale_h": -1,
+                })
+                current_time += audio_clips_durations[dur_idx]
     else:
-        for i in range(0, number_of_clips + 1):
-            image_clips.append(
-                ffmpeg.input(f"assets/temp/{reddit_id}/png/comment_{i}.png")["v"].filter(
-                    "scale", screenshot_width, -1
-                )
-            )
-            image_overlay = image_clips[i].filter("colorchannelmixer", aa=opacity)
-            assert (
-                audio_clips_durations is not None
-            ), "Please make a GitHub issue if you see this. Ping @JasonLovesDoggo on GitHub."
-            background_clip = background_clip.overlay(
-                image_overlay,
-                enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})",
-                x="(main_w-overlay_w)/2",
-                y="(main_h-overlay_h)/2",
-            )
+        # audio_clips_durations is ordered [title, comment_0, comment_1, ...], so the
+        # screenshot comment_{n}.png pairs with slot n + 1; i below is that slot index.
+        for i in range(1, min(number_of_clips + 2, len(audio_clips_durations))):
+            img_path = f"assets/temp/{reddit_id}/png/comment_{i - 1}.png"
+            if not os.path.exists(img_path):
+                continue
+            overlay_items.append({
+                "path": img_path,
+                "start_time": current_time,
+                "duration": audio_clips_durations[i],
+                "opacity": opacity,
+                "scale_w": screenshot_width,
+                "scale_h": -1,
+            })
             current_time += audio_clips_durations[i]
 
-    title = extract_id(reddit_obj, "thread_title")
+    # --- Step 5: Build filter_complex and render ---
+    filter_complex = _build_overlay_filter_complex(overlay_items, W, H)
+
+    title_clean = extract_id(reddit_obj, "thread_title")
     idx = extract_id(reddit_obj)
     title_thumb = reddit_obj["thread_title"]
+    filename = f"{name_normalize(title_clean)[:251]}"
 
-    filename = f"{name_normalize(title)[:251]}"
     platform = settings.config["settings"].get("platform", "reddit")
     if platform == "reddit":
         subreddit = settings.config["reddit"]["thread"]["subreddit"]
@@ -371,58 +387,36 @@ def make_final_video(
         os.makedirs(f"./results/{subreddit}")
 
     if not exists(f"./results/{subreddit}/OnlyTTS") and allowOnlyTTSFolder:
-        print_substep("The 'OnlyTTS' folder could not be found so it was automatically created.")
         os.makedirs(f"./results/{subreddit}/OnlyTTS")
 
-    # create a thumbnail for the video
+    # Thumbnail
     settingsbackground = settings.config["settings"]["background"]
-
     if settingsbackground["background_thumbnail"]:
         if not exists(f"./results/{subreddit}/thumbnails"):
-            print_substep(
-                "The 'results/thumbnails' folder could not be found so it was automatically created."
-            )
             os.makedirs(f"./results/{subreddit}/thumbnails")
-        # get the first file with the .png extension from assets/backgrounds and use it as a background for the thumbnail
         first_image = next(
-            (file for file in os.listdir("assets/backgrounds") if file.endswith(".png")),
+            (f for f in os.listdir("assets/backgrounds") if f.endswith(".png")),
             None,
         )
-        if first_image is None:
-            print_substep("No png files found in assets/backgrounds", "red")
-
-        else:
+        if first_image:
             font_family = settingsbackground["background_thumbnail_font_family"]
             font_size = settingsbackground["background_thumbnail_font_size"]
             font_color = settingsbackground["background_thumbnail_font_color"]
             thumbnail = Image.open(f"assets/backgrounds/{first_image}")
             width, height = thumbnail.size
             thumbnailSave = create_thumbnail(
-                thumbnail,
-                font_family,
-                font_size,
-                font_color,
-                width,
-                height,
-                title_thumb,
+                thumbnail, font_family, font_size, font_color, width, height, title_thumb,
             )
             thumbnailSave.save(f"./assets/temp/{reddit_id}/thumbnail.png")
             print_substep(f"Thumbnail - Building Thumbnail in assets/temp/{reddit_id}/thumbnail.png")
 
-    text = f"Background by {background_config['video'][2]}"
-    background_clip = ffmpeg.drawtext(
-        background_clip,
-        text=text,
-        x=f"(w-text_w)",
-        y=f"(h-text_h)",
-        fontsize=5,
-        fontcolor="White",
-        fontfile=os.path.join("fonts", "Roboto-Regular.ttf"),
-    )
-    background_clip = background_clip.filter("scale", W, H)
+    # --- Step 6: Render ---
+    defaultPath = f"results/{subreddit}"
+    video_output_path = defaultPath + f"/{filename}"
+    video_output_path = video_output_path[:251] + ".mp4"
+
     print_step("Rendering the video 🎥")
     from tqdm import tqdm
-
     pbar = tqdm(total=100, desc="Progress: ", bar_format="{l_bar}{bar}", unit=" %")
 
     def on_update_example(progress) -> None:
@@ -430,68 +424,70 @@ def make_final_video(
         old_percentage = pbar.n
         pbar.update(status - old_percentage)
 
-    defaultPath = f"results/{subreddit}"
+    # Build ffmpeg command: background + overlay images → filter_complex → video only
+    ffmpeg_inputs = ["-i", background_path]
+    for item in overlay_items:
+        ffmpeg_inputs.extend(["-i", item["path"]])
+
     with ProgressFfmpeg(length, on_update_example) as progress:
-        path = defaultPath + f"/{filename}"
-        path = (
-            path[:251] + ".mp4"
-        )  # Prevent a error by limiting the path length, do not change this.
-        try:
-            ffmpeg.output(
-                background_clip,
-                final_audio,
-                path,
-                f="mp4",
-                **{
-                    "c:v": "h264_nvenc",
-                    "b:v": "20M",
-                    "b:a": "192k",
-                    "threads": multiprocessing.cpu_count(),
-                },
-            ).overwrite_output().global_args("-progress", progress.output_file.name).run(
-                quiet=True,
-                overwrite_output=True,
-                capture_stdout=False,
-                capture_stderr=False,
-            )
-        except ffmpeg.Error as e:
-            print(e.stderr.decode("utf8"))
-            exit(1)
+        # First pass: render video with overlays (no audio)
+        video_only_path = video_output_path + ".video.mp4"
+        _run_ffmpeg(
+            ffmpeg_inputs + [
+                "-filter_complex", filter_complex,
+                "-map", "[final]",
+                "-c:v", "libx264", "-b:v", "20M",
+                "-pix_fmt", "yuv420p",
+                "-threads", str(multiprocessing.cpu_count()),
+                "-progress", progress.output_file.name,
+                video_only_path,
+            ],
+            "overlay_render"
+        )
+
+    # Second pass: mux video with audio
+    _run_ffmpeg([
+        "-i", video_only_path,
+        "-i", mixed_audio_path,
+        "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
+        "-shortest", "-map", "0:v:0", "-map", "1:a:0",
+        video_output_path,
+    ], "audio_mux")
+    os.unlink(video_only_path)
+
     old_percentage = pbar.n
     pbar.update(100 - old_percentage)
+
+    # OnlyTTS variant
     if allowOnlyTTSFolder:
-        path = defaultPath + f"/OnlyTTS/{filename}"
-        path = (
-            path[:251] + ".mp4"
-        )  # Prevent a error by limiting the path length, do not change this.
+        only_tts_path = defaultPath + f"/OnlyTTS/{filename}"
+        only_tts_path = only_tts_path[:251] + ".mp4"
+        only_tts_video = only_tts_path + ".video.mp4"
         print_step("Rendering the Only TTS Video 🎥")
-        with ProgressFfmpeg(length, on_update_example) as progress:
-            try:
-                ffmpeg.output(
-                    background_clip,
-                    audio,
-                    path,
-                    f="mp4",
-                    **{
-                        "c:v": "h264_nvenc",
-                        "b:v": "20M",
-                        "b:a": "192k",
-                        "threads": multiprocessing.cpu_count(),
-                    },
-                ).overwrite_output().global_args("-progress", progress.output_file.name).run(
-                    quiet=True,
-                    overwrite_output=True,
-                    capture_stdout=False,
-                    capture_stderr=False,
-                )
-            except ffmpeg.Error as e:
-                print(e.stderr.decode("utf8"))
-                exit(1)
+        with ProgressFfmpeg(length, on_update_example) as progress2:
+            _run_ffmpeg(
+                ffmpeg_inputs + [
+                    "-filter_complex", filter_complex,
+                    "-map", "[final]",
+                    "-c:v", "libx264", "-b:v", "20M",
+                    "-pix_fmt", "yuv420p",
+                    "-threads", str(multiprocessing.cpu_count()),
+                    "-progress", progress2.output_file.name,
+                    only_tts_video,
+                ],
+                "only_tts_render"
+            )
+        _run_ffmpeg([
+            "-i", only_tts_video,
+            "-i", concat_audio_path,
+            "-c:v", "copy", "-c:a", "aac", "-b:a", "192k",
+            "-shortest", "-map", "0:v:0", "-map", "1:a:0",
+            only_tts_path,
+        ], "only_tts_mux")
+        os.unlink(only_tts_video)
 
-        old_percentage = pbar.n
-        pbar.update(100 - old_percentage)
     pbar.close()
-    save_data(subreddit, filename + ".mp4", title, idx, background_config["video"][2])
+    save_data(subreddit, filename + ".mp4", title_clean, idx, background_config["video"][2])
     print_step("Removing temporary files 🗑")
     cleanups = cleanup(reddit_id)
     print_substep(f"Removed {cleanups} temporary files 🗑")