From faaaa85be8fdd09ce1dd1c9dfeb0da2fc2921055 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Tue, 5 May 2026 17:29:52 +0700 Subject: [PATCH] feat: video creation dashboard with real-time progress tracking Add /create page with pipeline stage polling, /video/ route for safe file serving, modernized Tailwind/DaisyUI UI, and pytest regression tests. Consolidate AGENT.md + AGENTS.md into CLAUDE.md. Co-Authored-By: Claude Opus 4.7 --- AGENT.md | 392 -------------- AGENTS.md | 457 ---------------- CLAUDE.md | 143 ++++-- Dockerfile | 2 +- GUI.py | 166 +++++- GUI/backgrounds.html | 412 +++++++-------- GUI/create.html | 245 +++++++++ GUI/index.html | 458 ++++++++++++----- GUI/layout.html | 215 ++++---- GUI/settings.html | 972 +++++++++++++++-------------------- README.md | 30 +- docker-compose.yml | 12 + tests/test_gui_utils.py | 72 +++ utils/background_videos.json | 2 +- utils/console.py | 11 + utils/gui_utils.py | 127 +++-- 16 files changed, 1711 insertions(+), 2005 deletions(-) delete mode 100644 AGENT.md delete mode 100644 AGENTS.md create mode 100644 GUI/create.html create mode 100644 tests/test_gui_utils.py diff --git a/AGENT.md b/AGENT.md deleted file mode 100644 index bc6dedb..0000000 --- a/AGENT.md +++ /dev/null @@ -1,392 +0,0 @@ -# AGENT.md — Guidance for Agents & AI Working on VideoMakerBot - -This document guides **agents, bots, and AI assistants** on how to work effectively with the VideoMakerBot codebase. - ---- - -## Quick Start for Agents - -### Core Principle -**VideoMakerBot uses a platform-agnostic factory pattern.** Always respect the abstraction: -- Don't import platform-specific modules (reddit/, threads/) directly -- Always use `platforms/__init__.py` factory functions -- Keep platform-specific logic in `platforms/{platform}/` - -### The "Do This" Checklist -1. ✅ Read existing CLAUDE.md for architecture context -2. ✅ Use factory: `from platforms import get_content_object, get_screenshot_fn` -3. 
✅ Return standard `content_object` dict from all fetchers -4. ✅ Test both Reddit and Threads modes before declaring completion -5. ✅ Use config fallback chains for cross-platform keys -6. ✅ Document platform-specific logic in docstrings - -### The "Don't Do This" List -1. ❌ Import `reddit.subreddit` directly in main.py or generic modules -2. ❌ Hardcode subreddit/platform names in core video pipeline -3. ❌ Add platform-specific selectors outside `platforms/{platform}/` -4. ❌ Assume config keys exist without `.get()` and fallbacks -5. ❌ Modify screenshot_downloader.py for non-Reddit platforms - ---- - -## Understanding the Codebase Structure - -### Entry Point -**`main.py`** — Single CLI entry point using platform factory -- Calls `get_content_object(POST_ID)` from factory -- Calls `get_screenshot_fn()` from factory -- Everything else is platform-agnostic - -### Platform Layer (`platforms/`) -- **`__init__.py`** — Factory dispatch functions (add new platforms here) -- **`threads/fetcher.py`** — Threads Graph API client (returns standard dict) -- **`threads/screenshot.py`** — Threads.net Playwright screenshotter - -### Legacy Platform (`reddit/`) -- **`subreddit.py`** — PRAW API client (returns standard dict) -- No changes needed; called via factory - -### Video Pipeline (`video_creation/`) -- **`final_video.py`** — FFmpeg composition (platform-aware output folder only) -- **`screenshot_downloader.py`** — Reddit Playwright screenshotter (not called for Threads) -- **`voices.py`** — TTS orchestration (platform-agnostic) -- **`background.py`** — Video/audio download (platform-agnostic) - -### TTS Layer (`TTS/`) -- **`engine_wrapper.py`** — Provider abstraction (handles `post_lang` fallback) -- **`*.py`** — Individual provider implementations (elevenlabs, aws_polly, etc.) 
- -### Config & Utils (`utils/`) -- **`settings.py`** — TOML config loading & validation -- **`videos.py`** — Dedup tracking (`check_done()` + `check_done_by_id()`) -- **`.config.template.toml`** — Config schema with `[settings]`, `[reddit.*]`, `[threads.*]`, `[ai]` - ---- - -## How to Approach Common Tasks - -### Adding a New Social Platform (e.g., X/Twitter) - -**Steps:** -1. Create `platforms/twitter/fetcher.py`: - ```python - def get_twitter_content(POST_ID=None) -> dict: - """Fetch post + replies, return standard content_object.""" - # Implement API fetching logic here - return { - "thread_id": ..., - "thread_category": "twitter", # NEW: generic field for output folder - "thread_title": ..., - "thread_url": ..., - "comments": [...] - } - ``` - -2. Create `platforms/twitter/screenshot.py`: - ```python - def get_screenshots_of_twitter_posts(content_object: dict, screenshot_num: int): - """Use Playwright to screenshot X/Twitter posts.""" - # Implement Playwright logic here - ``` - -3. Update `platforms/__init__.py`: - ```python - elif platform == "twitter": - from platforms.twitter.fetcher import get_twitter_content - return get_twitter_content(POST_ID) - ``` - -4. Add config section to `utils/.config.template.toml`: - ```toml - [twitter.creds] - api_key = { ... } - api_secret = { ... } - - [twitter.thread] - post_id = { ... } - ``` - -5. Update `main.py` helper: - ```python - elif platform == "twitter": - return config.get("twitter", {}).get("thread", {}).get("post_id", "") - ``` - -6. **Zero changes needed to:** TTS, backgrounds, video composition, utils. 
- -**Verification:** -```bash -# Test Reddit (regression check) -sed -i 's/platform = "twitter"/platform = "reddit"/' config.toml -python3 main.py -# Verify results/{subreddit}/ output - -# Test Twitter -sed -i 's/platform = "reddit"/platform = "twitter"/' config.toml -python3 main.py --post-id -# Verify results/twitter/ output -``` - ---- - -### Modifying the Video Pipeline - -**Scenario:** You need to change FFmpeg composition or add a new processing step. - -**Approach:** -1. Check which data the modified code consumes (`content_object` dict) -2. Verify it works with both Reddit and Threads content structures -3. If platform-specific: move logic to `platforms/{platform}/` -4. If generic: keep in `video_creation/` -5. Test both modes before merging - -**Example:** Adding video filters -```python -# In final_video.py (generic, works for all platforms) -def apply_filter(video_clip, filter_type): - # No platform-specific logic here - return video_clip.filter(...) - -# Test: -# - Reddit mode produces filtered video -# - Threads mode produces filtered video -``` - ---- - -### Fixing a Bug in Config Handling - -**Scenario:** `post_lang` is not being applied correctly. - -**Debug Path:** -1. Check `utils/settings.py` — how is config loaded? -2. Check `TTS/engine_wrapper.py:182` — uses fallback chain: - ```python - lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) - ``` -3. Check `video_creation/final_video.py:78` — same fallback logic -4. If still broken: verify `utils/.config.template.toml` has the key defined -5. Test both platforms with `post_lang = "es"` in config - ---- - -### Adding Support for a New TTS Provider - -**Scenario:** User wants Whisper TTS support. - -**Steps:** -1. Create `TTS/whisper_tts.py`: - ```python - class WhisperTTS: - def make_voice(self, text): - # Call Whisper API - return audio_bytes - ``` - -2. 
Update `TTS/engine_wrapper.py:make_voice()`: - ```python - elif voice_choice == "whisper": - from TTS.whisper_tts import WhisperTTS - return WhisperTTS().make_voice(text) - ``` - -3. Add config to `utils/.config.template.toml`: - ```toml - [settings.tts] - whisper_api_key = { optional = true, ... } - ``` - -4. Test: - ```bash - # In config.toml: - voice_choice = "whisper" - # Run: python3 main.py - ``` - ---- - -## Common Pitfalls & How to Avoid Them - -### Pitfall 1: Platform-Specific Code in Generic Modules -**Problem:** -```python -# BAD: In video_creation/final_video.py -subreddit = settings.config["reddit"]["thread"]["subreddit"] -``` -**Will break** when platform = "threads" (no reddit.thread.subreddit). - -**Solution:** -```python -# GOOD: -platform = settings.config["settings"].get("platform", "reddit") -if platform == "reddit": - category = settings.config["reddit"]["thread"]["subreddit"] -else: - category = reddit_obj.get("thread_category", platform) -``` - -### Pitfall 2: Hardcoding Selectors in Platform-Agnostic Code -**Problem:** -```python -# BAD: In video_creation/voices.py -element = page.locator("#t1_{comment_id}") # Reddit-only selector! -``` -**Will fail** when running Threads mode (different DOM). - -**Solution:** -- Keep all Playwright logic in `platforms/{platform}/screenshot.py` -- Never hardcode selectors in generic modules - -### Pitfall 3: Forgetting to Test Both Modes -**Problem:** You change `final_video.py`, test with Reddit, declare done. -Threads mode breaks because you didn't test it. 
- -**Solution:** -```bash -# Test both before committing: -sed -i 's/platform = "threads"/platform = "reddit"/' config.toml -python3 main.py -# Check results/{subreddit}/ - -sed -i 's/platform = "reddit"/platform = "threads"/' config.toml -python3 main.py --post-id -# Check results/threads/ -``` - -### Pitfall 4: Assuming Config Keys Exist -**Problem:** -```python -# BAD: -lang = settings.config["reddit"]["thread"]["post_lang"] -``` -**Will crash** if key doesn't exist. - -**Solution:** -```python -# GOOD: -lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) -``` - ---- - -## Code Review Checklist for Agents - -Before marking work complete, verify: - -- [ ] **No platform imports in main.py** — Uses factory only -- [ ] **Standard content_object dict** — All fetchers return same shape -- [ ] **Platform-specific logic isolated** — Only in `platforms/{platform}/` -- [ ] **Config fallback chains** — No hardcoded section names in generic code -- [ ] **Both modes tested** — Reddit AND Threads produce correct output -- [ ] **Docstrings updated** — New functions document platform assumptions -- [ ] **Error messages clear** — Include platform name + actionable guidance -- [ ] **Video dedup works** — No duplicate videos created - ---- - -## Understanding Data Flow - -### Happy Path: Fetch → TTS → Screenshot → Compose → Output - -``` -1. main.py:main() - └─→ platforms/__init__.py:get_content_object() - └─→ platforms/threads/fetcher.py:get_threads_content() - └─→ Returns: {thread_id, thread_title, comments, ...} - -2. video_creation/voices.py:save_text_to_mp3() - └─→ TTS/engine_wrapper.py:process_text() - └─→ TTS/engine_wrapper.py:make_voice() - └─→ TTS/{provider}.py: {elevenlabs,tiktok,etc} - └─→ Returns: audio_length, comment_count - -3. 
platforms/__init__.py:get_screenshot_fn() - └─→ platforms/threads/screenshot.py:get_screenshots_of_threads_posts() - └─→ Uses Playwright on threads.net - └─→ Saves: assets/temp/{thread_id}/png/{title,comment_0,etc}.png - -4. video_creation/background.py - └─→ download_background_video() & download_background_audio() - └─→ Uses yt-dlp to fetch YouTube videos/audio - └─→ Saves to: assets/temp/{thread_id}/{video,audio} - -5. video_creation/final_video.py:make_final_video() - └─→ Uses FFmpeg to compose everything - └─→ Reads: audio files, screenshot PNGs, background video - └─→ Writes: results/{thread_category}/{filename}.mp4 - -6. utils/videos.py:save_data() - └─→ Records video in videos.json for dedup -``` - -### Config Flow - -``` -config.toml (user settings) - ↓ -utils/settings.py:check_toml() - └─→ Validates against .config.template.toml schema - └─→ Returns: settings.config (dict) - - Used by: - ├─ main.py (platform selection) - ├─ platforms/reddit/ (subreddit, etc.) - ├─ platforms/threads/ (Graph API token, etc.) - ├─ TTS/engine_wrapper.py (post_lang fallback) - ├─ video_creation/ (theme, resolution, etc.) - └─ utils/videos.py (dedup behavior) -``` - ---- - -## Deployment Notes - -### Python Version -- **Minimum:** 3.10 -- **Tested:** 3.10, 3.11, 3.12 -- **Reason:** F-strings, type hints, modern async patterns - -### Critical Dependencies -- **reddit platform:** praw 7.8.1 (requires Reddit OAuth app) -- **threads platform:** requests (for Graph API calls) -- **screenshots:** playwright 1.49.1 (requires browser installation: `playwright install`) -- **video:** moviepy 2.2.1, ffmpeg-python 0.2.0 (requires FFmpeg system binary) -- **tts:** varies per provider (elevenlabs, aws_polly, openai, etc.) 
- -### Versions That Caused Issues -- **yt-dlp==2026.3.17** — Doesn't exist (use 2025.10.14 or latest stable) -- **playwright without browser install** — Will crash on first screenshot - ---- - -## When to Escalate - -### Escalate to User if: -- User needs new platform support (only they know requirements) -- Config changes affect backward compatibility -- Performance optimization needed (only user knows acceptable limits) -- Security concern (token handling, credential storage, etc.) - -### Safe to Implement as Agent: -- Bug fixes within existing architecture -- Adding new TTS providers -- Extending config options for existing platforms -- Performance optimizations (caching, parallelization) -- New filter/processing features that work platform-agnostically -- Documentation & refactoring - ---- - -## Final Guidance - -**Golden Rule:** The factory pattern is your friend. When in doubt, check if your change breaks the abstraction. If it does, rethink it. - -**Test Obsessively:** Always run both Reddit and Threads modes. The codebase is designed for multi-platform support, and it's easy to break one platform while fixing another. - -**Document Platform Assumptions:** If your code works differently for Reddit vs Threads, say so explicitly in docstrings and comments. - -**Ask Yourself:** "Would this work for X/Twitter?" If no, it probably belongs in `platforms/threads/`, not in generic code. - -Good luck, and happy contributing! 🎥 diff --git a/AGENTS.md b/AGENTS.md deleted file mode 100644 index cd1ed59..0000000 --- a/AGENTS.md +++ /dev/null @@ -1,457 +0,0 @@ -# AGENTS.md — VideoMakerBot Development Guide - -## Project Overview - -**VideoMakerBot** — Automated short-form video creator from social media content. 
- -**Status:** Production-ready, actively maintained (v3.4.0) -**Language:** Python 3.10+ -**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned) - -### Core Mission -Transforms social media threads (post + comments/replies) into complete short-form videos with: -- AI-generated speech (7+ TTS providers) -- UI screenshots (Playwright) -- Background video/audio overlays -- FFmpeg composition & output - ---- - -## Architecture at a Glance - -``` -main.py (CLI) - ↓ [platform factory] - ├─→ reddit/subreddit.py [PRAW API] - └─→ platforms/threads/fetcher.py [Graph API] - ↓ [standard data dict] - ├─→ TTS/engine_wrapper.py [7+ providers] - ├─→ screenshot_downloader.py (Reddit) - │ or platforms/threads/screenshot.py (Threads) - ├─→ video_creation/background.py - └─→ video_creation/final_video.py [FFmpeg] - ↓ - results/{category}/{video.mp4} -``` - -### Key Design: Platform Abstraction via Factory Pattern - -**Why:** Single codebase supports multiple platforms without tight coupling. - -**How:** `platforms/__init__.py` exports: -- `get_content_object(POST_ID=None)` — routes to right fetcher -- `get_screenshot_fn()` — routes to right screenshotter - -**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches. - ---- - -## Data Contract: The "content_object" Dict - -All fetchers return this shape (defined in `platforms/__init__.py`): - -```python -{ - # Unique identifiers - "thread_id": str, # Used for temp folder: assets/temp/{id}/ - "thread_category": str, # "reddit", "threads", etc. 
→ output folder - - # Content - "thread_title": str, # TTS as title + output filename - "thread_url": str, # Playwright navigates here for screenshot - "is_nsfw": bool, # Content filter flag - - # Replies/Comments (mutually exclusive with thread_post) - "comments": [ - { - "comment_body": str, # TTS per reply - "comment_url": str, # Playwright navigates here - "comment_id": str, # CSS selector ID or unique identifier - } - ], - - # OR Story mode: - "thread_post": str | list, # Long-form text (no comments) -} -``` - -**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic. - ---- - -## File Organization - -``` -VideoMakerBot/ -├── platforms/ # Multi-platform abstraction -│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() -│ └── threads/ # Threads (Meta) implementation -│ ├── fetcher.py # Graph API → content_object -│ └── screenshot.py # Playwright Threads screenshotter -│ -├── reddit/ # Reddit implementation (kept as-is) -│ └── subreddit.py # PRAW API → content_object + thread_category -│ -├── video_creation/ -│ ├── final_video.py # FFmpeg composition (platform-aware folder naming) -│ ├── screenshot_downloader.py # Playwright Reddit UI capturer -│ ├── voices.py # TTS orchestrator (platform-agnostic) -│ ├── background.py # Video/audio downloader (platform-agnostic) -│ └── data/ -│ ├── videos.json # Dedup tracker -│ ├── cookie-dark-mode.json # Reddit theme cookie -│ └── cookie-threads.json # Threads session cookie (auto-created) -│ -├── TTS/ # Text-to-Speech -│ ├── engine_wrapper.py # Provider abstraction + post_lang fallback -│ ├── elevenlabs.py, aws_polly.py, etc. # 7+ provider implementations -│ -├── utils/ -│ ├── settings.py # Config loading + validation -│ ├── videos.py # check_done() + check_done_by_id() -│ ├── console.py # Rich terminal output -│ ├── .config.template.toml # Config schema (platform sections) -│ └── ... (id, voice, cleanup, etc.) 
-│ -├── main.py # CLI entry (platform-routed via factory) -├── GUI.py # Flask web UI (localhost:4000 in host mode, 0.0.0.0 in Docker) -├── requirements.txt # Dependencies -└── AGENTS.md / AGENT.md # This file + agent guidelines -``` - ---- - -## Configuration - -**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config) - -### Platform Selection -```toml -[settings] -platform = "reddit" # or "threads" -post_lang = "es-cr" # Optional: translation language (all platforms) -``` - -### Reddit Config -```toml -[reddit.creds] -client_id = "..." # OAuth app -client_secret = "..." -username = "..." -password = "..." -2fa = true/false - -[reddit.thread] -subreddit = "AskReddit" -post_id = "" # Leave blank for auto-pick -max_comment_length = 500 -min_comment_length = 1 -min_comments = 20 -blocked_words = "..." -``` - -### Threads Config (NEW) -```toml -[threads.creds] -access_token = "EAABsbCS..." # Meta Graph API token (60-day expiry) -user_id = "12345678901234567" -username = "your_insta" # For Playwright login -password = "your_password" - -[threads.thread] -post_id = "" # Leave blank for auto-pick -max_reply_length = 500 -min_reply_length = 1 -min_replies = 5 -blocked_words = "..." -``` - -### Generic Settings -```toml -[settings] -theme = "dark" -resolution_w = 1080 -resolution_h = 1920 -storymode = false -times_to_run = 1 - -[settings.tts] -voice_choice = "tiktok" # or "elevenlabs", "awspolly", "googletranslate", etc. -random_voice = true -silence_duration = 0.3 - -[settings.background] -background_video = "minecraft" -background_audio = "lofi" -background_audio_volume = 0.15 -``` - ---- - -## Development Guidelines - -### ✅ DO: - -1. **Use platform factory in main.py** - ```python - from platforms import get_content_object, get_screenshot_fn - reddit_object = get_content_object(POST_ID) - screenshot_fn = get_screenshot_fn() - screenshot_fn(reddit_object, number_of_comments) - ``` - -2. 
**Return standard content dict** from all fetchers - ```python - return { - "thread_id": ..., - "thread_category": ..., # NEW: replaces hardcoded subreddit - "comments": [...] - } - ``` - -3. **Use config fallback chains** for cross-platform keys - ```python - lang = (settings.config["settings"].get("post_lang") or - settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) - ``` - -4. **Read thread_category from dict** instead of config - ```python - # WRONG: - subreddit = settings.config["reddit"]["thread"]["subreddit"] - - # RIGHT: - platform = settings.config["settings"].get("platform", "reddit") - if platform == "reddit": - subreddit = settings.config["reddit"]["thread"]["subreddit"] - else: - subreddit = reddit_obj.get("thread_category", platform) - ``` - -5. **Test both platforms** after core pipeline changes - ```bash - # Test Reddit (must not regress) - sed -i 's/platform = "threads"/platform = "reddit"/' config.toml - python3 main.py - - # Test Threads - sed -i 's/platform = "reddit"/platform = "threads"/' config.toml - python3 main.py --post-id - ``` - -### ❌ DON'T: - -1. **Don't import platform modules directly** in main.py/utils - ```python - # WRONG: from reddit.subreddit import get_subreddit_threads - # RIGHT: from platforms import get_content_object - ``` - -2. **Don't hardcode platform names** in generic modules - ```python - # WRONG in final_video.py: - subreddit = settings.config["reddit"]["thread"]["subreddit"] - - # RIGHT: - subreddit = reddit_obj.get("thread_category", "unknown") - ``` - -3. **Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py` - - Reddit selectors stay in `video_creation/screenshot_downloader.py` - - Threads selectors stay in `platforms/threads/screenshot.py` - -4. 
**Don't assume config keys exist** without fallback - ```python - # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"] - # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "") - ``` - ---- - -## Platform-Specific Knowledge - -### Reddit -- **API:** PRAW (Python Reddit API Wrapper) -- **Auth:** OAuth app (client_id, secret) + username/password -- **Screenshot:** Playwright on reddit.com/new.reddit.com - - Login form: `input[name="username"]`, `input[name="password"]` - - Post selector: `[data-test-id="post-content"]` - - Comment selector: `#t1_{comment_id}` -- **NSFW:** `submission.over_18` -- **Output folder:** `results/{subreddit}/` - -### Threads -- **API:** Meta Graph API (v18.0+) -- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/ -- **Screenshot:** Playwright on threads.net - - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` - - Post selector: `article` (universal, more stable than Reddit) - - Cookies saved to: `video_creation/data/cookie-threads.json` -- **NSFW:** API doesn't provide; always False -- **Output folder:** `results/threads/` - -### Future: X/Twitter -Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section -Update: `platforms/__init__.py` with `elif platform == "twitter"` branches - ---- - -## Extending the Project - -### Adding a New TTS Provider -1. Create `TTS/my_provider.py` with a class implementing the TTS interface -2. Add config keys to `[settings.tts]` in `.config.template.toml` -3. Update `TTS/engine_wrapper.py` to call your provider -4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"` - -### Adding a New Platform (e.g., X/Twitter) -1. **Create fetcher:** `platforms/twitter/fetcher.py` - - Implement `get_twitter_content(POST_ID=None)` returning standard dict -2. 
**Create screenshotter:** `platforms/twitter/screenshot.py` - - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)` -3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections -4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py` -5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py` -6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end - -**Zero changes needed to:** TTS, backgrounds, video composition, or utils. - ---- - -## Debugging Tips - -### "No matching distribution found for yt-dlp==2026.3.17" -→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable). - -### "Threads API: Invalid or expired access_token" -→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/ - -### Playwright timeout on Threads screenshot -→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run. - -### "No eligible Threads posts found" -→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies. - -### Video dedup not working -→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content. 
- ---- - -## Testing Checklist - -- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/` -- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/` -- [ ] Video dedup: Running same post_id twice skips second run -- [ ] Translation: `post_lang = "es"` translates filenames -- [ ] TTS providers: Test with different voice_choice values -- [ ] Background selection: Custom background video/audio works -- [ ] Story mode: storymode=true only uses thread_post, not comments -- [ ] Error handling: Invalid credentials show clear messages - ---- - -## Key Files to Know - -| File | Purpose | -|------|---------| -| `main.py` | CLI entry; orchestrates pipeline via factory | -| `platforms/__init__.py` | Factory dispatch for multi-platform support | -| `platforms/threads/fetcher.py` | Threads Graph API client | -| `platforms/threads/screenshot.py` | Threads.net Playwright screenshotter | -| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming | -| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback | -| `utils/settings.py` | Config loading & validation | -| `utils/videos.py` | Video dedup tracking | -| `utils/.config.template.toml` | Config schema | -| `requirements.txt` | Dependencies | - ---- - -## Useful Commands - -```bash -# Install dependencies -pip install -r requirements.txt - -# Run CLI -python3 main.py - -# Run with specific post -python3 main.py - -# Run Flask GUI -python3 GUI.py - -# Check syntax -python3 -m py_compile main.py platforms/threads/fetcher.py - -# Format code -black main.py platforms/ utils/ - -# Lint -pylint main.py -``` - -## Docker Workflow - -- Use `docker compose build` to build the shared image for both CLI and GUI. -- Use `docker compose up gui` to run the Flask app on port `4000`. -- Use `docker compose run --rm cli` to run the video generator in a container. 
-- The repo root is bind-mounted in Compose, so `config.toml`, `results/`, `assets/temp/`, `video_creation/data/videos.json`, and `utils/backgrounds.json` should persist across runs. -- The GUI must bind to `0.0.0.0` in Docker; do not switch it back to `localhost` for container use. - ---- - -## When You Get Stuck - -1. **"What does this module do?"** → Check imports in `main.py` or docstrings -2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above -3. **"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema -4. **"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice` -5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser - -Good luck! 🚀 - - -# GitNexus — Code Intelligence - -This project is indexed by GitNexus as **VideoMakerBot** (802 symbols, 1287 relationships, 32 execution flows). Use the GitNexus MCP tools to understand code, assess impact, and navigate safely. - -> If any GitNexus tool warns the index is stale, run `npx gitnexus analyze` in terminal first. - -## Always Do - -- **MUST run impact analysis before editing any symbol.** Before modifying a function, class, or method, run `gitnexus_impact({target: "symbolName", direction: "upstream"})` and report the blast radius (direct callers, affected processes, risk level) to the user. -- **MUST run `gitnexus_detect_changes()` before committing** to verify your changes only affect expected symbols and execution flows. -- **MUST warn the user** if impact analysis returns HIGH or CRITICAL risk before proceeding with edits. -- When exploring unfamiliar code, use `gitnexus_query({query: "concept"})` to find execution flows instead of grepping. It returns process-grouped results ranked by relevance. 
-- When you need full context on a specific symbol — callers, callees, which execution flows it participates in — use `gitnexus_context({name: "symbolName"})`. - -## Never Do - -- NEVER edit a function, class, or method without first running `gitnexus_impact` on it. -- NEVER ignore HIGH or CRITICAL risk warnings from impact analysis. -- NEVER rename symbols with find-and-replace — use `gitnexus_rename` which understands the call graph. -- NEVER commit changes without running `gitnexus_detect_changes()` to check affected scope. - -## Resources - -| Resource | Use for | -|----------|---------| -| `gitnexus://repo/VideoMakerBot/context` | Codebase overview, check index freshness | -| `gitnexus://repo/VideoMakerBot/clusters` | All functional areas | -| `gitnexus://repo/VideoMakerBot/processes` | All execution flows | -| `gitnexus://repo/VideoMakerBot/process/{name}` | Step-by-step execution trace | - -## CLI - -| Task | Read this skill file | -|------|---------------------| -| Understand architecture / "How does X work?" | `.claude/skills/gitnexus/gitnexus-exploring/SKILL.md` | -| Blast radius / "What breaks if I change X?" | `.claude/skills/gitnexus/gitnexus-impact-analysis/SKILL.md` | -| Trace bugs / "Why is X failing?" | `.claude/skills/gitnexus/gitnexus-debugging/SKILL.md` | -| Rename / extract / split / refactor | `.claude/skills/gitnexus/gitnexus-refactoring/SKILL.md` | -| Tools, resources, schema reference | `.claude/skills/gitnexus/gitnexus-guide/SKILL.md` | -| Index, status, clean, wiki CLI commands | `.claude/skills/gitnexus/gitnexus-cli/SKILL.md` | - - diff --git a/CLAUDE.md b/CLAUDE.md index 91222fd..6be1021 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,16 +5,18 @@ **VideoMakerBot** — Automated short-form video creator from social media content. 
**Status:** Production-ready, actively maintained (v3.4.0) -**Language:** Python 3.10+ +**Language:** Python 3.10 (locked by `Dockerfile`; host venv may use 3.14 for tooling only) +**Runtime:** **Docker only** — all CLI, GUI, and test invocations go through `docker compose`. Do not invoke `python` on the host. **Platforms:** Reddit (PRAW API), Threads (Graph API + Web Scraping) ### Core Mission Transforms social media threads (post + comments/replies) into complete short-form videos with: - AI-generated speech (7+ TTS providers) -- UI screenshots (Playwright) +- UI screenshots (Playwright, headless Chromium pre-installed in image) - Background video/audio overlays -- FFmpeg composition & output +- FFmpeg composition & output (Linux ffmpeg with full filter set, including `drawtext`) - Optional YouTube upload +- Modern web UI (Tailwind CSS + DaisyUI + Lucide + vanilla ES6) on `localhost:4000` --- @@ -101,8 +103,21 @@ VideoMakerBot/ │ ├── background_audios.json # Background audio manifest │ └── ... │ +├── GUI/ # Flask templates (Tailwind + DaisyUI + Lucide) +│ ├── layout.html # Base layout (no jQuery, no Bootstrap) +│ ├── index.html # Video Library (3 buttons: source / download / copy link) +│ ├── backgrounds.html # Background Manager (videos catalog) +│ ├── settings.html # Config editor (validated against template) +│ └── create.html # Render progress page +│ +├── tests/ +│ └── test_gui_utils.py # pytest regression for add/delete background +│ ├── main.py # CLI entry (platform-routed via factory) -├── GUI.py # Flask web UI (localhost:4000) +├── GUI.py # Flask web UI; `/video/` serves files with sanitized headers +├── Dockerfile # python:3.10-slim-bookworm + ffmpeg + playwright + pytest +├── docker-compose.yml # Services: gui, cli, test +├── docker-entrypoint.sh # Runs `utils.docker_bootstrap` then exec's the command ├── requirements.txt └── CLAUDE.md ``` @@ -229,33 +244,40 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes) ### ✅ DO: -1. 
**Use platform factory** — never import platform modules directly -2. **Return standard content_object** from all fetchers -3. **Use clean body text** for TTS — parse out username/timestamp metadata -4. **Default to `googletranslate` TTS on macOS** — pyttsx3 hangs in headless environments -5. **Use `libx264` encoder on macOS** — `h264_nvenc` is NVIDIA-only -6. **Test both Threads discovery methods:** `api` and `scrape` +1. **Run everything through Docker** — `docker compose up gui`, `docker compose run --rm cli`, `docker compose run --rm test` +2. **Use platform factory** — never import platform modules directly +3. **Return standard content_object** from all fetchers +4. **Use clean body text** for TTS — parse out username/timestamp metadata +5. **Default to `googletranslate` TTS** for headless containers — no API key, fast, free +6. **Use `libx264` encoder** — `h264_nvenc` is NVIDIA-only and not available in the slim image +7. **Test both Threads discovery methods:** `api` and `scrape` +8. **Bind-mount preserves state** — edits to `config.toml`, `results/`, `assets/temp/`, `video_creation/data/`, and the `utils/background_*.json` catalogs persist across container runs +9. **GUI must bind to `0.0.0.0`** in Docker (already enforced via `GUI_HOST=0.0.0.0` env) +10. **Use `/video/` to serve renders** — the route looks up the file by id in `videos.json`, sanitizes the `Content-Disposition` filename, and avoids 404s caused by literal newlines in titles ### ❌ DON'T: -1. **Don't use `
<article>` selectors** on Threads.net — the DOM is div-based -2. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility -3. **Don't rely on `drawtext` FFmpeg filter** — not available in Homebrew builds +1. **Don't run `python GUI.py` or `python main.py` on the host** — Docker is the only supported path +2. **Don't use `<article>
` selectors** on Threads.net — the DOM is div-based +3. **Don't hardcode `h264_nvenc`** — use `libx264` for cross-platform compatibility 4. **Don't import platform modules directly** in main.py/utils 5. **Don't assume config keys exist** without `.get()` fallback +6. **Don't reintroduce jQuery, Bootstrap, or ClipboardJS** — the UI is vanilla ES6 + Tailwind + DaisyUI + Lucide +7. **Don't write to `utils/backgrounds.json`** — it is a legacy empty file. Use `utils/background_videos.json` and `utils/background_audios.json` --- -## macOS-Specific Notes +## Web UI (Flask, served by `gui` service) -- **TTS:** `googletranslate` (gTTS) is the most reliable — free, fast, no API key - - `tiktok` auto-falls back to `pyttsx3` if sessionid missing, but pyttsx3 is very slow - - `pyttsx3` works but takes ~60s to initialize NSSpeechSynthesizer -- **FFmpeg encoder:** MUST use `libx264` — `h264_nvenc` is NVIDIA GPU only -- **FFmpeg filters:** `drawtext` missing from Homebrew bottle — credit text is disabled -- **yt-dlp:** Keep updated (`pip install --upgrade yt-dlp`) — YouTube changes APIs frequently - - Format selector: `best[height<=1080]` not `bestvideo` (many videos lack video-only streams) - - Upgrade path: `pip install --upgrade yt-dlp` +- **Stack:** Tailwind CSS, DaisyUI, Lucide Icons, vanilla ES6 (no jQuery, no Bootstrap, no ClipboardJS) +- **Routes:** + - `/` — Video Library; cards show source-post link, download, and copy-link buttons + - `/video/<video_id>` — serves the rendered mp4 by id (lookup via `videos.json`); guards path-traversal and sanitizes the filename for `Content-Disposition` + - `/backgrounds` — Background Manager UI + - `/backgrounds.json` — serves `utils/background_videos.json` (the videos catalog) + - `/background/add`, `/background/delete` — POST endpoints; mutate **both** `utils/background_videos.json` and the `settings.background.background_video.options` array in `utils/.config.template.toml` + - `/settings` — config editor; loads from `config.toml`, validates
against `utils/.config.template.toml`, persists via `utils/gui_utils.modify_settings` (preserves comments/formatting via `tomlkit`) +- **HTML escaping:** the `h()` helper in `index.html` escapes `& " < >` for any user-controlled string embedded in attributes — use it for any new dynamic data on the Library page --- @@ -277,64 +299,79 @@ Last 1-4: engagement metrics (likes, replies, reposts, quotes) | `reddit/subreddit.py` | PRAW Reddit fetcher with auto-2FA | | `utils/settings.py` | Config loading + interactive validation | | `utils/videos.py` | Video dedup tracking | -| `utils/.config.template.toml` | Config schema | -| `utils/background_videos.json` | Background video manifest | +| `utils/.config.template.toml` | Config schema (also drives Settings page validation) | +| `utils/background_videos.json` | Background video manifest (served at `/backgrounds.json`) | | `utils/background_audios.json` | Background audio manifest | +| `utils/gui_utils.py` | `add_background`, `delete_background`, `modify_settings`, `get_checks` | +| `GUI.py` | Flask app: `/`, `/video/`, `/backgrounds`, `/settings`, `/create` | +| `Dockerfile` | python:3.10-slim-bookworm + ffmpeg + Playwright Chromium + pytest | +| `docker-compose.yml` | Three services: `gui` (port 4000), `cli`, `test` | +| `tests/test_gui_utils.py` | Pytest regression for Background Manager round-trip | --- ## Debugging Tips ### FFmpeg "Unknown encoder 'h264_nvenc'" -→ On macOS, change to `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. - -### FFmpeg "No such filter: 'drawtext'" -→ Homebrew FFmpeg lacks drawtext. The credit text overlay is automatically skipped. +→ Use `libx264`. Find-and-replace `h264_nvenc` → `libx264` in `video_creation/final_video.py`. The slim image does not ship with NVIDIA encoders. ### yt-dlp "Requested format is not available" -→ Update yt-dlp: `pip install --upgrade yt-dlp`. 
Also change format selector from `bestvideo` to `best` in `video_creation/background.py`. - -### pyttsx3 hang on macOS -→ NSSpeechSynthesizer needs GUI session. Switch to `voice_choice = "googletranslate"` for headless use. +→ Bump the pinned version in `requirements.txt` and rebuild (`docker compose build`). Also prefer `best[height<=1080]` over `bestvideo` in `video_creation/background.py` — many videos lack video-only streams. ### Threads screenshots fail ("Main post article not found") → Threads.net uses div cards, not `<article>
`. Ensure screenshot code uses `a[href*="/post/"]` → ancestor div approach. ### Config validator EOFError in non-interactive mode -→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Fill ALL required fields or load config directly with `toml.load()` + `settings.config = ...`. +→ `check_toml()` prompts for ALL platform sections regardless of `platform` setting. Either fill all required fields, edit through `/settings`, or pre-populate `config.toml` before `docker compose run cli`. ### Playwright timeout on Threads login -→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login. Also check button selector: must use `exact=True` due to multiple "Log in" buttons. +→ Cookies corrupted. Delete `video_creation/data/cookie-threads.json` for fresh login (the file is bind-mounted, so deleting on host clears the container too). Also confirm selectors: button uses `exact=True` due to multiple "Log in" buttons. ### No viral posts found → Lower `min_engagement` in config. Most Threads feed posts have <100 likes — 10000 filters almost everything. +### Background Manager grid is empty +→ `/backgrounds.json` must serve `utils/background_videos.json` (split catalog), **not** the legacy `utils/backgrounds.json` (empty `{}`). Verify in `GUI.py:backgrounds_json`. + +### `/video/<video_id>` returns 404 +→ The route looks up the entry in `video_creation/data/videos.json` by `id` and resolves the file under `results/<subreddit>/<filename>.mp4`. Confirm both the JSON entry and the file exist; the file may have been pruned. + +### JS "Unexpected end of input" on Library page +→ Any user-controlled string interpolated into an HTML attribute must go through the `h()` helper in `index.html`. Avoid inline `onclick=` with `${JSON.stringify(...)}`. + +### Stale image after editing `requirements.txt` or `Dockerfile` +→ `docker compose build` to rebuild. Code changes alone do NOT need a rebuild because the repo root is bind-mounted to `/app`.
+ --- -## Useful Commands +## Useful Commands (Docker-only) ```bash -# Install dependencies -pip install -r requirements.txt +# Build (or rebuild after Dockerfile / requirements.txt changes) +docker compose build -# Run CLI -python3 main.py +# Run the GUI (foreground) +docker compose up gui +# → http://localhost:4000 -# Run bypassing config validator (non-interactive) -python3 -c " -import sys, toml -sys.path.insert(0, '.') -from utils import settings -settings.config = toml.load('config.toml') -from main import main; main() -" +# Run the GUI in the background +docker compose up -d gui +docker compose logs -f gui +docker compose down -# Update yt-dlp (YouTube downloads fix) -pip install --upgrade yt-dlp +# Run the CLI pipeline (one-off, removed on exit) +docker compose run --rm cli +docker compose run --rm cli python main.py -# Check syntax -python3 -m py_compile main.py platforms/threads/scraper.py +# Run the test suite +docker compose run --rm test -# Run Flask GUI -python3 GUI.py +# Open a shell in a fresh container for ad-hoc commands +docker compose run --rm --entrypoint /bin/bash gui +# inside: python -m py_compile main.py platforms/threads/scraper.py + +# List rendered Threads videos inside the running GUI container +docker compose exec gui ls /app/results/threads/ ``` + +> Anything that needs `pip install`, `playwright install`, or `apt-get` belongs in `Dockerfile` followed by `docker compose build` — never run those on the host.
diff --git a/Dockerfile b/Dockerfile index 5a41218..4b9ccf8 100644 --- a/Dockerfile +++ b/Dockerfile @@ -16,7 +16,7 @@ RUN apt-get update \ COPY requirements.txt ./ RUN pip install --upgrade pip \ && pip install -r requirements.txt \ - && python -m spacy download en_core_web_sm + && pip install pytest RUN python -m playwright install --with-deps chromium diff --git a/GUI.py b/GUI.py index 771b9e5..a786702 100644 --- a/GUI.py +++ b/GUI.py @@ -1,28 +1,35 @@ -import os -import webbrowser -from pathlib import Path +import io +import json +import os +import sys +import threading +import webbrowser +from pathlib import Path # Used "tomlkit" instead of "toml" because it doesn't change formatting on "dump" -import tomlkit +import tomlkit from flask import ( Flask, + abort, + jsonify, redirect, render_template, request, + send_file, send_from_directory, url_for, ) -import utils.gui_utils as gui -from utils.docker_bootstrap import ensure_runtime_state - -ensure_runtime_state() - -# Set the hostname and port -HOST = os.environ.get("GUI_HOST", "0.0.0.0") -PORT = int(os.environ.get("GUI_PORT", "4000")) -OPEN_BROWSER = os.environ.get("GUI_OPEN_BROWSER", "1").lower() in {"1", "true", "yes", "on"} -BROWSER_URL = os.environ.get("GUI_BROWSER_URL", f"http://localhost:{PORT}") +import utils.gui_utils as gui +from utils.docker_bootstrap import ensure_runtime_state + +ensure_runtime_state() + +# Set the hostname and port +HOST = os.environ.get("GUI_HOST", "0.0.0.0") +PORT = int(os.environ.get("GUI_PORT", "4000")) +OPEN_BROWSER = os.environ.get("GUI_OPEN_BROWSER", "1").lower() in {"1", "true", "yes", "on"} +BROWSER_URL = os.environ.get("GUI_BROWSER_URL", f"http://localhost:{PORT}") # Configure application app = Flask(__name__, template_folder="GUI") @@ -99,13 +106,57 @@ def videos_json(): # Make backgrounds.json accessible @app.route("/backgrounds.json") def backgrounds_json(): - return send_from_directory("utils", "backgrounds.json") + return send_from_directory("utils", 
"background_videos.json") # Make videos in results folder accessible @app.route("/results/") def results(name): - return send_from_directory("results", name, as_attachment=True) + as_attachment = request.args.get("download", "0").lower() in {"1", "true", "yes"} + return send_from_directory("results", name, as_attachment=as_attachment) + + +# Serve a video by its videos.json id (handles filenames with unsafe chars like newlines) +@app.route("/video/") +def video_by_id(video_id): + try: + with open("video_creation/data/videos.json", "r", encoding="utf-8") as f: + videos = json.load(f) + except (OSError, json.JSONDecodeError): + abort(404) + + entry = next((v for v in videos if v.get("id") == video_id), None) + if not entry: + abort(404) + + subreddit = entry.get("subreddit", "") + filename = entry.get("filename", "") + file_path = (Path("results") / subreddit / filename).resolve() + results_root = Path("results").resolve() + + # Prevent path traversal: ensure resolved file is inside results/ + try: + file_path.relative_to(results_root) + except ValueError: + abort(404) + + if not file_path.is_file(): + abort(404) + + as_attachment = request.args.get("download", "0").lower() in {"1", "true", "yes"} + safe_name = filename.replace("\n", " ").replace("\r", " ").strip() or f"{video_id}.mp4" + return send_file(file_path, as_attachment=as_attachment, download_name=safe_name) + + +# Delete one or more videos by ID +@app.route("/videos/delete", methods=["POST"]) +def video_delete(): + data = request.get_json(silent=True) or {} + ids = data.get("ids", []) + if not ids or not isinstance(ids, list): + return jsonify({"error": "No IDs provided"}), 400 + deleted = gui.delete_videos(ids) + return jsonify({"deleted": deleted}) # Make voices samples in voices folder accessible @@ -114,9 +165,82 @@ def voices(name): return send_from_directory("GUI/voices", name, as_attachment=True) +# --- Pipeline state (shared across thread + HTTP) --- +pipeline_lock = threading.Lock() 
+pipeline_state: dict = { + "running": False, + "stage": "", + "error": None, + "result": None, # {"title": ..., "file": ..., "url": ...} + "log": [], # Last N status messages +} + + +def _run_pipeline(): + """Run the video creation pipeline in a background thread.""" + import toml + from utils import console as uconsole + from utils import settings + + with pipeline_lock: + pipeline_state["running"] = True + pipeline_state["stage"] = "configuring" + pipeline_state["error"] = None + pipeline_state["result"] = None + pipeline_state["log"] = [] + + try: + # Load config + settings.config = toml.load("config.toml") + + # Set up progress callback + def on_progress(stage=""): + with pipeline_lock: + pipeline_state["stage"] = stage + pipeline_state["log"].append(stage) + if len(pipeline_state["log"]) > 20: + pipeline_state["log"] = pipeline_state["log"][-20:] + + uconsole.set_progress_callback(on_progress) + + from main import main as run_pipeline + run_pipeline() + + with pipeline_lock: + pipeline_state["stage"] = "done" + pipeline_state["result"] = {"message": "Video created successfully! 
Check the home page."} + + except Exception as e: + with pipeline_lock: + pipeline_state["stage"] = "error" + pipeline_state["error"] = str(e)[:500].encode("ascii", errors="replace").decode("ascii") + finally: + with pipeline_lock: + pipeline_state["running"] = False + uconsole.set_progress_callback(None) + + +@app.route("/create", methods=["GET", "POST"]) +def create(): + if request.method == "POST": + if pipeline_state["running"]: + return jsonify({"status": "already_running"}) + thread = threading.Thread(target=_run_pipeline, daemon=True) + thread.start() + return jsonify({"status": "started"}) + return render_template("create.html", state=pipeline_state) + + +@app.route("/create/status") +def create_status(): + with pipeline_lock: + state_copy = dict(pipeline_state) + return jsonify(state_copy) + + # Run browser and start the app -if __name__ == "__main__": - if OPEN_BROWSER: - webbrowser.open(BROWSER_URL, new=2) - print("Website opened in new tab. Refresh if it didn't load.") - app.run(host=HOST, port=PORT) +if __name__ == "__main__": + if OPEN_BROWSER: + webbrowser.open(BROWSER_URL, new=2) + print("Website opened in new tab. Refresh if it didn't load.") + app.run(host=HOST, port=PORT) diff --git a/GUI/backgrounds.html b/GUI/backgrounds.html index 541e39f..ed7957e 100644 --- a/GUI/backgrounds.html +++ b/GUI/backgrounds.html @@ -1,263 +1,235 @@ {% extends "layout.html" %} {% block main %} - -