diff --git a/AGENT.md b/AGENT.md new file mode 100644 index 0000000..bc6dedb --- /dev/null +++ b/AGENT.md @@ -0,0 +1,392 @@ +# AGENT.md — Guidance for Agents & AI Working on VideoMakerBot + +This document guides **agents, bots, and AI assistants** on how to work effectively with the VideoMakerBot codebase. + +--- + +## Quick Start for Agents + +### Core Principle +**VideoMakerBot uses a platform-agnostic factory pattern.** Always respect the abstraction: +- Don't import platform-specific modules (reddit/, threads/) directly +- Always use `platforms/__init__.py` factory functions +- Keep platform-specific logic in `platforms/{platform}/` + +### The "Do This" Checklist +1. ✅ Read existing CLAUDE.md for architecture context +2. ✅ Use factory: `from platforms import get_content_object, get_screenshot_fn` +3. ✅ Return standard `content_object` dict from all fetchers +4. ✅ Test both Reddit and Threads modes before declaring completion +5. ✅ Use config fallback chains for cross-platform keys +6. ✅ Document platform-specific logic in docstrings + +### The "Don't Do This" List +1. ❌ Import `reddit.subreddit` directly in main.py or generic modules +2. ❌ Hardcode subreddit/platform names in core video pipeline +3. ❌ Add platform-specific selectors outside `platforms/{platform}/` +4. ❌ Assume config keys exist without `.get()` and fallbacks +5. 
❌ Modify screenshot_downloader.py for non-Reddit platforms + +--- + +## Understanding the Codebase Structure + +### Entry Point +**`main.py`** — Single CLI entry point using platform factory +- Calls `get_content_object(POST_ID)` from factory +- Calls `get_screenshot_fn()` from factory +- Everything else is platform-agnostic + +### Platform Layer (`platforms/`) +- **`__init__.py`** — Factory dispatch functions (add new platforms here) +- **`threads/fetcher.py`** — Threads Graph API client (returns standard dict) +- **`threads/screenshot.py`** — Threads.net Playwright screenshotter + +### Legacy Platform (`reddit/`) +- **`subreddit.py`** — PRAW API client (returns standard dict) +- No changes needed; called via factory + +### Video Pipeline (`video_creation/`) +- **`final_video.py`** — FFmpeg composition (platform-aware output folder only) +- **`screenshot_downloader.py`** — Reddit Playwright screenshotter (not called for Threads) +- **`voices.py`** — TTS orchestration (platform-agnostic) +- **`background.py`** — Video/audio download (platform-agnostic) + +### TTS Layer (`TTS/`) +- **`engine_wrapper.py`** — Provider abstraction (handles `post_lang` fallback) +- **`*.py`** — Individual provider implementations (elevenlabs, aws_polly, etc.) + +### Config & Utils (`utils/`) +- **`settings.py`** — TOML config loading & validation +- **`videos.py`** — Dedup tracking (`check_done()` + `check_done_by_id()`) +- **`.config.template.toml`** — Config schema with `[settings]`, `[reddit.*]`, `[threads.*]`, `[ai]` + +--- + +## How to Approach Common Tasks + +### Adding a New Social Platform (e.g., X/Twitter) + +**Steps:** +1. 
Create `platforms/twitter/fetcher.py`: + ```python + def get_twitter_content(POST_ID=None) -> dict: + """Fetch post + replies, return standard content_object.""" + # Implement API fetching logic here + return { + "thread_id": ..., + "thread_category": "twitter", # NEW: generic field for output folder + "thread_title": ..., + "thread_url": ..., + "comments": [...] + } + ``` + +2. Create `platforms/twitter/screenshot.py`: + ```python + def get_screenshots_of_twitter_posts(content_object: dict, screenshot_num: int): + """Use Playwright to screenshot X/Twitter posts.""" + # Implement Playwright logic here + ``` + +3. Update `platforms/__init__.py`: + ```python + elif platform == "twitter": + from platforms.twitter.fetcher import get_twitter_content + return get_twitter_content(POST_ID) + ``` + +4. Add config section to `utils/.config.template.toml`: + ```toml + [twitter.creds] + api_key = { ... } + api_secret = { ... } + + [twitter.thread] + post_id = { ... } + ``` + +5. Update `main.py` helper: + ```python + elif platform == "twitter": + return config.get("twitter", {}).get("thread", {}).get("post_id", "") + ``` + +6. **Zero changes needed to:** TTS, backgrounds, video composition, utils. + +**Verification:** +```bash +# Test Reddit (regression check) +sed -i 's/platform = "twitter"/platform = "reddit"/' config.toml +python3 main.py +# Verify results/{subreddit}/ output + +# Test Twitter +sed -i 's/platform = "reddit"/platform = "twitter"/' config.toml +python3 main.py --post-id +# Verify results/twitter/ output +``` + +--- + +### Modifying the Video Pipeline + +**Scenario:** You need to change FFmpeg composition or add a new processing step. + +**Approach:** +1. Check which data the modified code consumes (`content_object` dict) +2. Verify it works with both Reddit and Threads content structures +3. If platform-specific: move logic to `platforms/{platform}/` +4. If generic: keep in `video_creation/` +5. 
Test both modes before merging + +**Example:** Adding video filters +```python +# In final_video.py (generic, works for all platforms) +def apply_filter(video_clip, filter_type): + # No platform-specific logic here + return video_clip.filter(...) + +# Test: +# - Reddit mode produces filtered video +# - Threads mode produces filtered video +``` + +--- + +### Fixing a Bug in Config Handling + +**Scenario:** `post_lang` is not being applied correctly. + +**Debug Path:** +1. Check `utils/settings.py` — how is config loaded? +2. Check `TTS/engine_wrapper.py:182` — uses fallback chain: + ```python + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) + ``` +3. Check `video_creation/final_video.py:78` — same fallback logic +4. If still broken: verify `utils/.config.template.toml` has the key defined +5. Test both platforms with `post_lang = "es"` in config + +--- + +### Adding Support for a New TTS Provider + +**Scenario:** User wants Whisper TTS support. + +**Steps:** +1. Create `TTS/whisper_tts.py`: + ```python + class WhisperTTS: + def make_voice(self, text): + # Call Whisper API + return audio_bytes + ``` + +2. Update `TTS/engine_wrapper.py:make_voice()`: + ```python + elif voice_choice == "whisper": + from TTS.whisper_tts import WhisperTTS + return WhisperTTS().make_voice(text) + ``` + +3. Add config to `utils/.config.template.toml`: + ```toml + [settings.tts] + whisper_api_key = { optional = true, ... } + ``` + +4. Test: + ```bash + # In config.toml: + voice_choice = "whisper" + # Run: python3 main.py + ``` + +--- + +## Common Pitfalls & How to Avoid Them + +### Pitfall 1: Platform-Specific Code in Generic Modules +**Problem:** +```python +# BAD: In video_creation/final_video.py +subreddit = settings.config["reddit"]["thread"]["subreddit"] +``` +**Will break** when platform = "threads" (no reddit.thread.subreddit). 
+ +**Solution:** +```python +# GOOD: +platform = settings.config["settings"].get("platform", "reddit") +if platform == "reddit": + category = settings.config["reddit"]["thread"]["subreddit"] +else: + category = reddit_obj.get("thread_category", platform) +``` + +### Pitfall 2: Hardcoding Selectors in Platform-Agnostic Code +**Problem:** +```python +# BAD: In video_creation/voices.py +element = page.locator("#t1_{comment_id}") # Reddit-only selector! +``` +**Will fail** when running Threads mode (different DOM). + +**Solution:** +- Keep all Playwright logic in `platforms/{platform}/screenshot.py` +- Never hardcode selectors in generic modules + +### Pitfall 3: Forgetting to Test Both Modes +**Problem:** You change `final_video.py`, test with Reddit, declare done. +Threads mode breaks because you didn't test it. + +**Solution:** +```bash +# Test both before committing: +sed -i 's/platform = "threads"/platform = "reddit"/' config.toml +python3 main.py +# Check results/{subreddit}/ + +sed -i 's/platform = "reddit"/platform = "threads"/' config.toml +python3 main.py --post-id +# Check results/threads/ +``` + +### Pitfall 4: Assuming Config Keys Exist +**Problem:** +```python +# BAD: +lang = settings.config["reddit"]["thread"]["post_lang"] +``` +**Will crash** if key doesn't exist. 
+ +**Solution:** +```python +# GOOD: +lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) +``` + +--- + +## Code Review Checklist for Agents + +Before marking work complete, verify: + +- [ ] **No platform imports in main.py** — Uses factory only +- [ ] **Standard content_object dict** — All fetchers return same shape +- [ ] **Platform-specific logic isolated** — Only in `platforms/{platform}/` +- [ ] **Config fallback chains** — No hardcoded section names in generic code +- [ ] **Both modes tested** — Reddit AND Threads produce correct output +- [ ] **Docstrings updated** — New functions document platform assumptions +- [ ] **Error messages clear** — Include platform name + actionable guidance +- [ ] **Video dedup works** — No duplicate videos created + +--- + +## Understanding Data Flow + +### Happy Path: Fetch → TTS → Screenshot → Compose → Output + +``` +1. main.py:main() + └─→ platforms/__init__.py:get_content_object() + └─→ platforms/threads/fetcher.py:get_threads_content() + └─→ Returns: {thread_id, thread_title, comments, ...} + +2. video_creation/voices.py:save_text_to_mp3() + └─→ TTS/engine_wrapper.py:process_text() + └─→ TTS/engine_wrapper.py:make_voice() + └─→ TTS/{provider}.py: {elevenlabs,tiktok,etc} + └─→ Returns: audio_length, comment_count + +3. platforms/__init__.py:get_screenshot_fn() + └─→ platforms/threads/screenshot.py:get_screenshots_of_threads_posts() + └─→ Uses Playwright on threads.net + └─→ Saves: assets/temp/{thread_id}/png/{title,comment_0,etc}.png + +4. video_creation/background.py + └─→ download_background_video() & download_background_audio() + └─→ Uses yt-dlp to fetch YouTube videos/audio + └─→ Saves to: assets/temp/{thread_id}/{video,audio} + +5. video_creation/final_video.py:make_final_video() + └─→ Uses FFmpeg to compose everything + └─→ Reads: audio files, screenshot PNGs, background video + └─→ Writes: results/{thread_category}/{filename}.mp4 + +6. 
utils/videos.py:save_data() + └─→ Records video in videos.json for dedup +``` + +### Config Flow + +``` +config.toml (user settings) + ↓ +utils/settings.py:check_toml() + └─→ Validates against .config.template.toml schema + └─→ Returns: settings.config (dict) + + Used by: + ├─ main.py (platform selection) + ├─ reddit/ (subreddit, etc.) + ├─ platforms/threads/ (Graph API token, etc.) + ├─ TTS/engine_wrapper.py (post_lang fallback) + ├─ video_creation/ (theme, resolution, etc.) + └─ utils/videos.py (dedup behavior) +``` + +--- + +## Deployment Notes + +### Python Version +- **Minimum:** 3.10 +- **Tested:** 3.10, 3.11, 3.12 +- **Reason:** F-strings, type hints, modern async patterns + +### Critical Dependencies +- **reddit platform:** praw 7.8.1 (requires Reddit OAuth app) +- **threads platform:** requests (for Graph API calls) +- **screenshots:** playwright 1.49.1 (requires browser installation: `playwright install`) +- **video:** moviepy 2.2.1, ffmpeg-python 0.2.0 (requires FFmpeg system binary) +- **tts:** varies per provider (elevenlabs, aws_polly, openai, etc.) + +### Versions That Caused Issues +- **yt-dlp==2026.3.17** — Doesn't exist (use 2025.10.14 or latest stable) +- **playwright without browser install** — Will crash on first screenshot + +--- + +## When to Escalate + +### Escalate to User if: +- User needs new platform support (only they know requirements) +- Config changes affect backward compatibility +- Performance optimization needed (only user knows acceptable limits) +- Security concern (token handling, credential storage, etc.) + +### Safe to Implement as Agent: +- Bug fixes within existing architecture +- Adding new TTS providers +- Extending config options for existing platforms +- Performance optimizations (caching, parallelization) +- New filter/processing features that work platform-agnostically +- Documentation & refactoring + +--- + +## Final Guidance + +**Golden Rule:** The factory pattern is your friend. 
When in doubt, check if your change breaks the abstraction. If it does, rethink it. + +**Test Obsessively:** Always run both Reddit and Threads modes. The codebase is designed for multi-platform support, and it's easy to break one platform while fixing another. + +**Document Platform Assumptions:** If your code works differently for Reddit vs Threads, say so explicitly in docstrings and comments. + +**Ask Yourself:** "Would this work for X/Twitter?" If no, it is platform-specific and belongs in `platforms/{platform}/` (e.g. `platforms/threads/`), not in generic code. + +Good luck, and happy contributing! 🎥 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..08c6225 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,405 @@ +# CLAUDE.md — VideoMakerBot Development Guide + +## Project Overview + +**VideoMakerBot** — Automated short-form video creator from social media content. + +**Status:** Production-ready, actively maintained (v3.4.0) +**Language:** Python 3.10+ +**Platforms:** Reddit (original), Threads (NEW), X/Twitter (planned) + +### Core Mission +Transforms social media threads (post + comments/replies) into complete short-form videos with: +- AI-generated speech (7+ TTS providers) +- UI screenshots (Playwright) +- Background video/audio overlays +- FFmpeg composition & output + +--- + +## Architecture at a Glance + +``` +main.py (CLI) + ↓ [platform factory] + ├─→ reddit/subreddit.py [PRAW API] + └─→ platforms/threads/fetcher.py [Graph API] + ↓ [standard data dict] + ├─→ TTS/engine_wrapper.py [7+ providers] + ├─→ screenshot_downloader.py (Reddit) + │ or platforms/threads/screenshot.py (Threads) + ├─→ video_creation/background.py + └─→ video_creation/final_video.py [FFmpeg] + ↓ + results/{category}/{video.mp4} +``` + +### Key Design: Platform Abstraction via Factory Pattern + +**Why:** Single codebase supports multiple platforms without tight coupling. 
+ +**How:** `platforms/__init__.py` exports: +- `get_content_object(POST_ID=None)` — routes to right fetcher +- `get_screenshot_fn()` — routes to right screenshotter + +**Result:** Adding X/Twitter requires only: new module + config section + two `elif` branches. + +--- + +## Data Contract: The "content_object" Dict + +All fetchers return this shape (defined in `platforms/__init__.py`): + +```python +{ + # Unique identifiers + "thread_id": str, # Used for temp folder: assets/temp/{id}/ + "thread_category": str, # "reddit", "threads", etc. → output folder + + # Content + "thread_title": str, # TTS as title + output filename + "thread_url": str, # Playwright navigates here for screenshot + "is_nsfw": bool, # Content filter flag + + # Replies/Comments (mutually exclusive with thread_post) + "comments": [ + { + "comment_body": str, # TTS per reply + "comment_url": str, # Playwright navigates here + "comment_id": str, # CSS selector ID or unique identifier + } + ], + + # OR Story mode: + "thread_post": str | list, # Long-form text (no comments) +} +``` + +**Why:** Loose coupling—TTS, backgrounds, and video composition don't need platform-specific logic. 
+ +--- + +## File Organization + +``` +VideoMakerBot/ +├── platforms/ # Multi-platform abstraction +│ ├── __init__.py # Factory: get_content_object(), get_screenshot_fn() +│ └── threads/ # Threads (Meta) implementation +│ ├── fetcher.py # Graph API → content_object +│ └── screenshot.py # Playwright Threads screenshotter +│ +├── reddit/ # Reddit implementation (kept as-is) +│ └── subreddit.py # PRAW API → content_object + thread_category +│ +├── video_creation/ +│ ├── final_video.py # FFmpeg composition (platform-aware folder naming) +│ ├── screenshot_downloader.py # Playwright Reddit UI capturer +│ ├── voices.py # TTS orchestrator (platform-agnostic) +│ ├── background.py # Video/audio downloader (platform-agnostic) +│ └── data/ +│ ├── videos.json # Dedup tracker +│ ├── cookie-dark-mode.json # Reddit theme cookie +│ └── cookie-threads.json # Threads session cookie (auto-created) +│ +├── TTS/ # Text-to-Speech +│ ├── engine_wrapper.py # Provider abstraction + post_lang fallback +│ ├── elevenlabs.py, aws_polly.py, etc. # 7+ provider implementations +│ +├── utils/ +│ ├── settings.py # Config loading + validation +│ ├── videos.py # check_done() + check_done_by_id() +│ ├── console.py # Rich terminal output +│ ├── .config.template.toml # Config schema (platform sections) +│ └── ... (id, voice, cleanup, etc.) +│ +├── main.py # CLI entry (platform-routed via factory) +├── GUI.py # Flask web UI (localhost:4000) +├── requirements.txt # Dependencies +└── CLAUDE.md / AGENT.md # This file + agent guidelines +``` + +--- + +## Configuration + +**File:** `utils/.config.template.toml` (schema) → `config.toml` (user config) + +### Platform Selection +```toml +[settings] +platform = "reddit" # or "threads" +post_lang = "es-cr" # Optional: translation language (all platforms) +``` + +### Reddit Config +```toml +[reddit.creds] +client_id = "..." # OAuth app +client_secret = "..." +username = "..." +password = "..." 
+2fa = true/false + +[reddit.thread] +subreddit = "AskReddit" +post_id = "" # Leave blank for auto-pick +max_comment_length = 500 +min_comment_length = 1 +min_comments = 20 +blocked_words = "..." +``` + +### Threads Config (NEW) +```toml +[threads.creds] +access_token = "EAABsbCS..." # Meta Graph API token (60-day expiry) +user_id = "12345678901234567" +username = "your_insta" # For Playwright login +password = "your_password" + +[threads.thread] +post_id = "" # Leave blank for auto-pick +max_reply_length = 500 +min_reply_length = 1 +min_replies = 5 +blocked_words = "..." +``` + +### Generic Settings +```toml +[settings] +theme = "dark" +resolution_w = 1080 +resolution_h = 1920 +storymode = false +times_to_run = 1 + +[settings.tts] +voice_choice = "tiktok" # or "elevenlabs", "awspolly", "googletranslate", etc. +random_voice = true +silence_duration = 0.3 + +[settings.background] +background_video = "minecraft" +background_audio = "lofi" +background_audio_volume = 0.15 +``` + +--- + +## Development Guidelines + +### ✅ DO: + +1. **Use platform factory in main.py** + ```python + from platforms import get_content_object, get_screenshot_fn + reddit_object = get_content_object(POST_ID) + screenshot_fn = get_screenshot_fn() + screenshot_fn(reddit_object, number_of_comments) + ``` + +2. **Return standard content dict** from all fetchers + ```python + return { + "thread_id": ..., + "thread_category": ..., # NEW: replaces hardcoded subreddit + "comments": [...] + } + ``` + +3. **Use config fallback chains** for cross-platform keys + ```python + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) + ``` + +4. 
**Read thread_category from dict** instead of config + ```python + # WRONG: + subreddit = settings.config["reddit"]["thread"]["subreddit"] + + # RIGHT: + platform = settings.config["settings"].get("platform", "reddit") + if platform == "reddit": + subreddit = settings.config["reddit"]["thread"]["subreddit"] + else: + subreddit = reddit_obj.get("thread_category", platform) + ``` + +5. **Test both platforms** after core pipeline changes + ```bash + # Test Reddit (must not regress) + sed -i 's/platform = "threads"/platform = "reddit"/' config.toml + python3 main.py + + # Test Threads + sed -i 's/platform = "reddit"/platform = "threads"/' config.toml + python3 main.py --post-id + ``` + +### ❌ DON'T: + +1. **Don't import platform modules directly** in main.py/utils + ```python + # WRONG: from reddit.subreddit import get_subreddit_threads + # RIGHT: from platforms import get_content_object + ``` + +2. **Don't hardcode platform names** in generic modules + ```python + # WRONG in final_video.py: + subreddit = settings.config["reddit"]["thread"]["subreddit"] + + # RIGHT: + subreddit = reddit_obj.get("thread_category", "unknown") + ``` + +3. **Don't add platform-specific UI selectors** outside `platforms/{platform}/screenshot.py` + - Reddit selectors stay in `video_creation/screenshot_downloader.py` + - Threads selectors stay in `platforms/threads/screenshot.py` + +4. 
**Don't assume config keys exist** without fallback + ```python + # WRONG: lang = settings.config["reddit"]["thread"]["post_lang"] + # RIGHT: lang = settings.config.get("settings", {}).get("post_lang", "") + ``` + +--- + +## Platform-Specific Knowledge + +### Reddit +- **API:** PRAW (Python Reddit API Wrapper) +- **Auth:** OAuth app (client_id, secret) + username/password +- **Screenshot:** Playwright on reddit.com/new.reddit.com + - Login form: `input[name="username"]`, `input[name="password"]` + - Post selector: `[data-test-id="post-content"]` + - Comment selector: `#t1_{comment_id}` +- **NSFW:** `submission.over_18` +- **Output folder:** `results/{subreddit}/` + +### Threads +- **API:** Meta Threads Graph API (`https://graph.threads.net/v1.0`) +- **Auth:** User access token (60-day lifetime) via https://developers.facebook.com/ +- **Screenshot:** Playwright on threads.net + - Login form: `input[autocomplete="username"]`, `input[autocomplete="current-password"]` + - Post selector: `article` (universal, more stable than Reddit) + - Cookies saved to: `video_creation/data/cookie-threads.json` +- **NSFW:** API doesn't provide; always False +- **Output folder:** `results/threads/` + +### Future: X/Twitter +Create: `platforms/twitter/fetcher.py` + `platforms/twitter/screenshot.py` + config section +Update: `platforms/__init__.py` with `elif platform == "twitter"` branches + +--- + +## Extending the Project + +### Adding a New TTS Provider +1. Create `TTS/my_provider.py` with a class implementing the TTS interface +2. Add config keys to `[settings.tts]` in `.config.template.toml` +3. Update `TTS/engine_wrapper.py` to call your provider +4. Test with `settings.config["settings"]["tts"]["voice_choice"] = "my_provider"` + +### Adding a New Platform (e.g., X/Twitter) +1. **Create fetcher:** `platforms/twitter/fetcher.py` + - Implement `get_twitter_content(POST_ID=None)` returning standard dict +2. 
**Create screenshotter:** `platforms/twitter/screenshot.py` + - Implement `get_screenshots_of_twitter_posts(content_object, screenshot_num)` +3. **Update config:** Add `[twitter.creds]` and `[twitter.thread]` sections +4. **Update factory:** Add `elif platform == "twitter"` in `platforms/__init__.py` +5. **Update CLI helper:** Add case to `_get_platform_post_id()` in `main.py` +6. **Test:** Verify Reddit mode still works, test Twitter mode end-to-end + +**Zero changes needed to:** TTS, backgrounds, video composition, or utils. + +--- + +## Debugging Tips + +### "No matching distribution found for yt-dlp==2026.3.17" +→ yt-dlp uses date versioning (YYYY.M.DD, no leading zeros). Use `2025.10.14` (latest stable). + +### "Threads API: Invalid or expired access_token" +→ Meta tokens expire every 60 days. Refresh at https://developers.facebook.com/tools/explorer/ + +### Playwright timeout on Threads screenshot +→ Login cookies corrupted or expired. Delete `video_creation/data/cookie-threads.json` to force fresh login next run. + +### "No eligible Threads posts found" +→ Configure `[threads.thread].min_replies = 5` (or lower). Ensure your Threads account has public posts with replies. + +### Video dedup not working +→ Check `video_creation/data/videos.json` is writable. Ensure `check_done_by_id()` is called before fetching content. 
+ +--- + +## Testing Checklist + +- [ ] Reddit mode: `platform = "reddit"` produces video to `results/{subreddit}/` +- [ ] Threads mode: `platform = "threads"` produces video to `results/threads/` +- [ ] Video dedup: Running same post_id twice skips second run +- [ ] Translation: `post_lang = "es"` translates filenames +- [ ] TTS providers: Test with different voice_choice values +- [ ] Background selection: Custom background video/audio works +- [ ] Story mode: storymode=true only uses thread_post, not comments +- [ ] Error handling: Invalid credentials show clear messages + +--- + +## Key Files to Know + +| File | Purpose | +|------|---------| +| `main.py` | CLI entry; orchestrates pipeline via factory | +| `platforms/__init__.py` | Factory dispatch for multi-platform support | +| `platforms/threads/fetcher.py` | Threads Graph API client | +| `platforms/threads/screenshot.py` | Threads.net Playwright screenshotter | +| `video_creation/final_video.py` | FFmpeg composition; platform-aware output naming | +| `TTS/engine_wrapper.py` | TTS provider abstraction; post_lang fallback | +| `utils/settings.py` | Config loading & validation | +| `utils/videos.py` | Video dedup tracking | +| `utils/.config.template.toml` | Config schema | +| `requirements.txt` | Dependencies | + +--- + +## Useful Commands + +```bash +# Install dependencies +pip install -r requirements.txt + +# Run CLI +python3 main.py + +# Run with specific post +python3 main.py + +# Run Flask GUI +python3 GUI.py + +# Check syntax +python3 -m py_compile main.py platforms/threads/fetcher.py + +# Format code +black main.py platforms/ utils/ + +# Lint +pylint main.py +``` + +--- + +## When You Get Stuck + +1. **"What does this module do?"** → Check imports in `main.py` or docstrings +2. **"How do I add support for platform X?"** → See "Adding a New Platform" section above +3. **"Why is my config not being read?"** → Check `utils/settings.py:check_toml()` and `.config.template.toml` schema +4. 
**"Why isn't my TTS provider being called?"** → Check `TTS/engine_wrapper.py:make_voice()` and config `voice_choice` +5. **"How do I debug the Playwright screenshot?"** → Uncomment `page.pause()` in screenshot downloader, run headful browser + +Good luck! 🚀 diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 1026a6d..2dac26d 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -179,7 +179,8 @@ class TTSEngine: def process_text(text: str, clean: bool = True): - lang = settings.config["reddit"]["thread"]["post_lang"] + lang = (settings.config["settings"].get("post_lang") or + settings.config.get("reddit", {}).get("thread", {}).get("post_lang", "")) new_text = sanitize_text(text) if clean else text if lang: print_substep("Translating Text...") diff --git a/main.py b/main.py index 742fedf..c6a4ae4 100755 --- a/main.py +++ b/main.py @@ -6,9 +6,7 @@ from pathlib import Path from subprocess import Popen from typing import Dict, NoReturn -from prawcore import ResponseException - -from reddit.subreddit import get_subreddit_threads +from platforms import get_content_object, get_screenshot_fn from utils import settings from utils.cleanup import cleanup from utils.console import print_markdown, print_step, print_substep @@ -22,9 +20,14 @@ from video_creation.background import ( get_background_config, ) from video_creation.final_video import make_final_video -from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts from video_creation.voices import save_text_to_mp3 +# Guard prawcore import — only available when Reddit is used +try: + from prawcore import ResponseException as _PrawResponseException +except ImportError: + _PrawResponseException = None + __VERSION__ = "3.4.0" print( @@ -46,14 +49,24 @@ reddit_id: str reddit_object: Dict[str, str | list] +def _get_platform_post_id(config: dict, platform: str) -> str: + """Returns the post_id string from config for the active platform.""" + if platform == "reddit": + return 
config.get("reddit", {}).get("thread", {}).get("post_id", "") + elif platform == "threads": + return config.get("threads", {}).get("thread", {}).get("post_id", "") + return "" + + def main(POST_ID=None) -> None: global reddit_id, reddit_object - reddit_object = get_subreddit_threads(POST_ID) + reddit_object = get_content_object(POST_ID) reddit_id = extract_id(reddit_object) print_substep(f"Thread ID is {reddit_id}", style="bold blue") length, number_of_comments = save_text_to_mp3(reddit_object) length = math.ceil(length) - get_screenshots_of_reddit_posts(reddit_object, number_of_comments) + screenshot_fn = get_screenshot_fn() + screenshot_fn(reddit_object, number_of_comments) bg_config = { "video": get_background_config("video"), "audio": get_background_config("audio"), @@ -105,11 +118,15 @@ if __name__ == "__main__": ) sys.exit() try: - if config["reddit"]["thread"]["post_id"]: - for index, post_id in enumerate(config["reddit"]["thread"]["post_id"].split("+")): + platform = config["settings"].get("platform", "reddit") + post_id_str = _get_platform_post_id(config, platform) + + if post_id_str: + for index, post_id in enumerate(post_id_str.split("+")): index += 1 + num_posts = len(post_id_str.split("+")) print_step( - f'on the {index}{("st" if index % 10 == 1 else ("nd" if index % 10 == 2 else ("rd" if index % 10 == 3 else "th")))} post of {len(config["reddit"]["thread"]["post_id"].split("+"))}' + f'on the {index}{("st" if index % 10 == 1 else ("nd" if index % 10 == 2 else ("rd" if index % 10 == 3 else "th")))} post of {num_posts}' ) main(post_id) Popen("cls" if name == "nt" else "clear", shell=True).wait() @@ -119,11 +136,13 @@ if __name__ == "__main__": main() except KeyboardInterrupt: shutdown() - except ResponseException: - print_markdown("## Invalid credentials") - print_markdown("Please check your credentials in the config.toml file") - shutdown() except Exception as err: + # Handle Reddit-specific credential errors if prawcore is available + if 
_PrawResponseException and isinstance(err, _PrawResponseException): + print_markdown("## Invalid Reddit credentials") + print_markdown("Please check your credentials in the config.toml file") + shutdown() + # Generic error handling for all other exceptions config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED" config["settings"]["tts"]["elevenlabs_api_key"] = "REDACTED" config["settings"]["tts"]["openai_api_key"] = "REDACTED" diff --git a/platforms/__init__.py b/platforms/__init__.py new file mode 100644 index 0000000..736163a --- /dev/null +++ b/platforms/__init__.py @@ -0,0 +1,65 @@ +"""Platform abstraction layer for content source selection.""" + +from utils import settings + + +def get_content_object(POST_ID=None) -> dict: + """ + Returns a populated content_object dict for the configured platform. + Dispatches to the appropriate platform fetcher based on settings.config["settings"]["platform"]. + + Args: + POST_ID (str, optional): Specific post ID to fetch. If None, auto-selects a post. + + Returns: + dict: Standard content_object with keys: + - thread_id, thread_title, thread_url, is_nsfw, thread_category, comments + - (or thread_post if storymode is enabled) + + Raises: + ValueError: If platform is unknown or invalid. + """ + platform = settings.config["settings"].get("platform", "reddit").lower() + + if platform == "reddit": + from reddit.subreddit import get_subreddit_threads + return get_subreddit_threads(POST_ID) + + elif platform == "threads": + from platforms.threads.fetcher import get_threads_content + return get_threads_content(POST_ID) + + else: + raise ValueError( + f"Unknown platform: '{platform}'. Valid options: reddit, threads" + ) + + +def get_screenshot_fn(platform: str = None): + """ + Returns the appropriate screenshot function for the given platform. + + Args: + platform (str, optional): Platform name. If None, uses the configured platform. + + Returns: + callable: Screenshot function that takes (content_object, screenshot_num). 
+ + Raises: + ValueError: If platform is unknown or invalid. + """ + if platform is None: + platform = settings.config["settings"].get("platform", "reddit").lower() + + if platform == "reddit": + from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts + return get_screenshots_of_reddit_posts + + elif platform == "threads": + from platforms.threads.screenshot import get_screenshots_of_threads_posts + return get_screenshots_of_threads_posts + + else: + raise ValueError( + f"Unknown platform: '{platform}'. Valid options: reddit, threads" + ) diff --git a/platforms/threads/__init__.py b/platforms/threads/__init__.py new file mode 100644 index 0000000..dc4259b --- /dev/null +++ b/platforms/threads/__init__.py @@ -0,0 +1 @@ +"""Threads (Meta) platform integration for VideoMakerBot.""" diff --git a/platforms/threads/fetcher.py b/platforms/threads/fetcher.py new file mode 100644 index 0000000..ab3ef0a --- /dev/null +++ b/platforms/threads/fetcher.py @@ -0,0 +1,190 @@ +"""Fetches content from Meta Threads via the Graph API.""" + +import requests +from typing import Optional + +from utils import settings +from utils.console import print_step, print_substep +from utils.voice import sanitize_text +from utils.videos import check_done_by_id + + +GRAPH_API_BASE = "https://graph.threads.net/v1.0" + + +def _get_headers() -> dict: + """Returns HTTP headers with Bearer token for Graph API requests.""" + token = settings.config["threads"]["creds"]["access_token"] + if not token: + raise RuntimeError( + "Threads API: access_token is required. " + "Set it in config.toml under [threads.creds]." 
def _extract_error_message(response, fallback: str) -> str:
    """Return error.message from a Graph API error body, or *fallback* if it cannot be read."""
    try:
        payload = response.json()
    except ValueError:
        # Error body was not JSON (e.g. an HTML page from a proxy).
        return fallback
    if isinstance(payload, dict):
        error = payload.get("error")
        if isinstance(error, dict):
            return error.get("message", fallback)
    return fallback


def _api_get(url: str, params: dict = None) -> dict:
    """Makes a GET request to Threads Graph API with error handling.

    Args:
        url: Full request URL.
        params: Optional query parameters.

    Returns:
        dict: The decoded JSON response body.

    Raises:
        RuntimeError: On HTTP errors, connection failures, timeouts, or a
            response body that is not valid JSON.
    """
    try:
        resp = requests.get(url, headers=_get_headers(), params=params or {}, timeout=15)
        resp.raise_for_status()
    except requests.exceptions.HTTPError as e:
        status = e.response.status_code
        if status == 401:
            raise RuntimeError(
                "Threads API: Invalid or expired access_token. "
                "Tokens are valid for 60 days. Refresh at: "
                "https://developers.facebook.com/tools/explorer/"
            ) from e
        if status == 400:
            error_msg = _extract_error_message(e.response, str(e))
            raise RuntimeError(f"Threads API: Bad request — {error_msg}") from e
        raise RuntimeError(f"Threads API: HTTP {status}") from e
    except requests.exceptions.ConnectionError as e:
        raise RuntimeError("Threads API: Cannot connect. Check internet connection.") from e
    except requests.exceptions.Timeout as e:
        raise RuntimeError("Threads API: Request timed out.") from e

    # Decode outside the request try-block so that only a decoding failure is
    # reported as invalid JSON.
    try:
        return resp.json()
    except ValueError as e:
        raise RuntimeError("Threads API: Response was not valid JSON.") from e


def _fetch_post(post_id: str) -> dict:
    """Fetches a single Threads post by ID."""
    url = f"{GRAPH_API_BASE}/{post_id}"
    params = {"fields": "id,text,timestamp,permalink,is_quote_post,media_type"}
    return _api_get(url, params)


def _fetch_replies(post_id: str, limit: int = 50) -> list:
    """Fetches all replies to a Threads post, handling pagination.

    Args:
        post_id: ID of the post whose replies are requested.
        limit: Value sent as the "limit" query parameter on the first request.

    Returns:
        list: The concatenated "data" entries of every page.
    """
    url = f"{GRAPH_API_BASE}/{post_id}/replies"
    params = {
        "fields": "id,text,timestamp,username,permalink",
        "limit": limit,
    }
    results = []

    while url:
        data = _api_get(url, params)
        results.extend(data.get("data", []))
        # Handle pagination — next URL is provided in paging.next
        url = data.get("paging", {}).get("next")
        params = {}  # Next URL already includes all params

    return results
def get_threads_content(POST_ID: str = None) -> dict:
    """
    Fetches Threads content (post + replies) and returns it in the standard content_object format.

    The post is chosen in this order: the POST_ID argument, then
    threads.thread.post_id from the config, then _pick_best_post().

    Args:
        POST_ID (str, optional): Specific post ID to fetch. If None, auto-selects.

    Returns:
        dict: content_object with keys thread_id, thread_title, thread_url,
            is_nsfw, thread_category and comments. Each comment is a dict with
            comment_body, comment_url and comment_id.

    Raises:
        RuntimeError: On API errors or if no eligible content found.
    """
    print_step("Fetching Threads content...")

    thread_cfg = settings.config["threads"]["thread"]

    # Determine which post to fetch
    if POST_ID:
        post = _fetch_post(POST_ID)
        replies = _fetch_replies(POST_ID)
    elif thread_cfg.get("post_id"):
        post_id = thread_cfg["post_id"]
        post = _fetch_post(post_id)
        replies = _fetch_replies(post_id)
    else:
        post, replies = _pick_best_post()

    # Load content filters from config
    max_len = thread_cfg["max_reply_length"]
    min_len = thread_cfg["min_reply_length"]
    blocked_raw = thread_cfg.get("blocked_words") or ""
    blocked = [w.strip().lower() for w in blocked_raw.split(",") if w.strip()]

    # Build content object in standard format
    content = {
        "thread_id": post["id"],
        "thread_title": (post.get("text") or "")[:280],  # Threads has no separate title
        "thread_url": post["permalink"],
        "is_nsfw": False,  # Threads API doesn't provide NSFW flag
        "thread_category": "threads",  # Generic field for output folder naming
        "comments": [],
    }

    # Filter and add replies
    for reply in replies:
        # "text" may be absent or null; treat both as empty.
        body = (reply.get("text") or "").strip()
        if not body:
            continue

        # The screenshotter navigates to comment_url, so a reply without a
        # permalink cannot be used.
        comment_url = reply.get("permalink")
        if not comment_url:
            continue

        # Check blocked words
        lowered = body.lower()
        if any(w in lowered for w in blocked):
            continue

        # Check length constraints
        if not (min_len <= len(body) <= max_len):
            continue

        # Drop replies for which sanitize_text leaves nothing; the original
        # (unsanitised) text is what gets stored.
        if not sanitize_text(body):
            continue

        content["comments"].append({
            "comment_body": body,
            "comment_url": comment_url,
            "comment_id": reply["id"],
        })

    # Log summary
    title_preview = content["thread_title"][:60]
    print_substep(
        f"Fetched Threads post '{title_preview}...' "
        f"with {len(content['comments'])} replies.",
        style="bold green",
    )

    return content
def _screenshot_locator(page, locator, path: str) -> None:
    """Save a screenshot of *locator* to *path*, honouring the configured zoom.

    When settings.zoom is not 1 the page body is zoomed and the element's
    bounding box is scaled by the same factor and used as the clip region.
    If no bounding box is available, or zoom is 1, the element is captured
    directly.
    """
    zoom = settings.config["settings"].get("zoom", 1)
    if zoom != 1:
        page.evaluate(f"document.body.style.zoom={zoom}")
        box = locator.bounding_box()
        if box:
            clip = {k: float("{:.2f}".format(v * zoom)) for k, v in box.items()}
            page.screenshot(clip=clip, path=path)
            return
    locator.screenshot(path=path)


def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int) -> None:
    """
    Downloads screenshots of Threads posts via Playwright.

    Writes assets/temp/{thread_id}/png/title.png for the main post and, unless
    storymode is enabled, comment_{idx}.png for each of the first
    screenshot_num replies that can be captured.

    Args:
        content_object: Standard content dict from platforms/threads/fetcher.py
        screenshot_num: Number of reply screenshots to capture

    Raises:
        RuntimeError: If the main post cannot be located on its page, or if a
            login is needed and _login_to_threads rejects the configuration.
        Exception: Any other error raised while capturing the main post is
            logged and re-raised. Errors on individual replies are logged and
            that reply is skipped.
    """
    W: Final[int] = int(settings.config["settings"]["resolution_w"])
    H: Final[int] = int(settings.config["settings"]["resolution_h"])
    storymode: Final[bool] = settings.config["settings"]["storymode"]

    print_step("Downloading screenshots of Threads posts...")

    thread_id = re.sub(r"[^\w\s-]", "", content_object["thread_id"])
    png_dir = f"assets/temp/{thread_id}/png"
    Path(png_dir).mkdir(parents=True, exist_ok=True)

    theme = settings.config["settings"]["theme"]

    # Device scale factor (higher resolution screenshots)
    dsf = (W // 600) + 1

    with sync_playwright() as p:
        print_substep("Launching headless browser...")
        browser = p.chromium.launch(headless=True)
        try:
            context = browser.new_context(
                locale="en-US",
                color_scheme="dark" if theme == "dark" else "light",
                viewport=ViewportSize(width=W, height=H),
                device_scale_factor=dsf,
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                    "AppleWebKit/537.36 (KHTML, like Gecko) "
                    "Chrome/120.0.0.0 Safari/537.36"
                ),
            )

            # Try to load saved cookies; if not found or invalid, do a fresh login
            needs_login = True
            cookie_path = Path(THREADS_COOKIE_FILE)
            if cookie_path.exists():
                try:
                    with open(cookie_path, encoding="utf-8") as f:
                        saved_cookies = json.load(f)
                    context.add_cookies(saved_cookies)
                    print_substep("Loaded saved Threads session cookies.")
                    needs_login = False
                except (json.JSONDecodeError, OSError):
                    print_substep("Saved cookies corrupted. Logging in fresh...")
            else:
                print_substep("No saved cookies found. Logging in...")

            if needs_login:
                page = context.new_page()
                _login_to_threads(page, context)
                page.close()

            # Screenshot the main post
            page = context.new_page()
            page.goto(content_object["thread_url"], timeout=0)
            page.wait_for_load_state("networkidle")
            page.wait_for_timeout(3000)

            try:
                # On Threads.net post permalink pages, the main post is the first article element
                post_locator = page.locator("article").first
                if not post_locator.is_visible():
                    raise RuntimeError(
                        "Main post article not found on page. "
                        "Check if you're logged in correctly or if the post is deleted."
                    )
                _screenshot_locator(page, post_locator, f"{png_dir}/title.png")
                print_substep("Main post screenshot captured.", style="bold green")
            except Exception as e:
                print_substep(f"Failed to screenshot main post: {e}", style="red")
                raise

            # Screenshots of replies
            if not storymode:
                captured = 0
                comments = content_object["comments"][:max(screenshot_num, 0)]
                for idx, comment in enumerate(comments):
                    try:
                        page.goto(comment["comment_url"], timeout=0)
                        page.wait_for_load_state("networkidle")
                        page.wait_for_timeout(2000)

                        # Each reply permalink page shows that reply as the first article
                        reply_locator = page.locator("article").first
                        if not reply_locator.is_visible():
                            print_substep(
                                f"Reply {idx} article not found. Skipping...", style="yellow"
                            )
                            continue

                        _screenshot_locator(
                            page, reply_locator, f"{png_dir}/comment_{idx}.png"
                        )
                        captured += 1
                    except Exception as e:
                        # Don't crash; just skip this reply
                        print_substep(
                            f"Error capturing reply {idx}: {e}. Skipping...", style="yellow"
                        )
                        continue

                # Report what was actually written, not what was requested.
                print_substep(
                    f"Reply screenshots captured ({captured} total).", style="bold green"
                )
        finally:
            # Close the browser explicitly even when a capture step raised.
            browser.close()

    print_substep("Threads screenshots downloaded successfully.", style="bold green")
} +user_id = { optional = false, explanation = "Numeric Threads user ID", example = "12345678901234567" } +username = { optional = true, explanation = "Instagram/Threads username for Playwright screenshot login" } +password = { optional = true, explanation = "Instagram/Threads password for Playwright screenshot login" } + +[threads.thread] +post_id = { optional = true, default = "", regex = "^((?!://|://)[+a-zA-Z0-9])*$", explanation = "Specific Threads post ID to process. Leave blank for auto-pick.", example = "18044348473548254" } +max_reply_length = { default = 500, optional = false, nmin = 10, nmax = 10000, type = "int", explanation = "Max characters per reply", example = 500, oob_error = "Max reply length should be between 10 and 10000" } +min_reply_length = { default = 1, optional = true, nmin = 0, nmax = 10000, type = "int", explanation = "Min characters per reply", example = 1, oob_error = "Min reply length should be between 0 and 10000" } +min_replies = { default = 5, optional = false, nmin = 1, type = "int", explanation = "Minimum number of replies for a post to be eligible", example = 5, oob_error = "Minimum replies should be at least 1" } +blocked_words = { optional = true, default = "", type = "str", explanation = "Comma-separated list of blocked words/phrases. Posts and replies containing any of these will be skipped.", example = "nsfw, spoiler, politics" } + [settings] +platform = { optional = false, default = "reddit", options = ["reddit", "threads"], explanation = "Which social media platform to pull content from." } +post_lang = { default = "", optional = true, explanation = "The language you would like to translate to. 
Applies to all platforms.", example = "es-cr", options = ['','af', 'ak', 'am', 'ar', 'as', 'ay', 'az', 'be', 'bg', 'bho', 'bm', 'bn', 'bs', 'ca', 'ceb', 'ckb', 'co', 'cs', 'cy', 'da', 'de', 'doi', 'dv', 'ee', 'el', 'en', 'en-US', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'fy', 'ga', 'gd', 'gl', 'gn', 'gom', 'gu', 'ha', 'haw', 'hi', 'hmn', 'hr', 'ht', 'hu', 'hy', 'id', 'ig', 'ilo', 'is', 'it', 'iw', 'ja', 'jw', 'ka', 'kk', 'km', 'kn', 'ko', 'kri', 'ku', 'ky', 'la', 'lb', 'lg', 'ln', 'lo', 'lt', 'lus', 'lv', 'mai', 'mg', 'mi', 'mk', 'ml', 'mn', 'mni-Mtei', 'mr', 'ms', 'mt', 'my', 'ne', 'nl', 'no', 'nso', 'ny', 'om', 'or', 'pa', 'pl', 'ps', 'pt', 'qu', 'ro', 'ru', 'rw', 'sa', 'sd', 'si', 'sk', 'sl', 'sm', 'sn', 'so', 'sq', 'sr', 'st', 'su', 'sv', 'sw', 'ta', 'te', 'tg', 'th', 'ti', 'tk', 'tl', 'tr', 'ts', 'tt', 'ug', 'uk', 'ur', 'uz', 'vi', 'xh', 'yi', 'yo', 'zh-CN', 'zh-TW', 'zu'] } allow_nsfw = { optional = false, type = "bool", default = false, example = false, options = [true, false, ], explanation = "Whether to allow NSFW content, True or False" } theme = { optional = false, default = "dark", example = "light", options = ["dark", "light", "transparent", ], explanation = "Sets the Reddit theme, either LIGHT or DARK. For story mode you can also use a transparent background." } times_to_run = { optional = false, default = 1, example = 2, explanation = "Used if you want to run multiple times. Set to an int e.g. 4 or 29 or 1", type = "int", nmin = 1, oob_error = "It's very hard to run something less than once." 
def check_done_by_id(post_id: str) -> bool:
    """Returns True if a video for this post_id has already been generated.

    Platform-agnostic version of check_done, used by non-Reddit platforms.
    Looks for an entry whose "id" equals str(post_id) in
    ./video_creation/data/videos.json.

    Args:
        post_id (str): The unique post ID from any platform

    Returns:
        bool: True if video already exists, False otherwise. A missing
            videos.json is treated as "nothing generated yet".
    """
    try:
        with open("./video_creation/data/videos.json", "r", encoding="utf-8") as f:
            done_videos = json.load(f)
    except FileNotFoundError:
        return False

    wanted = str(post_id)
    # Tolerate entries that are not dicts or have no "id" key.
    return any(
        isinstance(video, dict) and video.get("id") == wanted for video in done_videos
    )
settings.config["settings"].get("platform", "reddit") + if platform == "reddit": + subreddit = settings.config["reddit"]["thread"]["subreddit"] + else: + subreddit = reddit_obj.get("thread_category", platform) if not exists(f"./results/{subreddit}"): print_substep("The 'results' folder could not be found so it was automatically created.")