From 94d8e45cf718a263d422e7324d48600c5be48ac3 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 3 Feb 2026 18:17:26 +0000 Subject: [PATCH 1/3] feat: integrate Qwen3 TTS and add progress GUI Major changes: - Add Qwen3 TTS provider with authentication support - Remove local pyttsx3 TTS (replaced with cloud TTS) - Add real-time progress GUI with WebSocket updates - Comprehensive Docker setup with docker-compose - Updated README with new documentation New features: - Qwen TTS: Supports multiple speakers and languages - Progress GUI: Live step-by-step tracking at http://localhost:5000 - Docker: Full containerization with environment variables - Config: Example config file for easy setup Files added: - TTS/qwen_tts.py - Qwen3 TTS provider - progress_gui.py - Flask/SocketIO progress server - utils/progress.py - Progress tracking module - GUI/progress.html - Progress dashboard template - GUI/static/css/progress.css - Progress GUI styles - GUI/static/js/progress.js - WebSocket client - docker-compose.yml - Docker orchestration - docker-entrypoint.sh - Container startup script - config.example.toml - Example configuration https://claude.ai/code/session_01HLLH3WjpmRzvaoY6eYSFAD --- Dockerfile | 69 ++++- GUI/progress.html | 87 ++++++ GUI/static/css/progress.css | 539 ++++++++++++++++++++++++++++++++++++ GUI/static/js/progress.js | 307 ++++++++++++++++++++ README.md | 289 +++++++++++-------- TTS/pyttsx.py | 42 --- TTS/qwen_tts.py | 165 +++++++++++ config.example.toml | 80 ++++++ docker-compose.yml | 85 ++++++ docker-entrypoint.sh | 86 ++++++ main.py | 150 ++++++++-- progress_gui.py | 133 +++++++++ ptt.py | 10 - requirements.txt | 4 +- utils/.config.template.toml | 10 +- utils/progress.py | 317 +++++++++++++++++++++ video_creation/voices.py | 4 +- 17 files changed, 2178 insertions(+), 199 deletions(-) create mode 100644 GUI/progress.html create mode 100644 GUI/static/css/progress.css create mode 100644 GUI/static/js/progress.js delete mode 100644 TTS/pyttsx.py create mode 100644 
TTS/qwen_tts.py create mode 100644 config.example.toml create mode 100644 docker-compose.yml create mode 100644 docker-entrypoint.sh create mode 100644 progress_gui.py delete mode 100644 ptt.py create mode 100644 utils/progress.py diff --git a/Dockerfile b/Dockerfile index 3f53ada..7247f3c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,12 +1,67 @@ FROM python:3.10.14-slim -RUN apt update -RUN apt-get install -y ffmpeg -RUN apt install python3-pip -y +# Set environment variables +ENV PYTHONDONTWRITEBYTECODE=1 +ENV PYTHONUNBUFFERED=1 +ENV REDDIT_BOT_GUI=true -RUN mkdir /app -ADD . /app +# Install system dependencies +RUN apt-get update && apt-get install -y --no-install-recommends \ + ffmpeg \ + curl \ + wget \ + gnupg \ + libglib2.0-0 \ + libnss3 \ + libnspr4 \ + libatk1.0-0 \ + libatk-bridge2.0-0 \ + libcups2 \ + libdrm2 \ + libdbus-1-3 \ + libxcb1 \ + libxkbcommon0 \ + libx11-6 \ + libxcomposite1 \ + libxdamage1 \ + libxext6 \ + libxfixes3 \ + libxrandr2 \ + libgbm1 \ + libpango-1.0-0 \ + libcairo2 \ + libasound2 \ + libatspi2.0-0 \ + && rm -rf /var/lib/apt/lists/* + +# Create app directory WORKDIR /app -RUN pip install -r requirements.txt -CMD ["python3", "main.py"] +# Copy requirements first for better caching +COPY requirements.txt . + +# Install Python dependencies +RUN pip install --no-cache-dir -r requirements.txt + +# Install Playwright browsers +RUN playwright install chromium +RUN playwright install-deps chromium + +# Download spaCy language model +RUN python -m spacy download en_core_web_sm + +# Copy application code +COPY . . 
+ +# Create necessary directories +RUN mkdir -p assets/temp assets/backgrounds/video assets/backgrounds/audio results + +# Expose ports +EXPOSE 5000 + +# Set entrypoint +COPY docker-entrypoint.sh /docker-entrypoint.sh +RUN chmod +x /docker-entrypoint.sh + +ENTRYPOINT ["/docker-entrypoint.sh"] +CMD ["python", "main.py"] diff --git a/GUI/progress.html b/GUI/progress.html new file mode 100644 index 0000000..51d899a --- /dev/null +++ b/GUI/progress.html @@ -0,0 +1,87 @@ + + + + + + Reddit Video Maker - Progress + + + + +
+
+

Reddit Video Maker Bot

+

Real-time Progress Tracker

+
+ +
+ +
+

Current Job

+
+
+ + + + +
+

Waiting for video generation to start...

+

Start the bot with: python main.py

+
+ +
+ + +
+

Recent Jobs

+
+

No completed jobs yet

+
+
+
+ +
+

Reddit Video Maker Bot - Progress GUI

+
+ + Connecting... +
+
+
+ + + + diff --git a/GUI/static/css/progress.css b/GUI/static/css/progress.css new file mode 100644 index 0000000..fea8423 --- /dev/null +++ b/GUI/static/css/progress.css @@ -0,0 +1,539 @@ +/* Reddit Video Maker Bot - Progress GUI Styles */ + +:root { + --bg-primary: #0f0f0f; + --bg-secondary: #1a1a1a; + --bg-tertiary: #252525; + --text-primary: #ffffff; + --text-secondary: #a0a0a0; + --text-muted: #666666; + --accent-primary: #ff4500; + --accent-secondary: #ff6b35; + --success: #4caf50; + --warning: #ff9800; + --error: #f44336; + --info: #2196f3; + --border-color: #333333; + --card-shadow: 0 4px 6px rgba(0, 0, 0, 0.3); +} + +* { + margin: 0; + padding: 0; + box-sizing: border-box; +} + +body { + font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, sans-serif; + background: var(--bg-primary); + color: var(--text-primary); + min-height: 100vh; + line-height: 1.6; +} + +.container { + max-width: 1200px; + margin: 0 auto; + padding: 20px; +} + +/* Header */ +header { + text-align: center; + padding: 40px 20px; + border-bottom: 1px solid var(--border-color); + margin-bottom: 30px; +} + +header h1 { + font-size: 2.5rem; + font-weight: 700; + background: linear-gradient(135deg, var(--accent-primary), var(--accent-secondary)); + -webkit-background-clip: text; + -webkit-text-fill-color: transparent; + background-clip: text; + margin-bottom: 10px; +} + +header .subtitle { + color: var(--text-secondary); + font-size: 1.1rem; +} + +/* Cards */ +.card { + background: var(--bg-secondary); + border-radius: 12px; + padding: 24px; + margin-bottom: 24px; + box-shadow: var(--card-shadow); + border: 1px solid var(--border-color); +} + +.card h2 { + font-size: 1.5rem; + margin-bottom: 20px; + color: var(--text-primary); + display: flex; + align-items: center; + gap: 10px; +} + +/* No Job State */ +.no-job { + text-align: center; + padding: 60px 20px; + color: var(--text-secondary); +} + +.no-job .waiting-icon { + width: 80px; + height: 80px; + margin: 0 
auto 20px; + color: var(--text-muted); + animation: pulse 2s infinite; +} + +@keyframes pulse { + 0%, 100% { opacity: 0.5; transform: scale(1); } + 50% { opacity: 1; transform: scale(1.05); } +} + +.no-job .hint { + margin-top: 20px; + font-size: 0.9rem; + color: var(--text-muted); +} + +.no-job code { + background: var(--bg-tertiary); + padding: 4px 12px; + border-radius: 4px; + font-family: 'Fira Code', monospace; +} + +/* Job Info */ +.job-info.hidden { + display: none; +} + +.job-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + margin-bottom: 20px; + gap: 20px; +} + +.job-title { + flex: 1; +} + +.job-title .subreddit { + color: var(--accent-primary); + font-size: 0.9rem; + font-weight: 600; +} + +.job-title h3 { + font-size: 1.3rem; + margin-top: 4px; + word-break: break-word; +} + +/* Status Badges */ +.status-badge { + display: inline-flex; + align-items: center; + padding: 6px 12px; + border-radius: 20px; + font-size: 0.85rem; + font-weight: 600; + text-transform: uppercase; +} + +.status-badge.pending { + background: var(--bg-tertiary); + color: var(--text-secondary); +} + +.status-badge.in_progress { + background: rgba(33, 150, 243, 0.2); + color: var(--info); + animation: glow 1.5s infinite; +} + +@keyframes glow { + 0%, 100% { box-shadow: 0 0 5px rgba(33, 150, 243, 0.3); } + 50% { box-shadow: 0 0 15px rgba(33, 150, 243, 0.5); } +} + +.status-badge.completed { + background: rgba(76, 175, 80, 0.2); + color: var(--success); +} + +.status-badge.failed { + background: rgba(244, 67, 54, 0.2); + color: var(--error); +} + +.status-badge.skipped { + background: rgba(255, 152, 0, 0.2); + color: var(--warning); +} + +/* Overall Progress */ +.overall-progress { + display: flex; + align-items: center; + gap: 15px; + margin-bottom: 30px; +} + +.progress-bar { + flex: 1; + height: 12px; + background: var(--bg-tertiary); + border-radius: 6px; + overflow: hidden; +} + +.progress-fill { + height: 100%; + background: 
linear-gradient(90deg, var(--accent-primary), var(--accent-secondary)); + border-radius: 6px; + transition: width 0.3s ease; + width: 0%; +} + +.progress-text { + font-weight: 600; + color: var(--text-primary); + min-width: 50px; + text-align: right; +} + +/* Steps */ +.steps-container { + display: flex; + flex-direction: column; + gap: 12px; +} + +.step { + display: flex; + align-items: center; + gap: 16px; + padding: 16px; + background: var(--bg-tertiary); + border-radius: 8px; + transition: all 0.3s ease; +} + +.step.active { + background: rgba(33, 150, 243, 0.1); + border-left: 3px solid var(--info); +} + +.step.completed { + background: rgba(76, 175, 80, 0.1); + border-left: 3px solid var(--success); +} + +.step.failed { + background: rgba(244, 67, 54, 0.1); + border-left: 3px solid var(--error); +} + +.step-icon { + width: 40px; + height: 40px; + border-radius: 50%; + display: flex; + align-items: center; + justify-content: center; + font-size: 1.2rem; + flex-shrink: 0; +} + +.step.pending .step-icon { + background: var(--bg-secondary); + color: var(--text-muted); +} + +.step.active .step-icon { + background: var(--info); + color: white; +} + +.step.completed .step-icon { + background: var(--success); + color: white; +} + +.step.failed .step-icon { + background: var(--error); + color: white; +} + +.step.skipped .step-icon { + background: var(--warning); + color: white; +} + +.step-content { + flex: 1; + min-width: 0; +} + +.step-name { + font-weight: 600; + margin-bottom: 4px; +} + +.step-description { + font-size: 0.85rem; + color: var(--text-secondary); +} + +.step-message { + font-size: 0.8rem; + color: var(--text-muted); + margin-top: 4px; + font-style: italic; +} + +.step-progress { + width: 80px; + flex-shrink: 0; +} + +.step-progress-bar { + height: 6px; + background: var(--bg-secondary); + border-radius: 3px; + overflow: hidden; +} + +.step-progress-fill { + height: 100%; + background: var(--info); + transition: width 0.3s ease; +} + 
+.step-progress-text { + font-size: 0.75rem; + color: var(--text-muted); + text-align: right; + margin-top: 4px; +} + +/* Spinner */ +.spinner { + width: 20px; + height: 20px; + border: 2px solid transparent; + border-top-color: currentColor; + border-radius: 50%; + animation: spin 1s linear infinite; +} + +@keyframes spin { + to { transform: rotate(360deg); } +} + +/* Preview Section */ +.preview-section { + margin-top: 30px; + padding-top: 20px; + border-top: 1px solid var(--border-color); +} + +.preview-section h4 { + margin-bottom: 15px; + color: var(--text-secondary); +} + +.preview-container { + background: var(--bg-tertiary); + border-radius: 8px; + overflow: hidden; + min-height: 200px; + display: flex; + align-items: center; + justify-content: center; +} + +.preview-placeholder { + color: var(--text-muted); + text-align: center; + padding: 40px; +} + +.preview-image { + max-width: 100%; + max-height: 400px; + object-fit: contain; +} + +.preview-video { + max-width: 100%; + max-height: 400px; +} + +/* History */ +.history-list { + display: flex; + flex-direction: column; + gap: 12px; +} + +.history-item { + display: flex; + align-items: center; + gap: 16px; + padding: 16px; + background: var(--bg-tertiary); + border-radius: 8px; + transition: transform 0.2s ease; +} + +.history-item:hover { + transform: translateX(5px); +} + +.history-item .subreddit { + color: var(--accent-primary); + font-size: 0.85rem; + font-weight: 600; +} + +.history-item .title { + font-weight: 500; + margin-top: 2px; + white-space: nowrap; + overflow: hidden; + text-overflow: ellipsis; +} + +.history-item .meta { + font-size: 0.8rem; + color: var(--text-muted); + margin-top: 4px; +} + +.history-item .actions { + margin-left: auto; + display: flex; + gap: 8px; +} + +.btn { + padding: 8px 16px; + border: none; + border-radius: 6px; + font-size: 0.85rem; + cursor: pointer; + transition: all 0.2s ease; + text-decoration: none; +} + +.btn-primary { + background: var(--accent-primary); + 
color: white; +} + +.btn-primary:hover { + background: var(--accent-secondary); +} + +.btn-secondary { + background: var(--bg-secondary); + color: var(--text-primary); + border: 1px solid var(--border-color); +} + +.btn-secondary:hover { + background: var(--bg-tertiary); +} + +.no-history { + color: var(--text-muted); + text-align: center; + padding: 40px; +} + +/* Footer */ +footer { + margin-top: 40px; + padding: 20px; + text-align: center; + border-top: 1px solid var(--border-color); + color: var(--text-muted); + font-size: 0.9rem; +} + +.connection-status { + display: flex; + align-items: center; + justify-content: center; + gap: 8px; + margin-top: 10px; +} + +.status-dot { + width: 10px; + height: 10px; + border-radius: 50%; + background: var(--text-muted); + transition: background 0.3s ease; +} + +.status-dot.connected { + background: var(--success); +} + +.status-dot.disconnected { + background: var(--error); +} + +/* Responsive */ +@media (max-width: 768px) { + .container { + padding: 10px; + } + + header h1 { + font-size: 1.8rem; + } + + .job-header { + flex-direction: column; + } + + .step { + flex-wrap: wrap; + } + + .step-progress { + width: 100%; + margin-top: 10px; + } + + .history-item { + flex-direction: column; + align-items: flex-start; + } + + .history-item .actions { + margin-left: 0; + margin-top: 12px; + width: 100%; + } + + .history-item .actions .btn { + flex: 1; + text-align: center; + } +} diff --git a/GUI/static/js/progress.js b/GUI/static/js/progress.js new file mode 100644 index 0000000..5e30210 --- /dev/null +++ b/GUI/static/js/progress.js @@ -0,0 +1,307 @@ +/** + * Reddit Video Maker Bot - Progress GUI JavaScript + * Real-time progress tracking via WebSocket + */ + +class ProgressTracker { + constructor() { + this.socket = null; + this.connected = false; + this.currentJob = null; + this.jobHistory = []; + + // DOM elements + this.elements = { + noJob: document.getElementById('no-job'), + jobInfo: document.getElementById('job-info'), + 
jobSubreddit: document.getElementById('job-subreddit'), + jobTitle: document.getElementById('job-title'), + jobStatusBadge: document.getElementById('job-status-badge'), + overallProgressFill: document.getElementById('overall-progress-fill'), + overallProgressText: document.getElementById('overall-progress-text'), + stepsContainer: document.getElementById('steps-container'), + previewContainer: document.getElementById('preview-container'), + historyList: document.getElementById('history-list'), + connectionDot: document.getElementById('connection-dot'), + connectionText: document.getElementById('connection-text'), + }; + + this.stepIcons = { + pending: '○', + in_progress: '◐', + completed: '✓', + failed: '✗', + skipped: '⊘', + }; + + this.init(); + } + + init() { + this.connectWebSocket(); + this.fetchInitialStatus(); + } + + connectWebSocket() { + const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'; + const wsUrl = `${protocol}//${window.location.host}`; + + this.socket = io(wsUrl + '/progress', { + transports: ['websocket', 'polling'], + reconnection: true, + reconnectionDelay: 1000, + reconnectionAttempts: Infinity, + }); + + this.socket.on('connect', () => { + this.connected = true; + this.updateConnectionStatus(true); + console.log('Connected to progress server'); + }); + + this.socket.on('disconnect', () => { + this.connected = false; + this.updateConnectionStatus(false); + console.log('Disconnected from progress server'); + }); + + this.socket.on('progress_update', (data) => { + this.handleProgressUpdate(data); + }); + + this.socket.on('connect_error', (error) => { + console.error('Connection error:', error); + this.updateConnectionStatus(false); + }); + } + + fetchInitialStatus() { + fetch('/api/status') + .then(response => response.json()) + .then(data => this.handleProgressUpdate(data)) + .catch(error => console.error('Error fetching status:', error)); + } + + updateConnectionStatus(connected) { + const { connectionDot, connectionText } 
= this.elements; + + if (connected) { + connectionDot.classList.add('connected'); + connectionDot.classList.remove('disconnected'); + connectionText.textContent = 'Connected'; + } else { + connectionDot.classList.remove('connected'); + connectionDot.classList.add('disconnected'); + connectionText.textContent = 'Disconnected - Reconnecting...'; + } + } + + handleProgressUpdate(data) { + this.currentJob = data.current_job; + this.jobHistory = data.job_history || []; + + this.renderCurrentJob(); + this.renderHistory(); + } + + renderCurrentJob() { + const { noJob, jobInfo, jobSubreddit, jobTitle, jobStatusBadge, + overallProgressFill, overallProgressText, stepsContainer, previewContainer } = this.elements; + + if (!this.currentJob) { + noJob.classList.remove('hidden'); + jobInfo.classList.add('hidden'); + return; + } + + noJob.classList.add('hidden'); + jobInfo.classList.remove('hidden'); + + // Update job info + jobSubreddit.textContent = `r/${this.currentJob.subreddit}`; + jobTitle.textContent = this.currentJob.title; + + // Update status badge + jobStatusBadge.textContent = this.formatStatus(this.currentJob.status); + jobStatusBadge.className = `status-badge ${this.currentJob.status}`; + + // Update overall progress + const progress = this.currentJob.overall_progress || 0; + overallProgressFill.style.width = `${progress}%`; + overallProgressText.textContent = `${Math.round(progress)}%`; + + // Render steps + this.renderSteps(stepsContainer, this.currentJob.steps); + + // Update preview + this.updatePreview(previewContainer, this.currentJob.steps); + } + + renderSteps(container, steps) { + container.innerHTML = ''; + + steps.forEach((step, index) => { + const stepEl = document.createElement('div'); + stepEl.className = `step ${this.getStepClass(step.status)}`; + + const icon = this.getStepIcon(step.status); + const isActive = step.status === 'in_progress'; + + stepEl.innerHTML = ` +
+ ${isActive ? '
' : icon} +
+
+
${step.name}
+
${step.description}
+ ${step.message ? `
${step.message}
` : ''} +
+ ${step.status === 'in_progress' ? ` +
+
+
+
+
${Math.round(step.progress)}%
+
+ ` : ''} + `; + + container.appendChild(stepEl); + }); + } + + getStepClass(status) { + const classMap = { + pending: 'pending', + in_progress: 'active', + completed: 'completed', + failed: 'failed', + skipped: 'skipped', + }; + return classMap[status] || 'pending'; + } + + getStepIcon(status) { + return this.stepIcons[status] || this.stepIcons.pending; + } + + formatStatus(status) { + const statusMap = { + pending: 'Pending', + in_progress: 'In Progress', + completed: 'Completed', + failed: 'Failed', + skipped: 'Skipped', + }; + return statusMap[status] || status; + } + + updatePreview(container, steps) { + // Find the latest step with a preview + let previewPath = null; + for (let i = steps.length - 1; i >= 0; i--) { + if (steps[i].preview_path) { + previewPath = steps[i].preview_path; + break; + } + } + + if (!previewPath) { + container.innerHTML = ` +
+

Preview will appear here during processing

+
+ `; + return; + } + + // Determine if it's an image or video + const extension = previewPath.split('.').pop().toLowerCase(); + const isVideo = ['mp4', 'webm', 'mov'].includes(extension); + + if (isVideo) { + container.innerHTML = ` + + `; + } else { + container.innerHTML = ` + Preview + `; + } + } + + renderHistory() { + const { historyList } = this.elements; + + if (!this.jobHistory || this.jobHistory.length === 0) { + historyList.innerHTML = '

No completed jobs yet

'; + return; + } + + historyList.innerHTML = ''; + + // Show most recent first + const sortedHistory = [...this.jobHistory].reverse(); + + sortedHistory.forEach(job => { + const item = document.createElement('div'); + item.className = 'history-item'; + + const duration = job.completed_at && job.created_at + ? this.formatDuration(job.completed_at - job.created_at) + : 'N/A'; + + const statusClass = job.status === 'completed' ? 'completed' : 'failed'; + + item.innerHTML = ` +
+ ${job.status === 'completed' ? '✓' : '✗'} +
+
+
r/${job.subreddit}
+
${job.title}
+
+ ${this.formatDate(job.created_at)} • ${duration} + ${job.error ? `• ${job.error}` : ''} +
+
+
+ ${job.output_path ? ` + + View Video + + ` : ''} +
+ `; + + historyList.appendChild(item); + }); + } + + formatDuration(seconds) { + if (seconds < 60) { + return `${Math.round(seconds)}s`; + } else if (seconds < 3600) { + const mins = Math.floor(seconds / 60); + const secs = Math.round(seconds % 60); + return `${mins}m ${secs}s`; + } else { + const hours = Math.floor(seconds / 3600); + const mins = Math.floor((seconds % 3600) / 60); + return `${hours}h ${mins}m`; + } + } + + formatDate(timestamp) { + const date = new Date(timestamp * 1000); + return date.toLocaleString(); + } +} + +// Initialize the progress tracker when DOM is ready +document.addEventListener('DOMContentLoaded', () => { + window.progressTracker = new ProgressTracker(); +}); diff --git a/README.md b/README.md index 8042755..b83a68f 100644 --- a/README.md +++ b/README.md @@ -1,142 +1,213 @@ -# Reddit Video Maker Bot 🎥 +# Reddit Video Maker Bot -All done WITHOUT video editing or asset compiling. Just pure ✨programming magic✨. +Automatically generate short-form videos from Reddit posts. Supports multiple TTS engines including Qwen3 TTS. -Created by Lewis Menelaws & [TMRRW](https://tmrrwinc.ca) +## Features - - - - - - +- **Multiple TTS Engines**: Qwen3 TTS (default), OpenAI TTS, ElevenLabs, TikTok, Google Translate, AWS Polly +- **Real-time Progress GUI**: Web-based dashboard showing video generation progress with live updates +- **Docker Support**: Fully containerized with docker-compose for easy deployment +- **Background Customization**: Multiple background videos and audio tracks included +- **Story Mode**: Special mode for narrative subreddits (r/nosleep, r/tifu, etc.) 
+- **AI Content Sorting**: Optional semantic similarity sorting for relevant posts - +## Quick Start with Docker -## Video Explainer +```bash +# Clone the repository +git clone https://github.com/elebumm/RedditVideoMakerBot.git +cd RedditVideoMakerBot -[![lewisthumbnail](https://user-images.githubusercontent.com/6053155/173631669-1d1b14ad-c478-4010-b57d-d79592a789f2.png) -](https://www.youtube.com/watch?v=3gjcY_00U1w) +# Create your config file +cp config.example.toml config.toml +# Edit config.toml with your credentials -## Motivation 🤔 +# Start with docker-compose +docker-compose up -d -These videos on TikTok, YouTube and Instagram get MILLIONS of views across all platforms and require very little effort. -The only original thing being done is the editing and gathering of all materials... +# View progress at http://localhost:5000 +``` -... but what if we can automate that process? 🤔 +## Manual Installation -## Disclaimers 🚨 +### Requirements -- **At the moment**, this repository won't attempt to upload this content through this bot. It will give you a file that - you will then have to upload manually. This is for the sake of avoiding any sort of community guideline issues. +- Python 3.10, 3.11, or 3.12 +- FFmpeg +- Playwright browsers -## Requirements +### Setup -- Python 3.10 -- Playwright (this should install automatically in installation) +```bash +# Clone repository +git clone https://github.com/elebumm/RedditVideoMakerBot.git +cd RedditVideoMakerBot -## Installation 👩‍💻 +# Create virtual environment +python -m venv venv +source venv/bin/activate # On Windows: .\venv\Scripts\activate -1. Clone this repository: - ```sh - git clone https://github.com/elebumm/RedditVideoMakerBot.git - cd RedditVideoMakerBot - ``` +# Install dependencies +pip install -r requirements.txt -2. 
Create and activate a virtual environment: - - On **Windows**: - ```sh - python -m venv ./venv - .\venv\Scripts\activate - ``` - - On **macOS and Linux**: - ```sh - python3 -m venv ./venv - source ./venv/bin/activate - ``` +# Install Playwright browsers +playwright install +playwright install-deps -3. Install the required dependencies: - ```sh - pip install -r requirements.txt - ``` +# Download spaCy model (for story mode) +python -m spacy download en_core_web_sm -4. Install Playwright and its dependencies: - ```sh - python -m playwright install - python -m playwright install-deps - ``` +# Run the bot +python main.py +``` ---- +## Configuration -**EXPERIMENTAL!!!!** +Create a `config.toml` file in the project root. The bot will prompt you for settings on first run. - - On macOS and Linux (Debian, Arch, Fedora, CentOS, and based on those), you can run an installation script that will automatically install steps 1 to 3. (requires bash) - - `bash <(curl -sL https://raw.githubusercontent.com/elebumm/RedditVideoMakerBot/master/install.sh)` - - This can also be used to update the installation +### Reddit API Setup ---- +1. Go to [Reddit Apps](https://www.reddit.com/prefs/apps) +2. Create a new app with type "script" +3. Note your `client_id` and `client_secret` + +### Qwen TTS Setup (Default) + +Qwen TTS requires a running Qwen TTS server: + +```toml +[settings.tts] +voice_choice = "qwentts" +qwen_api_url = "http://localhost:8080" +qwen_email = "your_email@example.com" +qwen_password = "your_password" +qwen_speaker = "Vivian" # Options: Chelsie, Ethan, Vivian, Asher, Aria, Oliver, Emma, Noah, Sophia +qwen_language = "English" +qwen_instruct = "Warm, friendly, conversational." +``` -5. Run the bot: - ```sh - python main.py - ``` +### TTS Options -6. Visit [the Reddit Apps page](https://www.reddit.com/prefs/apps), and set up an app that is a "script". Paste any URL in the redirect URL field, for example: `https://jasoncameron.dev`. 
+| Provider | Key | Requirements | +|----------|-----|--------------| +| Qwen TTS | `qwentts` | Qwen TTS server | +| OpenAI | `openai` | API key | +| ElevenLabs | `elevenlabs` | API key | +| TikTok | `tiktok` | Session ID | +| Google Translate | `googletranslate` | None (free) | +| AWS Polly | `awspolly` | AWS credentials | +| Streamlabs Polly | `streamlabspolly` | None (rate limited) | -7. The bot will prompt you to fill in your details to connect to the Reddit API and configure the bot to your liking. +## Progress GUI -8. Enjoy 😎 +The bot includes a real-time progress tracking GUI. + +```bash +# Enable GUI mode +export REDDIT_BOT_GUI=true +python main.py + +# Or run GUI standalone +python progress_gui.py +``` -9. If you need to reconfigure the bot, simply open the `config.toml` file and delete the lines that need to be changed. On the next run of the bot, it will help you reconfigure those options. +Access at: http://localhost:5000 -(Note: If you encounter any errors installing or running the bot, try using `python3` or `pip3` instead of `python` or `pip`.) +### Features + +- Real-time progress updates via WebSocket +- Step-by-step visualization +- Preview images during generation +- Job history tracking -For a more detailed guide about the bot, please refer to the [documentation](https://reddit-video-maker-bot.netlify.app/). +## Docker Deployment -## Video +### Using Docker Compose -https://user-images.githubusercontent.com/66544866/173453972-6526e4e6-c6ef-41c5-ab40-5d275e724e7c.mp4 +```yaml +# docker-compose.yml +services: + reddit-video-bot: + build: . 
+ ports: + - "5000:5000" + volumes: + - ./config.toml:/app/config.toml:ro + - ./results:/app/results + environment: + - REDDIT_BOT_GUI=true +``` + +### Environment Variables + +All config options can be set via environment variables: + +| Variable | Description | +|----------|-------------| +| `REDDIT_CLIENT_ID` | Reddit API client ID | +| `REDDIT_CLIENT_SECRET` | Reddit API client secret | +| `REDDIT_USERNAME` | Reddit username | +| `REDDIT_PASSWORD` | Reddit password | +| `REDDIT_SUBREDDIT` | Target subreddit | +| `TTS_VOICE_CHOICE` | TTS provider | +| `QWEN_API_URL` | Qwen TTS server URL | +| `QWEN_EMAIL` | Qwen TTS email | +| `QWEN_PASSWORD` | Qwen TTS password | + +## Project Structure + +``` +RedditVideoMakerBot/ +├── main.py # Entry point +├── progress_gui.py # Progress GUI server +├── config.toml # Configuration file +├── TTS/ # TTS engine modules +│ ├── qwen_tts.py # Qwen TTS provider +│ ├── openai_tts.py # OpenAI TTS provider +│ └── ... +├── video_creation/ # Video generation +├── reddit/ # Reddit API +├── utils/ # Utilities +│ ├── progress.py # Progress tracking +│ └── settings.py # Configuration +├── GUI/ # Web GUI templates +│ ├── progress.html +│ └── static/ +├── Dockerfile +└── docker-compose.yml +``` + +## Output + +Generated videos are saved to `results/{subreddit}/`. + +## Troubleshooting + +### Common Issues + +**FFmpeg not found** +```bash +# Ubuntu/Debian +sudo apt install ffmpeg + +# macOS +brew install ffmpeg + +# Windows +# Download from https://ffmpeg.org/download.html +``` + +**Playwright browsers missing** +```bash +playwright install +playwright install-deps +``` + +**TTS authentication failed** +- Verify your Qwen TTS server is running +- Check credentials in config.toml +- Ensure the API URL is correct + +## License -## Contributing & Ways to improve 📈 - -In its current state, this bot does exactly what it needs to do. However, improvements can always be made! 
- -I have tried to simplify the code so anyone can read it and start contributing at any skill level. Don't be shy :) contribute! - -- [ ] Creating better documentation and adding a command line interface. -- [x] Allowing the user to choose background music for their videos. -- [x] Allowing users to choose a reddit thread instead of being randomized. -- [x] Allowing users to choose a background that is picked instead of the Minecraft one. -- [x] Allowing users to choose between any subreddit. -- [x] Allowing users to change voice. -- [x] Checks if a video has already been created -- [x] Light and Dark modes -- [x] NSFW post filter - -Please read our [contributing guidelines](CONTRIBUTING.md) for more detailed information. - -### For any questions or support join the [Discord](https://discord.gg/qfQSx45xCV) server - -## Developers and maintainers. - -Elebumm (Lewis#6305) - https://github.com/elebumm (Founder) - -Jason Cameron - https://github.com/JasonLovesDoggo (Maintainer) - -Simon (OpenSourceSimon) - https://github.com/OpenSourceSimon - -CallumIO (c.#6837) - https://github.com/CallumIO - -Verq (Verq#2338) - https://github.com/CordlessCoder - -LukaHietala (Pix.#0001) - https://github.com/LukaHietala - -Freebiell (Freebie#3263) - https://github.com/FreebieII - -Aman Raza (electro199#8130) - https://github.com/electro199 - -Cyteon (cyteon) - https://github.com/cyteon - - -## LICENSE [Roboto Fonts](https://fonts.google.com/specimen/Roboto/about) are licensed under [Apache License V2](https://www.apache.org/licenses/LICENSE-2.0) diff --git a/TTS/pyttsx.py b/TTS/pyttsx.py deleted file mode 100644 index bf47601..0000000 --- a/TTS/pyttsx.py +++ /dev/null @@ -1,42 +0,0 @@ -import random - -import pyttsx3 - -from utils import settings - - -class pyttsx: - def __init__(self): - self.max_chars = 5000 - self.voices = [] - - def run( - self, - text: str, - filepath: str, - random_voice=False, - ): - voice_id = settings.config["settings"]["tts"]["python_voice"] - voice_num = 
import random


class QwenTTS:
    """
    Text-to-speech engine backed by a Qwen3 TTS API server.

    Authenticates against the server with email/password to obtain a bearer
    token, then posts synthesis requests and writes the returned audio bytes
    to disk.

    Attributes:
        max_chars (int): Maximum number of characters allowed per API call.
        api_base_url (str): Base URL of the Qwen TTS server (no trailing slash).
        email (str): Email used for authentication.
        password (str): Password used for authentication.
        token (str | None): Bearer token obtained after login.
        available_voices (list): Supported Qwen TTS speaker names.
    """

    # Supported speaker voices.
    AVAILABLE_SPEAKERS = [
        "Chelsie",
        "Ethan",
        "Vivian",
        "Asher",
        "Aria",
        "Oliver",
        "Emma",
        "Noah",
        "Sophia",
    ]

    # Supported output languages.
    AVAILABLE_LANGUAGES = [
        "English",
        "Chinese",
        "Spanish",
        "French",
        "German",
        "Japanese",
        "Korean",
        "Portuguese",
        "Russian",
        "Italian",
        "Arabic",
        "Hindi",
    ]

    def __init__(self):
        # Imported lazily so this module can be imported (e.g. to list the
        # available voices) without project settings being loaded.
        from utils import settings

        self.max_chars = 5000
        self.token = None

        tts_config = settings.config["settings"]["tts"]

        # Normalize the base URL: drop trailing slashes so path joins below
        # never produce "//".
        self.api_base_url = tts_config.get("qwen_api_url", "http://localhost:8080").rstrip("/")

        self.email = tts_config.get("qwen_email")
        self.password = tts_config.get("qwen_password")

        if not self.email or not self.password:
            raise ValueError(
                "Qwen TTS requires 'qwen_email' and 'qwen_password' in settings! "
                "Please configure these in your config.toml file."
            )

        self.available_voices = self.AVAILABLE_SPEAKERS
        self._authenticate()

    def _authenticate(self):
        """Log in to the Qwen TTS server and store the bearer token.

        Raises:
            RuntimeError: If the server is unreachable, responds with a
                non-200 status, or returns no access token.
        """
        import requests

        login_url = f"{self.api_base_url}/api/agent/api/auth/login"
        payload = {"email": self.email, "password": self.password}
        headers = {"Content-Type": "application/json"}

        try:
            response = requests.post(login_url, json=payload, headers=headers, timeout=30)
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to connect to Qwen TTS server: {e}") from e

        if response.status_code != 200:
            raise RuntimeError(
                f"Qwen TTS authentication failed: {response.status_code} {response.text}"
            )

        self.token = response.json().get("access_token")
        if not self.token:
            raise RuntimeError("Qwen TTS authentication failed: No access_token in response")

    def get_available_voices(self):
        """Return the list of supported Qwen TTS speaker names."""
        return self.AVAILABLE_SPEAKERS

    def randomvoice(self):
        """Select and return a random voice from the available voices."""
        return random.choice(self.available_voices)

    def run(self, text: str, filepath: str, random_voice: bool = False):
        """Synthesize ``text`` to speech and save the audio to ``filepath``.

        Args:
            text: The input text to convert.
            filepath: Destination path for the generated audio file.
            random_voice: If True, pick a random speaker instead of the
                configured one.

        Raises:
            RuntimeError: On timeout, connection failure, or a non-200
                response from the TTS endpoint.
        """
        import requests
        from utils import settings

        tts_config = settings.config["settings"]["tts"]

        # Choose the speaker: randomized if requested, otherwise from config.
        speaker = self.randomvoice() if random_voice else tts_config.get("qwen_speaker", "Vivian")
        language = tts_config.get("qwen_language", "English")
        instruct = tts_config.get("qwen_instruct", "Warm, friendly, conversational.")

        tts_url = f"{self.api_base_url}/api/qwen-tts"
        payload = {
            "text": text,
            "language": language,
            "speaker": speaker,
            "instruct": instruct,
        }
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
        }

        try:
            response = requests.post(tts_url, json=payload, headers=headers, timeout=120)

            # The bearer token may have expired: re-authenticate once and retry.
            if response.status_code == 401:
                self._authenticate()
                headers["Authorization"] = f"Bearer {self.token}"
                response = requests.post(tts_url, json=payload, headers=headers, timeout=120)

            if response.status_code != 200:
                raise RuntimeError(
                    f"Qwen TTS generation failed: {response.status_code} {response.text}"
                )

            # Write the raw audio response to the target file.
            with open(filepath, "wb") as f:
                f.write(response.content)

        except requests.exceptions.Timeout as e:
            raise RuntimeError("Qwen TTS request timed out. The server may be overloaded.") from e
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"Failed to generate audio with Qwen TTS: {e}") from e
+ +# OpenAI TTS Settings +openai_api_url = "https://api.openai.com/v1/" +openai_api_key = "" +openai_voice_name = "alloy" +openai_model = "tts-1" + +# ElevenLabs Settings +elevenlabs_voice_name = "Bella" +elevenlabs_api_key = "" + +# TikTok TTS Settings +tiktok_voice = "en_us_001" +tiktok_sessionid = "" + +# AWS Polly Settings +aws_polly_voice = "Matthew" + +# Streamlabs Polly Settings +streamlabs_polly_voice = "Matthew" diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..b87da9e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,85 @@ +version: '3.8' + +services: + reddit-video-bot: + build: + context: . + dockerfile: Dockerfile + container_name: reddit-video-bot + restart: unless-stopped + ports: + - "5000:5000" + volumes: + - ./config.toml:/app/config.toml:ro + - ./results:/app/results + - ./assets:/app/assets + environment: + - REDDIT_BOT_GUI=true + # Reddit Credentials (can also be set in config.toml) + - REDDIT_CLIENT_ID=${REDDIT_CLIENT_ID:-} + - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET:-} + - REDDIT_USERNAME=${REDDIT_USERNAME:-} + - REDDIT_PASSWORD=${REDDIT_PASSWORD:-} + - REDDIT_2FA=${REDDIT_2FA:-false} + # Reddit Thread Settings + - REDDIT_SUBREDDIT=${REDDIT_SUBREDDIT:-AskReddit} + - REDDIT_RANDOM=${REDDIT_RANDOM:-true} + # TTS Settings (Qwen TTS) + - TTS_VOICE_CHOICE=${TTS_VOICE_CHOICE:-qwentts} + - QWEN_API_URL=${QWEN_API_URL:-http://qwen-tts:8080} + - QWEN_EMAIL=${QWEN_EMAIL:-} + - QWEN_PASSWORD=${QWEN_PASSWORD:-} + - QWEN_SPEAKER=${QWEN_SPEAKER:-Vivian} + - QWEN_LANGUAGE=${QWEN_LANGUAGE:-English} + networks: + - reddit-bot-network + depends_on: + - qwen-tts + + qwen-tts: + image: qwen-tts-server:latest + container_name: qwen-tts + restart: unless-stopped + ports: + - "8080:8080" + environment: + - TTS_MODEL=qwen3-tts + networks: + - reddit-bot-network + # Uncomment if using GPU + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] + + # Optional: 
Progress GUI only mode + progress-gui: + build: + context: . + dockerfile: Dockerfile + container_name: reddit-video-gui + restart: unless-stopped + ports: + - "5001:5000" + volumes: + - ./config.toml:/app/config.toml:ro + - ./results:/app/results + - ./assets:/app/assets + environment: + - REDDIT_BOT_GUI=true + command: python progress_gui.py + networks: + - reddit-bot-network + profiles: + - gui-only + +networks: + reddit-bot-network: + driver: bridge + +volumes: + results: + assets: diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 0000000..e2af214 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,86 @@ +#!/bin/bash +set -e + +# Create config from environment if not exists +if [ ! -f /app/config.toml ]; then + echo "Creating config.toml from template..." + + # Check if all required environment variables are set + if [ -z "$REDDIT_CLIENT_ID" ] || [ -z "$REDDIT_CLIENT_SECRET" ] || [ -z "$REDDIT_USERNAME" ] || [ -z "$REDDIT_PASSWORD" ]; then + echo "Warning: Reddit credentials not set via environment variables." 
+ echo "Please set REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME, REDDIT_PASSWORD" + echo "Or mount your config.toml file to /app/config.toml" + fi + + # Create basic config from environment + cat > /app/config.toml << EOF +[reddit.creds] +client_id = "${REDDIT_CLIENT_ID:-}" +client_secret = "${REDDIT_CLIENT_SECRET:-}" +username = "${REDDIT_USERNAME:-}" +password = "${REDDIT_PASSWORD:-}" +2fa = ${REDDIT_2FA:-false} + +[reddit.thread] +random = ${REDDIT_RANDOM:-true} +subreddit = "${REDDIT_SUBREDDIT:-AskReddit}" +post_id = "${REDDIT_POST_ID:-}" +max_comment_length = ${MAX_COMMENT_LENGTH:-500} +min_comment_length = ${MIN_COMMENT_LENGTH:-1} +post_lang = "${POST_LANG:-}" +min_comments = ${MIN_COMMENTS:-20} + +[ai] +ai_similarity_enabled = ${AI_SIMILARITY_ENABLED:-false} +ai_similarity_keywords = "${AI_SIMILARITY_KEYWORDS:-}" + +[settings] +allow_nsfw = ${ALLOW_NSFW:-false} +theme = "${THEME:-dark}" +times_to_run = ${TIMES_TO_RUN:-1} +opacity = ${OPACITY:-0.9} +storymode = ${STORYMODE:-false} +storymodemethod = ${STORYMODEMETHOD:-1} +storymode_max_length = ${STORYMODE_MAX_LENGTH:-1000} +resolution_w = ${RESOLUTION_W:-1080} +resolution_h = ${RESOLUTION_H:-1920} +zoom = ${ZOOM:-1} +channel_name = "${CHANNEL_NAME:-Reddit Tales}" + +[settings.background] +background_video = "${BACKGROUND_VIDEO:-minecraft}" +background_audio = "${BACKGROUND_AUDIO:-lofi}" +background_audio_volume = ${BACKGROUND_AUDIO_VOLUME:-0.15} +enable_extra_audio = ${ENABLE_EXTRA_AUDIO:-false} +background_thumbnail = ${BACKGROUND_THUMBNAIL:-false} +background_thumbnail_font_family = "${THUMBNAIL_FONT_FAMILY:-arial}" +background_thumbnail_font_size = ${THUMBNAIL_FONT_SIZE:-96} +background_thumbnail_font_color = "${THUMBNAIL_FONT_COLOR:-255,255,255}" + +[settings.tts] +voice_choice = "${TTS_VOICE_CHOICE:-qwentts}" +random_voice = ${TTS_RANDOM_VOICE:-true} +elevenlabs_voice_name = "${ELEVENLABS_VOICE_NAME:-Bella}" +elevenlabs_api_key = "${ELEVENLABS_API_KEY:-}" +aws_polly_voice = 
"${AWS_POLLY_VOICE:-Matthew}" +streamlabs_polly_voice = "${STREAMLABS_POLLY_VOICE:-Matthew}" +tiktok_voice = "${TIKTOK_VOICE:-en_us_001}" +tiktok_sessionid = "${TIKTOK_SESSIONID:-}" +silence_duration = ${TTS_SILENCE_DURATION:-0.3} +no_emojis = ${TTS_NO_EMOJIS:-false} +openai_api_url = "${OPENAI_API_URL:-https://api.openai.com/v1/}" +openai_api_key = "${OPENAI_API_KEY:-}" +openai_voice_name = "${OPENAI_VOICE_NAME:-alloy}" +openai_model = "${OPENAI_MODEL:-tts-1}" +qwen_api_url = "${QWEN_API_URL:-http://localhost:8080}" +qwen_email = "${QWEN_EMAIL:-}" +qwen_password = "${QWEN_PASSWORD:-}" +qwen_speaker = "${QWEN_SPEAKER:-Vivian}" +qwen_language = "${QWEN_LANGUAGE:-English}" +qwen_instruct = "${QWEN_INSTRUCT:-Warm, friendly, conversational.}" +EOF + echo "Config file created successfully!" +fi + +# Execute the command passed to docker run +exec "$@" diff --git a/main.py b/main.py index 742fedf..0da1508 100755 --- a/main.py +++ b/main.py @@ -1,10 +1,15 @@ #!/usr/bin/env python +""" +Reddit Video Maker Bot +Generates short-form videos from Reddit posts with Qwen TTS. 
+""" import math +import os import sys from os import name from pathlib import Path from subprocess import Popen -from typing import Dict, NoReturn +from typing import Dict, NoReturn, Optional from prawcore import ResponseException @@ -15,6 +20,7 @@ from utils.console import print_markdown, print_step, print_substep from utils.ffmpeg_install import ffmpeg_install from utils.id import extract_id from utils.version import checkversion +from utils.progress import progress_tracker from video_creation.background import ( chop_background, download_background_audio, @@ -25,7 +31,10 @@ from video_creation.final_video import make_final_video from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts from video_creation.voices import save_text_to_mp3 -__VERSION__ = "3.4.0" +__VERSION__ = "4.0.0" + +# Check if GUI mode is enabled +GUI_MODE = os.environ.get("REDDIT_BOT_GUI", "false").lower() == "true" print( """ @@ -38,7 +47,7 @@ print( """ ) print_markdown( - "### Thanks for using this tool! Feel free to contribute to this project on GitHub! If you have any questions, feel free to join my Discord server or submit a GitHub issue. You can find solutions to many common problems in the documentation: https://reddit-video-maker-bot.netlify.app/" + "### Reddit Video Maker Bot v4.0 - Now with Qwen TTS and Progress GUI!" 
) checkversion(__VERSION__) @@ -46,25 +55,88 @@ reddit_id: str reddit_object: Dict[str, str | list] -def main(POST_ID=None) -> None: +def main(POST_ID: Optional[str] = None) -> None: + """Main video generation function with progress tracking.""" global reddit_id, reddit_object - reddit_object = get_subreddit_threads(POST_ID) - reddit_id = extract_id(reddit_object) - print_substep(f"Thread ID is {reddit_id}", style="bold blue") - length, number_of_comments = save_text_to_mp3(reddit_object) - length = math.ceil(length) - get_screenshots_of_reddit_posts(reddit_object, number_of_comments) - bg_config = { - "video": get_background_config("video"), - "audio": get_background_config("audio"), - } - download_background_video(bg_config["video"]) - download_background_audio(bg_config["audio"]) - chop_background(bg_config, length, reddit_object) - make_final_video(number_of_comments, length, reddit_object, bg_config) - - -def run_many(times) -> None: + + try: + # Step 1: Fetch Reddit Post + progress_tracker.start_step("fetch_reddit", "Connecting to Reddit API...") + + reddit_object = get_subreddit_threads(POST_ID) + reddit_id = extract_id(reddit_object) + + # Start job tracking + progress_tracker.start_job( + reddit_id=reddit_id, + title=reddit_object.get("thread_title", "Unknown"), + subreddit=reddit_object.get("subreddit", "Unknown"), + ) + + progress_tracker.update_step_progress("fetch_reddit", 50, f"Found post: {reddit_id}") + print_substep(f"Thread ID is {reddit_id}", style="bold blue") + progress_tracker.complete_step("fetch_reddit", f"Loaded {len(reddit_object.get('comments', []))} comments") + + # Step 2: Generate TTS Audio + progress_tracker.start_step("generate_tts", "Initializing TTS engine...") + length, number_of_comments = save_text_to_mp3(reddit_object) + length = math.ceil(length) + progress_tracker.complete_step("generate_tts", f"Generated audio for {number_of_comments} comments ({length}s)") + + # Step 3: Capture Screenshots + 
progress_tracker.start_step("capture_screenshots", "Launching browser...") + get_screenshots_of_reddit_posts(reddit_object, number_of_comments) + + # Set preview for screenshots + screenshot_preview = f"assets/temp/{reddit_id}/png/title.png" + if os.path.exists(screenshot_preview): + progress_tracker.set_step_preview("capture_screenshots", f"/assets/temp/{reddit_id}/png/title.png") + + progress_tracker.complete_step("capture_screenshots", f"Captured {number_of_comments + 1} screenshots") + + # Step 4: Download Background + progress_tracker.start_step("download_background", "Loading background config...") + bg_config = { + "video": get_background_config("video"), + "audio": get_background_config("audio"), + } + progress_tracker.update_step_progress("download_background", 30, "Downloading video background...") + download_background_video(bg_config["video"]) + progress_tracker.update_step_progress("download_background", 70, "Downloading audio background...") + download_background_audio(bg_config["audio"]) + progress_tracker.complete_step("download_background", "Background assets ready") + + # Step 5: Process Background + progress_tracker.start_step("process_background", "Chopping background to fit...") + chop_background(bg_config, length, reddit_object) + progress_tracker.complete_step("process_background", f"Background prepared for {length}s video") + + # Step 6: Compose Video + progress_tracker.start_step("compose_video", "Starting video composition...") + make_final_video(number_of_comments, length, reddit_object, bg_config) + progress_tracker.complete_step("compose_video", "Video rendered successfully") + + # Step 7: Finalize + progress_tracker.start_step("finalize", "Cleaning up temporary files...") + subreddit = reddit_object.get("subreddit", "Unknown") + output_path = f"/results/{subreddit}/" + progress_tracker.complete_step("finalize", "Video generation complete!") + + # Mark job as completed + progress_tracker.complete_job(output_path=output_path) + 
print_step("Video generation completed successfully!") + + except Exception as e: + # Handle errors and update progress + current_step = progress_tracker.get_current_step() + if current_step: + progress_tracker.fail_step(current_step.id, str(e)) + progress_tracker.fail_job(str(e)) + raise + + +def run_many(times: int) -> None: + """Run video generation multiple times.""" for x in range(1, times + 1): print_step( f'on the {x}{("th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th")[x % 10]} iteration of {times}' @@ -74,6 +146,7 @@ def run_many(times) -> None: def shutdown() -> NoReturn: + """Clean up and exit.""" if "reddit_id" in globals(): print_markdown("## Clearing temp files") cleanup(reddit_id) @@ -82,12 +155,22 @@ def shutdown() -> NoReturn: sys.exit() +def start_gui_server(): + """Start the progress GUI server in background.""" + from progress_gui import run_gui_background + print_step("Starting Progress GUI server...") + run_gui_background() + print_substep("Progress GUI available at http://localhost:5000", style="bold green") + + if __name__ == "__main__": if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11, 12]: print( - "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program only works on Python 3.10. Please install Python 3.10 and try again." + "This program requires Python 3.10, 3.11, or 3.12. " + "Please install a compatible Python version and try again." ) sys.exit() + ffmpeg_install() directory = Path().absolute() config = settings.check_toml( @@ -95,15 +178,31 @@ if __name__ == "__main__": ) config is False and sys.exit() + # Validate Qwen TTS settings if selected + if config["settings"]["tts"]["voice_choice"].lower() == "qwentts": + if not config["settings"]["tts"].get("qwen_email") or not config["settings"]["tts"].get("qwen_password"): + print_substep( + "Qwen TTS requires 'qwen_email' and 'qwen_password' in config! 
" + "Please configure these settings.", + "bold red", + ) + sys.exit() + + # Validate TikTok settings if selected if ( not settings.config["settings"]["tts"]["tiktok_sessionid"] or settings.config["settings"]["tts"]["tiktok_sessionid"] == "" - ) and config["settings"]["tts"]["voice_choice"] == "tiktok": + ) and config["settings"]["tts"]["voice_choice"].lower() == "tiktok": print_substep( - "TikTok voice requires a sessionid! Check our documentation on how to obtain one.", + "TikTok voice requires a sessionid! Check documentation on how to obtain one.", "bold red", ) sys.exit() + + # Start GUI server if enabled + if GUI_MODE: + start_gui_server() + try: if config["reddit"]["thread"]["post_id"]: for index, post_id in enumerate(config["reddit"]["thread"]["post_id"].split("+")): @@ -127,8 +226,9 @@ if __name__ == "__main__": config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED" config["settings"]["tts"]["elevenlabs_api_key"] = "REDACTED" config["settings"]["tts"]["openai_api_key"] = "REDACTED" + config["settings"]["tts"]["qwen_password"] = "REDACTED" print_step( - f"Sorry, something went wrong with this version! Try again, and feel free to report this issue at GitHub or the Discord community.\n" + f"Sorry, something went wrong! Try again, and feel free to report this issue on GitHub.\n" f"Version: {__VERSION__} \n" f"Error: {err} \n" f'Config: {config["settings"]}' diff --git a/progress_gui.py b/progress_gui.py new file mode 100644 index 0000000..98485cd --- /dev/null +++ b/progress_gui.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python +""" +Progress GUI for Reddit Video Maker Bot. +Real-time progress tracking with steps and previews. 
+""" +import os +import json +import threading +import webbrowser +from pathlib import Path + +from flask import Flask, render_template, send_from_directory, jsonify, request +from flask_socketio import SocketIO, emit + +from utils.progress import progress_tracker + +# Configuration +HOST = "0.0.0.0" +PORT = 5000 + +# Configure Flask app +app = Flask(__name__, template_folder="GUI", static_folder="GUI/static") +app.secret_key = os.urandom(24) + +# Configure SocketIO for real-time updates +socketio = SocketIO(app, cors_allowed_origins="*", async_mode="gevent") + + +# Progress update callback +def broadcast_progress(data): + """Broadcast progress updates to all connected clients.""" + socketio.emit("progress_update", data, namespace="/progress") + + +# Register the callback +progress_tracker.add_update_callback(broadcast_progress) + + +@app.after_request +def after_request(response): + """Ensure responses aren't cached.""" + response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate" + response.headers["Expires"] = 0 + response.headers["Pragma"] = "no-cache" + return response + + +@app.route("/") +def index(): + """Main progress dashboard.""" + return render_template("progress.html") + + +@app.route("/api/status") +def get_status(): + """Get current progress status.""" + return jsonify(progress_tracker.get_status()) + + +@app.route("/api/history") +def get_history(): + """Get job history.""" + return jsonify({ + "jobs": [job.to_dict() for job in progress_tracker.job_history] + }) + + +# Serve static files +@app.route("/static/") +def static_files(filename): + """Serve static files.""" + return send_from_directory("GUI/static", filename) + + +# Serve result videos +@app.route("/results/") +def results(name): + """Serve result videos.""" + return send_from_directory("results", name) + + +# Serve preview images +@app.route("/preview/") +def previews(name): + """Serve preview images.""" + return send_from_directory("assets/temp", name) + + +# Serve temp 
assets (screenshots, audio visualizations) +@app.route("/assets/") +def assets(name): + """Serve asset files.""" + return send_from_directory("assets", name) + + +# SocketIO Events +@socketio.on("connect", namespace="/progress") +def handle_connect(): + """Handle client connection.""" + emit("progress_update", progress_tracker.get_status()) + + +@socketio.on("disconnect", namespace="/progress") +def handle_disconnect(): + """Handle client disconnection.""" + pass + + +@socketio.on("request_status", namespace="/progress") +def handle_request_status(): + """Handle status request from client.""" + emit("progress_update", progress_tracker.get_status()) + + +def run_gui(open_browser=True): + """Run the progress GUI server.""" + if open_browser: + webbrowser.open(f"http://localhost:{PORT}", new=2) + + print(f"Progress GUI running at http://localhost:{PORT}") + socketio.run(app, host=HOST, port=PORT, debug=False) + + +def run_gui_background(): + """Run the GUI server in a background thread.""" + thread = threading.Thread(target=lambda: socketio.run(app, host=HOST, port=PORT, debug=False, use_reloader=False)) + thread.daemon = True + thread.start() + return thread + + +if __name__ == "__main__": + run_gui() diff --git a/ptt.py b/ptt.py deleted file mode 100644 index 6b49ef6..0000000 --- a/ptt.py +++ /dev/null @@ -1,10 +0,0 @@ -import pyttsx3 - -engine = pyttsx3.init() -voices = engine.getProperty("voices") -for voice in voices: - print(voice, voice.id) - engine.setProperty("voice", voice.id) - engine.say("Hello World!") - engine.runAndWait() - engine.stop() diff --git a/requirements.txt b/requirements.txt index 7aa38ee..bc80d05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,9 +8,9 @@ requests==2.32.3 rich==13.9.4 toml==0.10.2 translators==5.9.9 -pyttsx3==2.98 tomlkit==0.13.2 Flask==3.1.1 +Flask-SocketIO==5.3.6 clean-text==0.6.0 unidecode==1.4.0 spacy==3.8.7 @@ -19,3 +19,5 @@ transformers==4.52.4 ffmpeg-python==0.2.0 elevenlabs==1.57.0 yt-dlp==2025.10.22 
+gevent==24.2.1 +gevent-websocket==0.10.1 diff --git a/utils/.config.template.toml b/utils/.config.template.toml index 9185a29..99e36bf 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -44,7 +44,7 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96, background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" } [settings.tts] -voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " } +voice_choice = { optional = false, default = "qwentts", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "qwentts", "OpenAI"], example = "qwentts", explanation = "The voice platform used for TTS generation." } random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" } elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] } elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } @@ -52,11 +52,15 @@ aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" } tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" } tiktok_sessionid = { optional = true, example = "c76bcc3a7625abcc27b508c7db457ff1", explanation = "TikTok sessionid needed if 
you're using the TikTok TTS. Check documentation if you don't know how to obtain it." } -python_voice = { optional = false, default = "1", example = "1", explanation = "The index of the system tts voices (can be downloaded externally, run ptt.py to find value, start from zero)" } -py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" } silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" } no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" } openai_api_url = { optional = true, default = "https://api.openai.com/v1/", example = "https://api.openai.com/v1/", explanation = "The API endpoint URL for OpenAI TTS generation" } openai_api_key = { optional = true, example = "sk-abc123def456...", explanation = "Your OpenAI API key for TTS generation" } openai_voice_name = { optional = false, default = "alloy", example = "alloy", explanation = "The voice used for OpenAI TTS generation", options = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "af_heart"] } openai_model = { optional = false, default = "tts-1", example = "tts-1", explanation = "The model variant used for OpenAI TTS generation", options = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"] } +qwen_api_url = { optional = true, default = "http://localhost:8080", example = "http://localhost:8080", explanation = "The base URL for the Qwen TTS API server" } +qwen_email = { optional = true, example = "you@example.com", explanation = "Email for Qwen TTS authentication" } +qwen_password = { optional = true, example = "your_password", explanation = "Password for Qwen TTS authentication" } +qwen_speaker = { optional = false, default = "Vivian", example = "Vivian", explanation = "The speaker voice for Qwen TTS", options 
= ["Chelsie", "Ethan", "Vivian", "Asher", "Aria", "Oliver", "Emma", "Noah", "Sophia"] } +qwen_language = { optional = false, default = "English", example = "English", explanation = "The language for Qwen TTS output", options = ["English", "Chinese", "Spanish", "French", "German", "Japanese", "Korean", "Portuguese", "Russian", "Italian", "Arabic", "Hindi"] } +qwen_instruct = { optional = true, default = "Warm, friendly, conversational.", example = "Warm, friendly, conversational.", explanation = "Style instructions for Qwen TTS voice generation" } diff --git a/utils/progress.py b/utils/progress.py new file mode 100644 index 0000000..5aea470 --- /dev/null +++ b/utils/progress.py @@ -0,0 +1,317 @@ +""" +Progress tracking module for Reddit Video Maker Bot. +Provides real-time progress updates via WebSocket for the GUI. +""" +import os +import json +import time +from dataclasses import dataclass, field, asdict +from typing import Optional, List, Callable +from enum import Enum +from pathlib import Path + + +class StepStatus(str, Enum): + PENDING = "pending" + IN_PROGRESS = "in_progress" + COMPLETED = "completed" + FAILED = "failed" + SKIPPED = "skipped" + + +@dataclass +class Step: + id: str + name: str + description: str + status: StepStatus = StepStatus.PENDING + progress: float = 0.0 + message: str = "" + preview_path: Optional[str] = None + started_at: Optional[float] = None + completed_at: Optional[float] = None + error: Optional[str] = None + + def to_dict(self): + return { + "id": self.id, + "name": self.name, + "description": self.description, + "status": self.status.value, + "progress": self.progress, + "message": self.message, + "preview_path": self.preview_path, + "started_at": self.started_at, + "completed_at": self.completed_at, + "error": self.error, + "duration": (self.completed_at - self.started_at) if self.completed_at and self.started_at else None, + } + + +@dataclass +class VideoJob: + id: str + reddit_id: str + title: str + subreddit: str + status: 
class StepStatus(str, Enum):
    """Lifecycle states shared by pipeline steps and whole jobs."""

    PENDING = "pending"
    IN_PROGRESS = "in_progress"
    COMPLETED = "completed"
    FAILED = "failed"
    SKIPPED = "skipped"


@dataclass
class Step:
    """One stage of the video-generation pipeline."""

    id: str
    name: str
    description: str
    status: StepStatus = StepStatus.PENDING
    progress: float = 0.0  # 0-100 percentage
    message: str = ""
    preview_path: Optional[str] = None
    started_at: Optional[float] = None  # epoch seconds
    completed_at: Optional[float] = None  # epoch seconds
    error: Optional[str] = None

    def to_dict(self):
        """Serialize for the GUI, adding a derived duration (seconds) when known."""
        duration = None
        if self.completed_at and self.started_at:
            duration = self.completed_at - self.started_at
        return {
            "id": self.id,
            "name": self.name,
            "description": self.description,
            "status": self.status.value,
            "progress": self.progress,
            "message": self.message,
            "preview_path": self.preview_path,
            "started_at": self.started_at,
            "completed_at": self.completed_at,
            "error": self.error,
            "duration": duration,
        }


@dataclass
class VideoJob:
    """A single video-generation run and its pipeline steps."""

    id: str
    reddit_id: str
    title: str
    subreddit: str
    status: StepStatus = StepStatus.PENDING
    steps: List[Step] = field(default_factory=list)
    created_at: float = field(default_factory=time.time)
    completed_at: Optional[float] = None
    output_path: Optional[str] = None
    thumbnail_path: Optional[str] = None
    error: Optional[str] = None

    def to_dict(self):
        """Serialize the job (including all steps) for the GUI."""
        return {
            "id": self.id,
            "reddit_id": self.reddit_id,
            "title": self.title,
            "subreddit": self.subreddit,
            "status": self.status.value,
            "steps": [step.to_dict() for step in self.steps],
            "created_at": self.created_at,
            "completed_at": self.completed_at,
            "output_path": self.output_path,
            "thumbnail_path": self.thumbnail_path,
            "error": self.error,
            "overall_progress": self.get_overall_progress(),
        }

    def get_overall_progress(self) -> float:
        """Percentage of steps fully completed (0-100); 0 when there are none."""
        if not self.steps:
            return 0.0
        completed = sum(1 for s in self.steps if s.status == StepStatus.COMPLETED)
        return (completed / len(self.steps)) * 100


class ProgressTracker:
    """
    Singleton progress tracker that manages video generation jobs and steps.
    Provides callbacks for real-time GUI updates.
    """

    _instance = None
    _initialized = False

    def __new__(cls):
        # Classic singleton: every ProgressTracker() call returns one object.
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        # Guard so repeated ProgressTracker() calls don't reset state.
        if ProgressTracker._initialized:
            return
        ProgressTracker._initialized = True

        self.current_job: Optional[VideoJob] = None
        self.job_history: List[VideoJob] = []
        self._update_callbacks: List[Callable] = []
        self._preview_dir = Path("assets/temp/previews")
        self._preview_dir.mkdir(parents=True, exist_ok=True)

    # ---- callback management -----------------------------------------------

    def add_update_callback(self, callback: Callable):
        """Register a callback function to be called on progress updates."""
        self._update_callbacks.append(callback)

    def remove_update_callback(self, callback: Callable):
        """Remove a previously registered callback."""
        if callback in self._update_callbacks:
            self._update_callbacks.remove(callback)

    def _notify_update(self):
        """Invoke every registered callback with a fresh status snapshot."""
        data = self.get_status()
        for callback in self._update_callbacks:
            try:
                callback(data)
            except Exception as e:
                # A broken GUI callback must never abort video generation.
                print(f"Error in progress callback: {e}")

    # ---- job lifecycle -----------------------------------------------------

    def start_job(self, reddit_id: str, title: str, subreddit: str) -> VideoJob:
        """Start a new video generation job and make it the current job."""
        job = VideoJob(
            id=f"job_{int(time.time())}_{reddit_id}",
            reddit_id=reddit_id,
            title=title,
            subreddit=subreddit,
            status=StepStatus.IN_PROGRESS,
            steps=self._create_default_steps(),
        )
        self.current_job = job
        self._notify_update()
        return job

    def _create_default_steps(self) -> List[Step]:
        """Create the default pipeline steps in execution order."""
        return [
            Step(
                id="fetch_reddit",
                name="Fetch Reddit Post",
                description="Fetching post and comments from Reddit",
            ),
            Step(
                id="generate_tts",
                name="Generate Audio",
                description="Converting text to speech using Qwen TTS",
            ),
            Step(
                id="capture_screenshots",
                name="Capture Screenshots",
                description="Taking screenshots of Reddit comments",
            ),
            Step(
                id="download_background",
                name="Download Background",
                description="Downloading and preparing background video/audio",
            ),
            Step(
                id="process_background",
                name="Process Background",
                description="Chopping background to fit video length",
            ),
            Step(
                id="compose_video",
                name="Compose Video",
                description="Combining all elements into final video",
            ),
            Step(
                id="finalize",
                name="Finalize",
                description="Final processing and cleanup",
            ),
        ]

    # ---- step updates ------------------------------------------------------

    def _find_step(self, step_id: str) -> Optional[Step]:
        """Return the current job's step with *step_id*, or None."""
        if not self.current_job:
            return None
        for step in self.current_job.steps:
            if step.id == step_id:
                return step
        return None

    def start_step(self, step_id: str, message: str = ""):
        """Mark a step as in progress."""
        if not self.current_job:
            return

        step = self._find_step(step_id)
        if step:
            step.status = StepStatus.IN_PROGRESS
            step.started_at = time.time()
            step.message = message
            step.progress = 0

        self._notify_update()

    def update_step_progress(self, step_id: str, progress: float, message: str = ""):
        """Update the progress of a step (clamped to 0-100)."""
        if not self.current_job:
            return

        step = self._find_step(step_id)
        if step:
            step.progress = min(100, max(0, progress))
            if message:
                step.message = message

        self._notify_update()

    def set_step_preview(self, step_id: str, preview_path: str):
        """Set a preview image/video for a step."""
        if not self.current_job:
            return

        step = self._find_step(step_id)
        if step:
            step.preview_path = preview_path

        self._notify_update()

    def complete_step(self, step_id: str, message: str = ""):
        """Mark a step as completed."""
        if not self.current_job:
            return

        step = self._find_step(step_id)
        if step:
            step.status = StepStatus.COMPLETED
            step.completed_at = time.time()
            step.progress = 100
            if message:
                step.message = message

        self._notify_update()

    def fail_step(self, step_id: str, error: str):
        """Mark a step as failed and fail the whole job."""
        if not self.current_job:
            return

        step = self._find_step(step_id)
        if step:
            step.status = StepStatus.FAILED
            step.completed_at = time.time()
            step.error = error
            step.message = f"Failed: {error}"

        self.current_job.status = StepStatus.FAILED
        self.current_job.error = error
        self._notify_update()

    def skip_step(self, step_id: str, reason: str = ""):
        """Mark a step as skipped."""
        if not self.current_job:
            return

        step = self._find_step(step_id)
        if step:
            step.status = StepStatus.SKIPPED
            step.completed_at = time.time()
            step.message = reason or "Skipped"

        self._notify_update()

    # ---- job completion ----------------------------------------------------

    def complete_job(self, output_path: str, thumbnail_path: Optional[str] = None):
        """Mark the current job as completed and archive it in history."""
        if not self.current_job:
            return

        self.current_job.status = StepStatus.COMPLETED
        self.current_job.completed_at = time.time()
        self.current_job.output_path = output_path
        self.current_job.thumbnail_path = thumbnail_path

        self.job_history.append(self.current_job)
        self._notify_update()

    def fail_job(self, error: str):
        """Mark the current job as failed and archive it in history."""
        if not self.current_job:
            return

        self.current_job.status = StepStatus.FAILED
        self.current_job.completed_at = time.time()
        self.current_job.error = error

        self.job_history.append(self.current_job)
        self._notify_update()

    # ---- queries -----------------------------------------------------------

    def get_status(self) -> dict:
        """Get a serializable snapshot: current job + last 10 finished jobs."""
        return {
            "current_job": self.current_job.to_dict() if self.current_job else None,
            "job_history": [job.to_dict() for job in self.job_history[-10:]],
        }

    def get_current_step(self) -> Optional[Step]:
        """Get the currently active (in-progress) step, or None."""
        if not self.current_job:
            return None

        for step in self.current_job.steps:
            if step.status == StepStatus.IN_PROGRESS:
                return step
        return None


# Global progress tracker instance
progress_tracker = ProgressTracker()
-7,7 +7,7 @@ from TTS.elevenlabs import elevenlabs from TTS.engine_wrapper import TTSEngine from TTS.GTTS import GTTS from TTS.openai_tts import OpenAITTS -from TTS.pyttsx import pyttsx +from TTS.qwen_tts import QwenTTS from TTS.streamlabs_polly import StreamlabsPolly from TTS.TikTok import TikTok from utils import settings @@ -20,7 +20,7 @@ TTSProviders = { "AWSPolly": AWSPolly, "StreamlabsPolly": StreamlabsPolly, "TikTok": TikTok, - "pyttsx": pyttsx, + "QwenTTS": QwenTTS, "ElevenLabs": elevenlabs, "OpenAI": OpenAITTS, } From cd9f9f5b406e93e0beee5286cd9e9dcce4460132 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 4 Feb 2026 01:56:03 +0000 Subject: [PATCH 2/3] feat: replace Reddit API with no-OAuth scraper Major changes: - Remove PRAW dependency and Reddit API credentials - Add no-OAuth Reddit scraper using public .json endpoints - No Reddit API keys required - simpler setup! New scraper features: - Uses Reddit's public .json endpoints (www.reddit.com/r/subreddit.json) - Configurable rate limiting via request_delay setting - Automatic retry with exponential backoff - Fetches posts and comments without authentication Files changed: - reddit/scraper.py (new) - No-OAuth Reddit scraper - reddit/subreddit.py - Updated to use scraper instead of PRAW - requirements.txt - Removed praw dependency - utils/.config.template.toml - Removed Reddit credentials - config.example.toml - Updated with scraper settings - docker-entrypoint.sh - Updated for no-auth setup - docker-compose.yml - Removed Reddit credential env vars - main.py - Updated exception handling Limitations: - Subject to Reddit's rate limiting (configurable delay) - ~1000 post cap per subreddit listing - Some comments may be missing in large threads https://claude.ai/code/session_01HLLH3WjpmRzvaoY6eYSFAD --- README.md | 71 ++++- config.example.toml | 48 ++-- docker-compose.yml | 9 +- docker-entrypoint.sh | 20 +- main.py | 10 +- reddit/scraper.py | 506 ++++++++++++++++++++++++++++++++++++ reddit/subreddit.py | 351 
+++++++++++++++++-------- requirements.txt | 1 - utils/.config.template.toml | 11 +- 9 files changed, 845 insertions(+), 182 deletions(-) create mode 100644 reddit/scraper.py diff --git a/README.md b/README.md index b83a68f..f35b736 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,10 @@ # Reddit Video Maker Bot -Automatically generate short-form videos from Reddit posts. Supports multiple TTS engines including Qwen3 TTS. +Automatically generate short-form videos from Reddit posts. No Reddit API credentials required. ## Features +- **No Reddit API Keys Needed**: Uses Reddit's public `.json` endpoints (no OAuth required) - **Multiple TTS Engines**: Qwen3 TTS (default), OpenAI TTS, ElevenLabs, TikTok, Google Translate, AWS Polly - **Real-time Progress GUI**: Web-based dashboard showing video generation progress with live updates - **Docker Support**: Fully containerized with docker-compose for easy deployment @@ -20,7 +21,7 @@ cd RedditVideoMakerBot # Create your config file cp config.example.toml config.toml -# Edit config.toml with your credentials +# Edit config.toml with your TTS settings (no Reddit credentials needed!) # Start with docker-compose docker-compose up -d @@ -57,6 +58,10 @@ playwright install-deps # Download spaCy model (for story mode) python -m spacy download en_core_web_sm +# Copy and configure +cp config.example.toml config.toml +# Edit config.toml with your settings + # Run the bot python main.py ``` @@ -65,11 +70,22 @@ python main.py Create a `config.toml` file in the project root. The bot will prompt you for settings on first run. -### Reddit API Setup +### Reddit Settings (No API Keys Required!) + +The bot scrapes Reddit's public `.json` endpoints - no API credentials needed: -1. Go to [Reddit Apps](https://www.reddit.com/prefs/apps) -2. Create a new app with type "script" -3. 
Note your `client_id` and `client_secret` +```toml +[reddit.scraper] +user_agent = "python:reddit_video_bot:1.0" # Customize to avoid rate limiting +request_delay = 2.0 # Seconds between requests + +[reddit.thread] +subreddit = "AskReddit" # Target subreddit +post_id = "" # Optional: specific post ID +min_comments = 20 # Minimum comments required +``` + +**Note**: This approach is subject to Reddit's rate limiting. If you experience 429 errors, increase `request_delay`. ### Qwen TTS Setup (Default) @@ -86,6 +102,23 @@ qwen_language = "English" qwen_instruct = "Warm, friendly, conversational." ``` +**Qwen TTS API Usage:** + +```bash +# 1. Login to get token +TOKEN=$(curl -s http://localhost:8080/api/agent/api/auth/login \ + -H 'Content-Type: application/json' \ + -d '{"email":"you@example.com","password":"YOUR_PASSWORD"}' \ + | python -c "import sys, json; print(json.load(sys.stdin)['access_token'])") + +# 2. Generate TTS +curl -s http://localhost:8080/api/qwen-tts \ + -H "Authorization: Bearer $TOKEN" \ + -H "Content-Type: application/json" \ + -d '{"text": "Hello!", "language": "English", "speaker": "Vivian", "instruct": "Warm, friendly."}' \ + --output output.wav +``` + ### TTS Options | Provider | Key | Requirements | @@ -140,14 +173,8 @@ services: ### Environment Variables -All config options can be set via environment variables: - | Variable | Description | |----------|-------------| -| `REDDIT_CLIENT_ID` | Reddit API client ID | -| `REDDIT_CLIENT_SECRET` | Reddit API client secret | -| `REDDIT_USERNAME` | Reddit username | -| `REDDIT_PASSWORD` | Reddit password | | `REDDIT_SUBREDDIT` | Target subreddit | | `TTS_VOICE_CHOICE` | TTS provider | | `QWEN_API_URL` | Qwen TTS server URL | @@ -166,7 +193,9 @@ RedditVideoMakerBot/ │ ├── openai_tts.py # OpenAI TTS provider │ └── ... 
├── video_creation/ # Video generation -├── reddit/ # Reddit API +├── reddit/ # Reddit scraper (no-auth) +│ ├── scraper.py # Public .json endpoint scraper +│ └── subreddit.py # Thread fetcher ├── utils/ # Utilities │ ├── progress.py # Progress tracking │ └── settings.py # Configuration @@ -181,10 +210,26 @@ RedditVideoMakerBot/ Generated videos are saved to `results/{subreddit}/`. +## Limitations + +### Reddit Scraper Limitations + +- **Rate Limiting**: Reddit may throttle or block requests. Increase `request_delay` if needed. +- **~1000 Post Cap**: Reddit listings are capped at ~1000 posts. Run daily for continuous collection. +- **Incomplete Comments**: Large threads may have missing comments ("more" placeholders are skipped). +- **Policy Compliance**: Respect Reddit's Terms of Service when using scraped content. + ## Troubleshooting ### Common Issues +**Rate Limited (429 errors)** +```toml +# Increase delay in config.toml +[reddit.scraper] +request_delay = 5.0 # Try 5+ seconds +``` + **FFmpeg not found** ```bash # Ubuntu/Debian diff --git a/config.example.toml b/config.example.toml index a720548..e498147 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,33 +1,35 @@ # Reddit Video Maker Bot Configuration -# Copy this file to config.toml and fill in your credentials +# Copy this file to config.toml and configure your settings +# +# NOTE: No Reddit API credentials required! +# This bot uses Reddit's public .json endpoints (no OAuth needed). -[reddit.creds] -client_id = "your_reddit_client_id" -client_secret = "your_reddit_client_secret" -username = "your_reddit_username" -password = "your_reddit_password" -2fa = false +[reddit.scraper] +# User-Agent string for Reddit requests. Customize to avoid rate limiting. +user_agent = "python:reddit_video_bot:1.0" +# Delay in seconds between Reddit requests. Increase if you get rate limited.
+request_delay = 2.0 [reddit.thread] random = true -subreddit = "AskReddit" -post_id = "" +subreddit = "AskReddit" # Can also use "AskReddit+nosleep" for multiple subreddits +post_id = "" # Optional: specific post ID to use max_comment_length = 500 min_comment_length = 1 -post_lang = "" +post_lang = "" # Optional: translate to this language (e.g., "es", "fr") min_comments = 20 [ai] ai_similarity_enabled = false -ai_similarity_keywords = "" +ai_similarity_keywords = "" # Comma-separated keywords for AI sorting [settings] allow_nsfw = false -theme = "dark" +theme = "dark" # Options: dark, light, transparent times_to_run = 1 opacity = 0.9 -storymode = false -storymodemethod = 1 +storymode = false # Use for narrative subreddits like r/nosleep +storymodemethod = 1 # 0 = single image, 1 = multiple images storymode_max_length = 1000 resolution_w = 1080 resolution_h = 1920 @@ -35,8 +37,8 @@ zoom = 1 channel_name = "Reddit Tales" [settings.background] -background_video = "minecraft" -background_audio = "lofi" +background_video = "minecraft" # Options: minecraft, gta, rocket-league, etc. +background_audio = "lofi" # Options: lofi, lofi-2, chill-summer background_audio_volume = 0.15 enable_extra_audio = false background_thumbnail = false @@ -55,26 +57,26 @@ no_emojis = false qwen_api_url = "http://localhost:8080" qwen_email = "your_email@example.com" qwen_password = "your_password" -qwen_speaker = "Vivian" -qwen_language = "English" +qwen_speaker = "Vivian" # Options: Chelsie, Ethan, Vivian, Asher, Aria, Oliver, Emma, Noah, Sophia +qwen_language = "English" # Options: English, Chinese, Spanish, French, German, Japanese, Korean, etc. qwen_instruct = "Warm, friendly, conversational." 
-# OpenAI TTS Settings +# OpenAI TTS Settings (alternative) openai_api_url = "https://api.openai.com/v1/" openai_api_key = "" openai_voice_name = "alloy" openai_model = "tts-1" -# ElevenLabs Settings +# ElevenLabs Settings (alternative) elevenlabs_voice_name = "Bella" elevenlabs_api_key = "" -# TikTok TTS Settings +# TikTok TTS Settings (alternative) tiktok_voice = "en_us_001" tiktok_sessionid = "" -# AWS Polly Settings +# AWS Polly Settings (alternative) aws_polly_voice = "Matthew" -# Streamlabs Polly Settings +# Streamlabs Polly Settings (alternative) streamlabs_polly_voice = "Matthew" diff --git a/docker-compose.yml b/docker-compose.yml index b87da9e..14d0e43 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -15,14 +15,9 @@ services: - ./assets:/app/assets environment: - REDDIT_BOT_GUI=true - # Reddit Credentials (can also be set in config.toml) - - REDDIT_CLIENT_ID=${REDDIT_CLIENT_ID:-} - - REDDIT_CLIENT_SECRET=${REDDIT_CLIENT_SECRET:-} - - REDDIT_USERNAME=${REDDIT_USERNAME:-} - - REDDIT_PASSWORD=${REDDIT_PASSWORD:-} - - REDDIT_2FA=${REDDIT_2FA:-false} - # Reddit Thread Settings + # Reddit Scraper Settings (no API keys required!) - REDDIT_SUBREDDIT=${REDDIT_SUBREDDIT:-AskReddit} + - REDDIT_REQUEST_DELAY=${REDDIT_REQUEST_DELAY:-2.0} - REDDIT_RANDOM=${REDDIT_RANDOM:-true} # TTS Settings (Qwen TTS) - TTS_VOICE_CHOICE=${TTS_VOICE_CHOICE:-qwentts} diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index e2af214..1848dbc 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -4,22 +4,16 @@ set -e # Create config from environment if not exists if [ ! -f /app/config.toml ]; then echo "Creating config.toml from template..." - - # Check if all required environment variables are set - if [ -z "$REDDIT_CLIENT_ID" ] || [ -z "$REDDIT_CLIENT_SECRET" ] || [ -z "$REDDIT_USERNAME" ] || [ -z "$REDDIT_PASSWORD" ]; then - echo "Warning: Reddit credentials not set via environment variables." 
- echo "Please set REDDIT_CLIENT_ID, REDDIT_CLIENT_SECRET, REDDIT_USERNAME, REDDIT_PASSWORD" - echo "Or mount your config.toml file to /app/config.toml" - fi + echo "Note: No Reddit API credentials required - using public .json endpoints" # Create basic config from environment cat > /app/config.toml << EOF -[reddit.creds] -client_id = "${REDDIT_CLIENT_ID:-}" -client_secret = "${REDDIT_CLIENT_SECRET:-}" -username = "${REDDIT_USERNAME:-}" -password = "${REDDIT_PASSWORD:-}" -2fa = ${REDDIT_2FA:-false} +# Reddit Video Maker Bot Configuration +# No Reddit API credentials required - uses public .json endpoints + +[reddit.scraper] +user_agent = "${REDDIT_USER_AGENT:-python:reddit_video_bot:1.0}" +request_delay = ${REDDIT_REQUEST_DELAY:-2.0} [reddit.thread] random = ${REDDIT_RANDOM:-true} diff --git a/main.py b/main.py index 0da1508..d1a1be1 100755 --- a/main.py +++ b/main.py @@ -11,9 +11,8 @@ from pathlib import Path from subprocess import Popen from typing import Dict, NoReturn, Optional -from prawcore import ResponseException - from reddit.subreddit import get_subreddit_threads +from reddit.scraper import RedditScraperError from utils import settings from utils.cleanup import cleanup from utils.console import print_markdown, print_step, print_substep @@ -218,9 +217,10 @@ if __name__ == "__main__": main() except KeyboardInterrupt: shutdown() - except ResponseException: - print_markdown("## Invalid credentials") - print_markdown("Please check your credentials in the config.toml file") + except RedditScraperError as e: + print_markdown("## Reddit Scraper Error") + print_markdown(f"Error fetching Reddit data: {e}") + print_markdown("This may be due to rate limiting. 
Try again later or increase request_delay in config.") shutdown() except Exception as err: config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED" diff --git a/reddit/scraper.py b/reddit/scraper.py new file mode 100644 index 0000000..564aefd --- /dev/null +++ b/reddit/scraper.py @@ -0,0 +1,506 @@ +""" +No-OAuth Reddit scraper using public .json endpoints. +No API keys required - uses Reddit's public JSON interface. + +Note: This approach is subject to rate limiting and may be blocked by Reddit. +For production use, consider using the official Reddit API with OAuth. +""" +import json +import time +from dataclasses import dataclass +from datetime import datetime, timedelta, timezone +from typing import Any, Dict, List, Optional, Tuple + +import requests + +from utils.console import print_substep + + +# Default User-Agent - customize this to avoid rate limiting +DEFAULT_USER_AGENT = "python:reddit_video_bot:1.0 (no-oauth scraper)" + +# Reddit base URLs +REDDIT_BASES = ["https://www.reddit.com", "https://old.reddit.com"] + + +class RedditScraperError(Exception): + """Exception raised for Reddit scraper errors.""" + pass + + +@dataclass +class RedditPost: + """Represents a Reddit post/submission.""" + id: str + name: str # t3_xxx + title: str + selftext: str + author: str + created_utc: float + score: int + upvote_ratio: float + num_comments: int + permalink: str + url: str + over_18: bool + stickied: bool + subreddit: str + + @classmethod + def from_json(cls, data: Dict[str, Any]) -> "RedditPost": + return cls( + id=data.get("id", ""), + name=data.get("name", ""), + title=data.get("title", ""), + selftext=data.get("selftext", ""), + author=data.get("author", "[deleted]"), + created_utc=float(data.get("created_utc", 0)), + score=int(data.get("score", 0)), + upvote_ratio=float(data.get("upvote_ratio", 0)), + num_comments=int(data.get("num_comments", 0)), + permalink=data.get("permalink", ""), + url=data.get("url", ""), + over_18=bool(data.get("over_18", False)), + 
stickied=bool(data.get("stickied", False)), + subreddit=data.get("subreddit", ""), + ) + + +@dataclass +class RedditComment: + """Represents a Reddit comment.""" + id: str + name: str # t1_xxx + body: str + author: str + created_utc: float + score: int + permalink: str + parent_id: str + link_id: str + depth: int + stickied: bool + + @classmethod + def from_json(cls, data: Dict[str, Any], depth: int = 0) -> "RedditComment": + return cls( + id=data.get("id", ""), + name=data.get("name", ""), + body=data.get("body", ""), + author=data.get("author", "[deleted]"), + created_utc=float(data.get("created_utc", 0)), + score=int(data.get("score", 0)), + permalink=data.get("permalink", ""), + parent_id=data.get("parent_id", ""), + link_id=data.get("link_id", ""), + depth=depth, + stickied=bool(data.get("stickied", False)), + ) + + +class RedditScraper: + """ + No-OAuth Reddit scraper using public .json endpoints. + + Example usage: + scraper = RedditScraper() + posts = scraper.get_subreddit_posts("AskReddit", limit=25, sort="hot") + post, comments = scraper.get_post_with_comments(posts[0].id) + """ + + def __init__( + self, + user_agent: str = DEFAULT_USER_AGENT, + base_url: str = REDDIT_BASES[0], + request_delay: float = 2.0, + timeout: float = 30.0, + max_retries: int = 5, + ): + """ + Initialize the Reddit scraper. 
+ + Args: + user_agent: User-Agent string for requests + base_url: Reddit base URL (www.reddit.com or old.reddit.com) + request_delay: Delay between requests in seconds + timeout: Request timeout in seconds + max_retries: Maximum number of retries per request + """ + self.user_agent = user_agent + self.base_url = base_url.rstrip("/") + self.request_delay = request_delay + self.timeout = timeout + self.max_retries = max_retries + self.session = requests.Session() + self._last_request_time = 0.0 + + def _rate_limit(self) -> None: + """Enforce rate limiting between requests.""" + elapsed = time.time() - self._last_request_time + if elapsed < self.request_delay: + time.sleep(self.request_delay - elapsed) + self._last_request_time = time.time() + + def _fetch_json(self, url: str, params: Optional[Dict[str, Any]] = None) -> Any: + """ + Fetch JSON from a Reddit endpoint with retries and rate limiting. + + Args: + url: Full URL to fetch + params: Query parameters + + Returns: + Parsed JSON response + + Raises: + RedditScraperError: If request fails after retries + """ + headers = { + "User-Agent": self.user_agent, + "Accept": "application/json", + } + + if params is None: + params = {} + params["raw_json"] = 1 # Get unescaped JSON + + last_error: Optional[Exception] = None + + for attempt in range(self.max_retries): + self._rate_limit() + + try: + response = self.session.get( + url, + params=params, + headers=headers, + timeout=self.timeout, + ) + + # Handle rate limiting + if response.status_code == 429: + retry_after = int(response.headers.get("Retry-After", 60)) + print_substep(f"Rate limited. Waiting {retry_after}s...", style="yellow") + time.sleep(max(self.request_delay, retry_after)) + last_error = RedditScraperError(f"Rate limited (429)") + continue + + # Handle server errors + if 500 <= response.status_code < 600: + wait_time = self.request_delay * (attempt + 1) + print_substep(f"Server error {response.status_code}. 
Retrying in {wait_time}s...", style="yellow") + time.sleep(wait_time) + last_error = RedditScraperError(f"Server error: {response.status_code}") + continue + + # Handle other errors + if response.status_code != 200: + raise RedditScraperError( + f"HTTP {response.status_code}: {response.text[:200]}" + ) + + return response.json() + + except requests.exceptions.RequestException as e: + last_error = e + wait_time = self.request_delay * (attempt + 1) + if attempt < self.max_retries - 1: + print_substep(f"Request failed: {e}. Retrying in {wait_time}s...", style="yellow") + time.sleep(wait_time) + continue + + raise RedditScraperError(f"Failed after {self.max_retries} attempts: {last_error}") + + def get_subreddit_posts( + self, + subreddit: str, + sort: str = "hot", + limit: int = 25, + time_filter: str = "all", + after: Optional[str] = None, + ) -> List[RedditPost]: + """ + Get posts from a subreddit. + + Args: + subreddit: Subreddit name (without r/ prefix) + sort: Sort method (hot, new, top, rising, controversial) + limit: Maximum number of posts to retrieve (max 100 per request) + time_filter: Time filter for top/controversial (hour, day, week, month, year, all) + after: Pagination cursor (fullname of last item) + + Returns: + List of RedditPost objects + """ + # Clean subreddit name + subreddit = subreddit.strip() + if subreddit.lower().startswith("r/"): + subreddit = subreddit[2:] + + url = f"{self.base_url}/r/{subreddit}/{sort}.json" + params: Dict[str, Any] = {"limit": min(limit, 100)} + + if sort in ("top", "controversial"): + params["t"] = time_filter + if after: + params["after"] = after + + data = self._fetch_json(url, params) + + posts = [] + children = data.get("data", {}).get("children", []) + + for child in children: + if child.get("kind") != "t3": + continue + post_data = child.get("data", {}) + if post_data: + posts.append(RedditPost.from_json(post_data)) + + return posts + + def get_post_by_id(self, post_id: str) -> Optional[RedditPost]: + """ + Get a 
single post by ID. + + Args: + post_id: Post ID (without t3_ prefix) + + Returns: + RedditPost object or None if not found + """ + # Remove t3_ prefix if present + if post_id.startswith("t3_"): + post_id = post_id[3:] + + url = f"{self.base_url}/comments/{post_id}.json" + params = {"limit": 0} # Don't fetch comments + + try: + data = self._fetch_json(url, params) + except RedditScraperError: + return None + + if not isinstance(data, list) or len(data) < 1: + return None + + post_listing = data[0] + children = post_listing.get("data", {}).get("children", []) + + if not children: + return None + + post_data = children[0].get("data", {}) + return RedditPost.from_json(post_data) if post_data else None + + def get_post_with_comments( + self, + post_id: str, + comment_sort: str = "top", + comment_limit: int = 500, + comment_depth: int = 10, + max_comments: int = 1000, + ) -> Tuple[Optional[RedditPost], List[RedditComment]]: + """ + Get a post with its comments. + + Args: + post_id: Post ID (without t3_ prefix) + comment_sort: Comment sort (top, new, controversial, best, old, qa) + comment_limit: Number of comments per request (max ~500) + comment_depth: Maximum depth of comment tree + max_comments: Hard cap on total comments to return + + Returns: + Tuple of (RedditPost, List[RedditComment]) + """ + # Remove t3_ prefix if present + if post_id.startswith("t3_"): + post_id = post_id[3:] + + url = f"{self.base_url}/comments/{post_id}.json" + params = { + "sort": comment_sort, + "limit": min(comment_limit, 500), + "depth": comment_depth, + } + + data = self._fetch_json(url, params) + + if not isinstance(data, list) or len(data) < 2: + raise RedditScraperError(f"Unexpected response format for post {post_id}") + + # Parse post + post_listing = data[0] + post_children = post_listing.get("data", {}).get("children", []) + + if not post_children: + return None, [] + + post_data = post_children[0].get("data", {}) + post = RedditPost.from_json(post_data) if post_data else None + + # 
Parse comments + comment_listing = data[1] + comment_children = comment_listing.get("data", {}).get("children", []) + + comments: List[RedditComment] = [] + self._flatten_comments(comment_children, depth=0, out=comments, max_comments=max_comments) + + return post, comments + + def _flatten_comments( + self, + children: List[Dict[str, Any]], + depth: int, + out: List[RedditComment], + max_comments: int, + ) -> None: + """ + Recursively flatten comment tree into a list. + + Ignores "more" placeholders - some comments may be missing in large threads. + """ + for child in children: + if len(out) >= max_comments: + return + + kind = child.get("kind") + data = child.get("data", {}) + + if kind == "t1": + # This is a comment + comment = RedditComment.from_json(data, depth=depth) + out.append(comment) + + # Process replies + replies = data.get("replies") + if isinstance(replies, dict): + reply_children = replies.get("data", {}).get("children", []) + if reply_children: + self._flatten_comments( + reply_children, + depth=depth + 1, + out=out, + max_comments=max_comments, + ) + + elif kind == "more": + # "More comments" placeholder - skip (some comments will be missing) + continue + + def search_subreddit( + self, + subreddit: str, + query: str, + sort: str = "relevance", + time_filter: str = "all", + limit: int = 25, + ) -> List[RedditPost]: + """ + Search posts in a subreddit. 
+ + Args: + subreddit: Subreddit name + query: Search query + sort: Sort method (relevance, hot, top, new, comments) + time_filter: Time filter (hour, day, week, month, year, all) + limit: Maximum results + + Returns: + List of matching posts + """ + subreddit = subreddit.strip() + if subreddit.lower().startswith("r/"): + subreddit = subreddit[2:] + + url = f"{self.base_url}/r/{subreddit}/search.json" + params = { + "q": query, + "sort": sort, + "t": time_filter, + "limit": min(limit, 100), + "restrict_sr": "on", # Restrict to subreddit + } + + data = self._fetch_json(url, params) + + posts = [] + children = data.get("data", {}).get("children", []) + + for child in children: + if child.get("kind") != "t3": + continue + post_data = child.get("data", {}) + if post_data: + posts.append(RedditPost.from_json(post_data)) + + return posts + + def get_posts_newer_than( + self, + subreddit: str, + days: int = 30, + max_posts: int = 1000, + ) -> List[RedditPost]: + """ + Get posts from a subreddit newer than a specified number of days. + + Note: Reddit listings are capped at ~1000 posts. If the subreddit has + more posts than this in the time window, older posts will be missed. 
+ + Args: + subreddit: Subreddit name + days: Number of days to look back + max_posts: Maximum posts to retrieve + + Returns: + List of posts within the time window + """ + cutoff = datetime.now(timezone.utc) - timedelta(days=days) + cutoff_ts = cutoff.timestamp() + + all_posts: List[RedditPost] = [] + after: Optional[str] = None + + while len(all_posts) < max_posts: + posts = self.get_subreddit_posts( + subreddit=subreddit, + sort="new", + limit=100, + after=after, + ) + + if not posts: + break + + for post in posts: + # Skip stickied posts (they can be old) + if post.stickied: + continue + + if post.created_utc < cutoff_ts: + # Reached posts older than cutoff + return all_posts + + all_posts.append(post) + + if len(all_posts) >= max_posts: + return all_posts + + # Set pagination cursor + after = posts[-1].name + + return all_posts + + +# Global scraper instance +_scraper: Optional[RedditScraper] = None + + +def get_scraper() -> RedditScraper: + """Get or create the global Reddit scraper instance.""" + global _scraper + if _scraper is None: + _scraper = RedditScraper() + return _scraper diff --git a/reddit/subreddit.py b/reddit/subreddit.py index 5f2ac5f..d8cf624 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -1,160 +1,283 @@ +""" +Reddit subreddit thread fetcher using no-OAuth scraper. +No API keys required - uses Reddit's public JSON endpoints. 
+""" import re +from typing import Dict, List, Optional, Any, Tuple -import praw -from praw.models import MoreComments -from prawcore.exceptions import ResponseException - +from reddit.scraper import get_scraper, RedditPost, RedditComment, RedditScraperError from utils import settings from utils.ai_methods import sort_by_similarity from utils.console import print_step, print_substep from utils.posttextparser import posttextparser -from utils.subreddit import get_subreddit_undone from utils.videos import check_done from utils.voice import sanitize_text -def get_subreddit_threads(POST_ID: str): - """ - Returns a list of threads from the AskReddit subreddit. +class SubmissionWrapper: + """Wrapper to make RedditPost compatible with existing utility functions.""" + + def __init__(self, post: RedditPost): + self.id = post.id + self.title = post.title + self.selftext = post.selftext + self.author = post.author + self.score = post.score + self.upvote_ratio = post.upvote_ratio + self.num_comments = post.num_comments + self.permalink = post.permalink + self.url = post.url + self.over_18 = post.over_18 + self.stickied = post.stickied + self.subreddit_name = post.subreddit + self._post = post + + def to_post(self) -> RedditPost: + return self._post + + +def get_subreddit_threads(POST_ID: Optional[str] = None) -> Dict[str, Any]: """ + Fetches a Reddit thread and its comments using the no-OAuth scraper. + No API keys required. 
- print_substep("Logging into Reddit.") + Args: + POST_ID: Optional specific post ID to fetch - content = {} - if settings.config["reddit"]["creds"]["2fa"]: - print("\nEnter your two-factor authentication code from your authenticator app.\n") - code = input("> ") - print() - pw = settings.config["reddit"]["creds"]["password"] - passkey = f"{pw}:{code}" - else: - passkey = settings.config["reddit"]["creds"]["password"] - username = settings.config["reddit"]["creds"]["username"] - if str(username).casefold().startswith("u/"): - username = username[2:] - try: - reddit = praw.Reddit( - client_id=settings.config["reddit"]["creds"]["client_id"], - client_secret=settings.config["reddit"]["creds"]["client_secret"], - user_agent="Accessing Reddit threads", - username=username, - passkey=passkey, - check_for_async=False, - ) - except ResponseException as e: - if e.response.status_code == 401: - print("Invalid credentials - please check them in config.toml") - except: - print("Something went wrong...") + Returns: + Dictionary containing thread data and comments + """ + print_substep("Connecting to Reddit (no-auth mode)...") - # Ask user for subreddit input - print_step("Getting subreddit threads...") + scraper = get_scraper() + content: Dict[str, Any] = {} similarity_score = 0 - if not settings.config["reddit"]["thread"][ - "subreddit" - ]: # note to user. you can have multiple subreddits via reddit.subreddit("redditdev+learnpython") - try: - subreddit = reddit.subreddit( - re.sub(r"r\/", "", input("What subreddit would you like to pull from? ")) - # removes the r/ from the input - ) - except ValueError: - subreddit = reddit.subreddit("askreddit") + + # Get subreddit from config or user input + print_step("Getting subreddit threads...") + + subreddit_name = settings.config["reddit"]["thread"].get("subreddit", "") + + if not subreddit_name: + subreddit_name = input("What subreddit would you like to pull from? 
") + subreddit_name = re.sub(r"^r/", "", subreddit_name.strip()) + if not subreddit_name: + subreddit_name = "AskReddit" print_substep("Subreddit not defined. Using AskReddit.") else: - sub = settings.config["reddit"]["thread"]["subreddit"] - print_substep(f"Using subreddit: r/{sub} from TOML config") - subreddit_choice = sub - if str(subreddit_choice).casefold().startswith("r/"): # removes the r/ from the input - subreddit_choice = subreddit_choice[2:] - subreddit = reddit.subreddit(subreddit_choice) - - if POST_ID: # would only be called if there are multiple queued posts - submission = reddit.submission(id=POST_ID) - - elif ( - settings.config["reddit"]["thread"]["post_id"] - and len(str(settings.config["reddit"]["thread"]["post_id"]).split("+")) == 1 - ): - submission = reddit.submission(id=settings.config["reddit"]["thread"]["post_id"]) - elif settings.config["ai"]["ai_similarity_enabled"]: # ai sorting based on comparison - threads = subreddit.hot(limit=50) - keywords = settings.config["ai"]["ai_similarity_keywords"].split(",") - keywords = [keyword.strip() for keyword in keywords] - # Reformat the keywords for printing - keywords_print = ", ".join(keywords) - print(f"Sorting threads by similarity to the given keywords: {keywords_print}") - threads, similarity_scores = sort_by_similarity(threads, keywords) - submission, similarity_score = get_subreddit_undone( - threads, subreddit, similarity_scores=similarity_scores - ) - else: - threads = subreddit.hot(limit=25) - submission = get_subreddit_undone(threads, subreddit) + # Clean the subreddit name + if str(subreddit_name).lower().startswith("r/"): + subreddit_name = subreddit_name[2:] + print_substep(f"Using subreddit: r/{subreddit_name} from config") + + # Get the submission + submission: Optional[RedditPost] = None + + try: + if POST_ID: + # Specific post ID provided (for queued posts) + submission = scraper.get_post_by_id(POST_ID) + if not submission: + raise RedditScraperError(f"Could not find post with 
ID: {POST_ID}") + + elif settings.config["reddit"]["thread"].get("post_id"): + # Post ID from config (single post) + post_id = str(settings.config["reddit"]["thread"]["post_id"]) + if "+" not in post_id: # Single post, not multiple + submission = scraper.get_post_by_id(post_id) + if not submission: + raise RedditScraperError(f"Could not find post with ID: {post_id}") + + elif settings.config["ai"].get("ai_similarity_enabled"): + # AI sorting based on keyword similarity + print_substep("Fetching posts for AI similarity sorting...") + posts = scraper.get_subreddit_posts(subreddit_name, sort="hot", limit=50) + + if not posts: + raise RedditScraperError(f"No posts found in r/{subreddit_name}") + + keywords = settings.config["ai"].get("ai_similarity_keywords", "").split(",") + keywords = [keyword.strip() for keyword in keywords if keyword.strip()] + + if keywords: + keywords_print = ", ".join(keywords) + print_substep(f"Sorting threads by similarity to: {keywords_print}") + + # Convert posts to format expected by sort_by_similarity + wrappers = [SubmissionWrapper(post) for post in posts] + sorted_wrappers, similarity_scores = sort_by_similarity(wrappers, keywords) + + submission, similarity_score = _get_undone_post( + sorted_wrappers, subreddit_name, similarity_scores=similarity_scores + ) + else: + wrappers = [SubmissionWrapper(post) for post in posts] + submission = _get_undone_post(wrappers, subreddit_name) + + else: + # Default: get hot posts + posts = scraper.get_subreddit_posts(subreddit_name, sort="hot", limit=25) + + if not posts: + raise RedditScraperError(f"No posts found in r/{subreddit_name}") + + wrappers = [SubmissionWrapper(post) for post in posts] + submission = _get_undone_post(wrappers, subreddit_name) + + except RedditScraperError as e: + print_substep(f"Error fetching Reddit data: {e}", style="bold red") + raise if submission is None: - return get_subreddit_threads(POST_ID) # submission already done. 
rerun + print_substep("No suitable submission found. Retrying...", style="yellow") + return get_subreddit_threads(POST_ID) - elif not submission.num_comments and settings.config["settings"]["storymode"] == "false": - print_substep("No comments found. Skipping.") + # Check if story mode with no comments is okay + if not submission.num_comments and not settings.config["settings"].get("storymode"): + print_substep("No comments found. Skipping.", style="bold red") exit() - submission = check_done(submission) # double-checking + # Double-check if this post was already done + wrapper = SubmissionWrapper(submission) + checked = check_done(wrapper) + if checked is None: + print_substep("Post already processed. Finding another...", style="yellow") + return get_subreddit_threads(POST_ID) + # Display post info upvotes = submission.score ratio = submission.upvote_ratio * 100 num_comments = submission.num_comments - threadurl = f"https://new.reddit.com/{submission.permalink}" + thread_url = f"https://new.reddit.com{submission.permalink}" - print_substep(f"Video will be: {submission.title} :thumbsup:", style="bold green") - print_substep(f"Thread url is: {threadurl} :thumbsup:", style="bold green") + print_substep(f"Video will be: {submission.title}", style="bold green") + print_substep(f"Thread url is: {thread_url}", style="bold green") print_substep(f"Thread has {upvotes} upvotes", style="bold blue") - print_substep(f"Thread has a upvote ratio of {ratio}%", style="bold blue") + print_substep(f"Thread has a upvote ratio of {ratio:.0f}%", style="bold blue") print_substep(f"Thread has {num_comments} comments", style="bold blue") + if similarity_score: print_substep( f"Thread has a similarity score up to {round(similarity_score * 100)}%", style="bold blue", ) - content["thread_url"] = threadurl + # Build content dictionary + content["thread_url"] = thread_url content["thread_title"] = submission.title content["thread_id"] = submission.id content["is_nsfw"] = submission.over_18 + 
content["subreddit"] = subreddit_name content["comments"] = [] - if settings.config["settings"]["storymode"]: - if settings.config["settings"]["storymodemethod"] == 1: + + if settings.config["settings"].get("storymode"): + # Story mode - use the post's selftext + if settings.config["settings"].get("storymodemethod") == 1: content["thread_post"] = posttextparser(submission.selftext) else: content["thread_post"] = submission.selftext else: - for top_level_comment in submission.comments: - if isinstance(top_level_comment, MoreComments): - continue + # Comment mode - fetch and process comments + print_substep("Fetching comments...", style="bold blue") + + try: + _, comments = scraper.get_post_with_comments( + submission.id, + comment_sort="top", + comment_limit=500, + max_comments=1000, + ) + + # Filter and process comments + max_len = int(settings.config["reddit"]["thread"].get("max_comment_length", 500)) + min_len = int(settings.config["reddit"]["thread"].get("min_comment_length", 1)) + + for comment in comments: + # Skip non-top-level comments (depth > 0) + if comment.depth > 0: + continue + + # Skip deleted/removed + if comment.body in ["[removed]", "[deleted]"]: + continue - if top_level_comment.body in ["[removed]", "[deleted]"]: - continue # # see https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/78 - if not top_level_comment.stickied: - sanitised = sanitize_text(top_level_comment.body) - if not sanitised or sanitised == " ": + # Skip stickied comments + if comment.stickied: continue - if len(top_level_comment.body) <= int( - settings.config["reddit"]["thread"]["max_comment_length"] - ): - if len(top_level_comment.body) >= int( - settings.config["reddit"]["thread"]["min_comment_length"] - ): - if ( - top_level_comment.author is not None - and sanitize_text(top_level_comment.body) is not None - ): # if errors occur with this change to if not. 
- content["comments"].append( - { - "comment_body": top_level_comment.body, - "comment_url": top_level_comment.permalink, - "comment_id": top_level_comment.id, - } - ) - - print_substep("Received subreddit threads Successfully.", style="bold green") + + # Sanitize and validate + sanitized = sanitize_text(comment.body) + if not sanitized or sanitized.strip() == "": + continue + + # Check length constraints + if len(comment.body) > max_len: + continue + if len(comment.body) < min_len: + continue + + # Skip if author is deleted + if comment.author in ["[deleted]", "[removed]"]: + continue + + content["comments"].append({ + "comment_body": comment.body, + "comment_url": comment.permalink, + "comment_id": comment.id, + }) + + print_substep(f"Collected {len(content['comments'])} valid comments", style="bold green") + + except RedditScraperError as e: + print_substep(f"Error fetching comments: {e}", style="yellow") + # Continue without comments if fetch fails + + print_substep("Received subreddit threads successfully.", style="bold green") return content + + +def _get_undone_post( + wrappers: List[SubmissionWrapper], + subreddit_name: str, + similarity_scores: Optional[List[float]] = None, +) -> Optional[RedditPost] | Tuple[Optional[RedditPost], float]: + """ + Find a submission that hasn't been processed yet. 
+ + Args: + wrappers: List of SubmissionWrapper objects + subreddit_name: Name of the subreddit + similarity_scores: Optional similarity scores for each submission + + Returns: + First undone RedditPost, or tuple of (RedditPost, similarity_score) if scores provided + """ + allow_nsfw = settings.config["settings"].get("allow_nsfw", False) + min_comments = int(settings.config["reddit"]["thread"].get("min_comments", 20)) + + for i, wrapper in enumerate(wrappers): + # Skip NSFW if not allowed + if wrapper.over_18 and not allow_nsfw: + continue + + # Skip stickied posts + if wrapper.stickied: + continue + + # Check minimum comments (unless story mode) + if not settings.config["settings"].get("storymode"): + if wrapper.num_comments < min_comments: + continue + + # Check if already done + if check_done(wrapper) is None: + continue + + post = wrapper.to_post() + + if similarity_scores is not None and i < len(similarity_scores): + return post, similarity_scores[i] + + return post + + return None diff --git a/requirements.txt b/requirements.txt index bc80d05..543627e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,6 @@ botocore==1.36.8 gTTS==2.5.4 moviepy==2.2.1 playwright==1.49.1 -praw==7.8.1 requests==2.32.3 rich==13.9.4 toml==0.10.2 diff --git a/utils/.config.template.toml b/utils/.config.template.toml index 99e36bf..268f98f 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -1,10 +1,9 @@ -[reddit.creds] -client_id = { optional = false, nmin = 12, nmax = 30, explanation = "The ID of your Reddit app of SCRIPT type", example = "fFAGRNJru1FTz70BzhT3Zg", regex = "^[-a-zA-Z0-9._~+/]+=*$", input_error = "The client ID can only contain printable characters.", oob_error = "The ID should be over 12 and under 30 characters, double check your input." 
} -client_secret = { optional = false, nmin = 20, nmax = 40, explanation = "The SECRET of your Reddit app of SCRIPT type", example = "fFAGRNJru1FTz70BzhT3Zg", regex = "^[-a-zA-Z0-9._~+/]+=*$", input_error = "The client ID can only contain printable characters.", oob_error = "The secret should be over 20 and under 40 characters, double check your input." } -username = { optional = false, nmin = 3, nmax = 20, explanation = "The username of your reddit account", example = "JasonLovesDoggo", regex = "^[-_0-9a-zA-Z]+$", oob_error = "A username HAS to be between 3 and 20 characters" } -password = { optional = false, nmin = 8, explanation = "The password of your reddit account", example = "fFAGRNJru1FTz70BzhT3Zg", oob_error = "Password too short" } -2fa = { optional = true, type = "bool", options = [true, false, ], default = false, explanation = "Whether you have Reddit 2FA enabled, Valid options are True and False", example = true } +# Note: No Reddit API credentials required! This bot uses public .json endpoints. +# If you experience rate limiting, try increasing the delay between requests. +[reddit.scraper] +user_agent = { optional = true, default = "python:reddit_video_bot:1.0", example = "python:reddit_video_bot:1.0 (contact: you@example.com)", explanation = "User-Agent string for Reddit requests. Customize to avoid rate limiting." } +request_delay = { optional = true, default = 2.0, example = 3.0, type = "float", explanation = "Delay in seconds between Reddit requests. Increase if rate limited." } [reddit.thread] random = { optional = true, options = [true, false, ], default = false, type = "bool", explanation = "If set to no, it will ask you a thread link to extract the thread, if yes it will randomize it. 
Default: 'False'", example = "True" } From 2c4a8a8a649fc87b2f3ca2de22144ea8bef04c72 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 4 Feb 2026 02:30:23 +0000 Subject: [PATCH 3/3] feat: add complete web UI for configuration and video generation - Add dashboard page with quick actions and progress overview - Add settings page for Qwen TTS, Reddit scraper, and video configuration - Add backgrounds page for uploading custom video/audio backgrounds - Add videos page for viewing and downloading generated content - Add TTS connection test endpoint - Update docker-compose for standalone operation - Create unified CSS and JavaScript for consistent UI experience https://claude.ai/code/session_01HLLH3WjpmRzvaoY6eYSFAD --- GUI/backgrounds.html | 419 +++++++++---------- GUI/dashboard.html | 233 +++++++++++ GUI/settings.html | 907 +++++++++++++++-------------------------- GUI/static/css/app.css | 774 +++++++++++++++++++++++++++++++++++ GUI/static/js/app.js | 174 ++++++++ GUI/videos.html | 141 +++++++ docker-compose.yml | 36 +- progress_gui.py | 458 ++++++++++++++++++++- 8 files changed, 2295 insertions(+), 847 deletions(-) create mode 100644 GUI/dashboard.html create mode 100644 GUI/static/css/app.css create mode 100644 GUI/static/js/app.js create mode 100644 GUI/videos.html diff --git a/GUI/backgrounds.html b/GUI/backgrounds.html index 541e39f..f13dc3d 100644 --- a/GUI/backgrounds.html +++ b/GUI/backgrounds.html @@ -1,263 +1,210 @@ -{% extends "layout.html" %} -{% block main %} - - -