feat: keyword filtering + two-panel Playwright scraping visualization

Add keywords input to Create page to override search_queries per run.
Split /create into two panels: left controls + progress, right
real-time scraper activity feed with stage diagram and typed event
cards. Emit structured events from scraper and auth modules. Add
blocked_words fields to Settings page Content tab.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
pull/2551/head
Hong Phuc 4 weeks ago
parent faaaa85be8
commit 9e219ebfbd

@ -3,6 +3,7 @@ import json
import os
import sys
import threading
import time
import webbrowser
from pathlib import Path
@ -173,10 +174,30 @@ pipeline_state: dict = {
"error": None,
"result": None, # {"title": ..., "file": ..., "url": ...}
"log": [], # Last N status messages
"scraper_events": [], # Structured scraper events for visualization
}
def _run_pipeline():
def _event_to_summary(event_type, data):
"""Convert a structured scraper event to a human-readable log line."""
data = data or {}
summaries = {
"browser_launch": lambda d: "Launching browser...",
"login": lambda d: d.get("message", "Login event"),
"feed_scroll": lambda d: f"Scrolled: {d.get('new_posts', 0)} new, {d.get('total_posts', 0)} total",
"post_discovered": lambda d: f"Post by {d.get('username', '?')}: {d.get('body', '')[:45]}",
"search_query": lambda d: f"Search '{d.get('query', '?')}': {d.get('posts_found', 0)} posts",
"filter_results": lambda d: f"Filtered {d.get('before', 0)} -> {d.get('after', 0)} candidates",
"visiting_post": lambda d: f"Trying post {d.get('post_id', '')[:8]}...",
"replies_found": lambda d: f"Got {d.get('count', 0)} replies (need {d.get('min_required', '?')})",
"post_selected": lambda d: f"Selected: {d.get('title', '')[:55]}",
"general": lambda d: d.get("message", ""),
}
fn = summaries.get(event_type)
return fn(data) if fn else None
def _run_pipeline(search_queries=None):
"""Run the video creation pipeline in a background thread."""
import toml
from utils import console as uconsole
@ -188,18 +209,34 @@ def _run_pipeline():
pipeline_state["error"] = None
pipeline_state["result"] = None
pipeline_state["log"] = []
pipeline_state["scraper_events"] = []
try:
# Load config
settings.config = toml.load("config.toml")
# Set up progress callback
def on_progress(stage=""):
# Apply search_queries override if provided from UI
if search_queries:
settings.config.setdefault("threads", {}).setdefault("thread", {})["search_queries"] = search_queries
# Set up progress callback with structured event support
def on_progress(stage=None, event=None, data=None):
    """Record pipeline progress in the shared ``pipeline_state``.

    Args:
        stage: Optional stage message; when given it becomes the current
            stage and is appended to the log.
        event: Optional structured scraper event type; when given the event
            is stored in ``scraper_events`` and its summary (if any) is
            appended to the log.
        data: Optional payload dict for ``event``.
    """

    def append_log(line):
        # Keep only the 20 most recent log lines.
        pipeline_state["log"].append(line)
        if len(pipeline_state["log"]) > 20:
            pipeline_state["log"] = pipeline_state["log"][-20:]

    with pipeline_lock:
        # Only touch stage/log when a stage was actually supplied; event-only
        # calls must not overwrite the stage or log a None entry.
        if stage:
            pipeline_state["stage"] = stage
            append_log(stage)
        if event:
            entry = {"type": event, "data": data or {}, "ts": time.time()}
            pipeline_state["scraper_events"].append(entry)
            # Keep only the 100 most recent structured events.
            if len(pipeline_state["scraper_events"]) > 100:
                pipeline_state["scraper_events"] = pipeline_state["scraper_events"][-100:]
            summary = _event_to_summary(event, data)
            if summary:
                append_log(summary)
uconsole.set_progress_callback(on_progress)
@ -225,10 +262,19 @@ def create():
if request.method == "POST":
if pipeline_state["running"]:
return jsonify({"status": "already_running"})
thread = threading.Thread(target=_run_pipeline, daemon=True)
data = request.get_json(silent=True) or {}
search_queries = data.get("search_queries") or None
thread = threading.Thread(
target=_run_pipeline,
kwargs={"search_queries": search_queries},
daemon=True,
)
thread.start()
return jsonify({"status": "started"})
return render_template("create.html", state=pipeline_state)
# Load current config default for pre-filling the keywords input
cfg = tomlkit.loads(Path("config.toml").read_text())
default_queries = cfg.get("threads", {}).get("thread", {}).get("search_queries", "")
return render_template("create.html", state=pipeline_state, default_search_queries=default_queries)
@app.route("/create/status")

@ -2,97 +2,154 @@
{% block main %}
<div class="bg-slate-900 min-h-screen py-12">
<div class="container mx-auto px-4 max-w-2xl">
<div class="card bg-slate-800 border border-white/5 shadow-2xl">
<div class="card-body p-8">
<div class="flex items-center gap-4 mb-8">
<div class="bg-indigo-600/20 p-3 rounded-xl">
<i data-lucide="plus-square" class="w-8 h-8 text-indigo-500"></i>
</div>
<div>
<h2 class="card-title text-2xl text-white">Create New Short</h2>
<p class="text-slate-400 text-sm">Start the automated video creation pipeline.</p>
</div>
</div>
<div class="container mx-auto px-4">
<div class="grid grid-cols-1 lg:grid-cols-5 gap-6">
<div class="space-y-8">
<!-- Action Button -->
<button id="create-btn" class="btn btn-indigo btn-lg w-full bg-indigo-600 hover:bg-indigo-500 border-none text-white h-16 text-lg"
onclick="startPipeline()" disabled>
<span id="btn-text">Start Generation</span>
<span id="btn-spinner" class="loading loading-spinner loading-md hidden"></span>
</button>
<!-- Progress Visualization -->
<div id="progress-area" class="hidden space-y-4 animate-in fade-in duration-500">
<div class="flex justify-between items-end">
<div class="space-y-1">
<span class="text-xs uppercase tracking-widest text-slate-500 font-bold">Current Stage</span>
<div class="flex items-center gap-2">
<div class="w-2 h-2 rounded-full bg-indigo-500 animate-pulse"></div>
<h3 id="stage-text" class="text-indigo-400 font-semibold text-lg capitalize">Preparing...</h3>
</div>
<!-- LEFT PANEL: Controls + Progress -->
<div class="lg:col-span-2">
<div class="card bg-slate-800 border border-white/5 shadow-2xl">
<div class="card-body p-8">
<div class="flex items-center gap-4 mb-8">
<div class="bg-indigo-600/20 p-3 rounded-xl">
<i data-lucide="plus-square" class="w-8 h-8 text-indigo-500"></i>
</div>
<div>
<h2 class="card-title text-2xl text-white">Create New Short</h2>
<p class="text-slate-400 text-sm">Start the automated video creation pipeline.</p>
</div>
<span id="pct-text" class="text-2xl font-black text-slate-700 font-mono">0%</span>
</div>
<progress id="progress-bar" class="progress progress-indigo w-full h-3 bg-slate-900" value="0" max="100"></progress>
<div class="space-y-6">
<!-- Search Keywords Input -->
<div class="form-control w-full">
<label class="label px-0">
<span class="label-text text-slate-300 font-medium">Search Keywords</span>
<span class="label-text-alt text-slate-500">Optional — overrides config</span>
</label>
<div class="flex gap-2">
<input id="keywords-input" type="text"
class="input input-bordered bg-slate-900 border-slate-700 flex-1 text-white placeholder:text-slate-600"
placeholder="news, politics, trending, viral"
value="{{ default_search_queries }}">
<button id="clear-keywords" class="btn btn-ghost btn-square text-slate-500 hover:text-slate-300"
onclick="document.getElementById('keywords-input').value=''" type="button">
<i data-lucide="x" class="w-4 h-4"></i>
</button>
</div>
<label class="label px-0">
<span class="label-text-alt text-slate-500">Comma-separated topics to search on Threads. Leave empty for config default.</span>
</label>
</div>
<!-- Action Button -->
<button id="create-btn" class="btn btn-indigo btn-lg w-full bg-indigo-600 hover:bg-indigo-500 border-none text-white h-16 text-lg"
onclick="startPipeline()" disabled>
<span id="btn-text">Initializing...</span>
<span id="btn-spinner" class="loading loading-spinner loading-md hidden"></span>
</button>
<!-- Progress Visualization -->
<div id="progress-area" class="hidden space-y-4">
<div class="flex justify-between items-end">
<div class="space-y-1">
<span class="text-xs uppercase tracking-widest text-slate-500 font-bold">Current Stage</span>
<div class="flex items-center gap-2">
<div class="w-2 h-2 rounded-full bg-indigo-500 animate-pulse"></div>
<h3 id="stage-text" class="text-indigo-400 font-semibold text-lg capitalize">Preparing...</h3>
</div>
</div>
<span id="pct-text" class="text-2xl font-black text-slate-700 font-mono">0%</span>
</div>
<progress id="progress-bar" class="progress progress-indigo w-full h-3 bg-slate-900" value="0" max="100"></progress>
<div class="grid grid-cols-2 gap-4 pt-4">
<div class="bg-slate-900/50 p-3 rounded-lg border border-white/5 flex items-center gap-3">
<i data-lucide="clock" class="w-4 h-4 text-slate-500"></i>
<div class="flex flex-col">
<span class="text-[10px] uppercase text-slate-500 font-bold">Elapsed</span>
<span id="elapsed-time" class="text-sm font-mono text-slate-300">00:00</span>
<div class="grid grid-cols-2 gap-4 pt-4">
<div class="bg-slate-900/50 p-3 rounded-lg border border-white/5 flex items-center gap-3">
<i data-lucide="clock" class="w-4 h-4 text-slate-500"></i>
<div class="flex flex-col">
<span class="text-[10px] uppercase text-slate-500 font-bold">Elapsed</span>
<span id="elapsed-time" class="text-sm font-mono text-slate-300">00:00</span>
</div>
</div>
<div class="bg-slate-900/50 p-3 rounded-lg border border-white/5 flex items-center gap-3">
<i data-lucide="layers" class="w-4 h-4 text-slate-500"></i>
<div class="flex flex-col">
<span class="text-[10px] uppercase text-slate-500 font-bold">Status</span>
<span class="text-sm text-indigo-400">Processing</span>
</div>
</div>
</div>
</div>
<div class="bg-slate-900/50 p-3 rounded-lg border border-white/5 flex items-center gap-3">
<i data-lucide="layers" class="w-4 h-4 text-slate-500"></i>
<div class="flex flex-col">
<span class="text-[10px] uppercase text-slate-500 font-bold">Status</span>
<span class="text-sm text-indigo-400">Processing</span>
<!-- Success Message -->
<div id="done-area" class="hidden">
<div class="alert bg-emerald-500/10 border-emerald-500/20 text-emerald-400 flex flex-col items-start gap-4 p-6">
<div class="flex items-center gap-3">
<div class="bg-emerald-500 text-slate-900 p-1 rounded-full">
<i data-lucide="check" class="w-4 h-4"></i>
</div>
<span class="font-bold text-lg">Generation Complete!</span>
</div>
<p id="done-msg" class="text-slate-300 text-sm">Your video has been rendered and saved to the library.</p>
<a href="/" class="btn btn-emerald btn-sm bg-emerald-600 hover:bg-emerald-500 border-none text-white px-6">View Video</a>
</div>
</div>
</div>
</div>
<!-- Success Message -->
<div id="done-area" class="hidden animate-in zoom-in duration-300">
<div class="alert bg-emerald-500/10 border-emerald-500/20 text-emerald-400 flex flex-col items-start gap-4 p-6">
<div class="flex items-center gap-3">
<div class="bg-emerald-500 text-slate-900 p-1 rounded-full">
<i data-lucide="check" class="w-4 h-4"></i>
<!-- Error Message -->
<div id="error-area" class="hidden">
<div class="alert bg-red-500/10 border-red-500/20 text-red-400 p-6">
<i data-lucide="alert-triangle" class="w-6 h-6"></i>
<div>
<h3 class="font-bold">Pipeline Failed</h3>
<div id="error-text" class="text-xs mt-2 font-mono bg-black/20 p-3 rounded overflow-x-auto whitespace-pre-wrap"></div>
</div>
</div>
</div>
<!-- Log Output -->
<div id="log-area" class="hidden space-y-3">
<div class="flex items-center justify-between">
<h4 class="text-xs uppercase tracking-widest text-slate-500 font-bold">Execution Logs</h4>
<span class="badge badge-outline border-slate-700 text-slate-500 text-[10px]">Real-time</span>
</div>
<div id="log-list" class="bg-slate-900 rounded-xl p-4 font-mono text-[11px] leading-relaxed text-slate-400 h-48 overflow-y-auto border border-white/5 shadow-inner">
</div>
<span class="font-bold text-lg">Generation Complete!</span>
</div>
<p id="done-msg" class="text-slate-300 text-sm">Your video has been rendered and saved to the library.</p>
<a href="/" class="btn btn-emerald btn-sm bg-emerald-600 hover:bg-emerald-500 border-none text-white px-6">View Video</a>
</div>
</div>
</div>
</div>
<!-- Error Message -->
<div id="error-area" class="hidden">
<div class="alert bg-red-500/10 border-red-500/20 text-red-400 p-6">
<i data-lucide="alert-triangle" class="w-6 h-6"></i>
<div>
<h3 class="font-bold">Pipeline Failed</h3>
<div id="error-text" class="text-xs mt-2 font-mono bg-black/20 p-3 rounded overflow-x-auto whitespace-pre-wrap"></div>
<!-- RIGHT PANEL: Pipeline Activity Visualization -->
<div class="lg:col-span-3">
<div class="card bg-slate-800 border border-white/5 shadow-2xl">
<div class="card-body p-6">
<div class="flex items-center justify-between mb-5">
<div class="flex items-center gap-2">
<i data-lucide="activity" class="w-5 h-5 text-indigo-400"></i>
<h3 class="card-title text-white text-lg">Pipeline Activity</h3>
</div>
<span class="badge badge-outline border-slate-700 text-slate-500 text-[10px]">Live</span>
</div>
<!-- Stage Diagram -->
<div id="stage-diagram" class="mb-6">
</div>
</div>
<!-- Log Output -->
<div id="log-area" class="hidden space-y-3">
<div class="flex items-center justify-between">
<h4 class="text-xs uppercase tracking-widest text-slate-500 font-bold">Execution Logs</h4>
<span class="badge badge-outline border-slate-700 text-slate-500 text-[10px]">Real-time</span>
<!-- Scraper Event Feed -->
<div id="scraper-feed" class="space-y-2 max-h-[500px] overflow-y-auto pr-1 custom-scrollbar">
</div>
<div id="log-list" class="bg-slate-900 rounded-xl p-4 font-mono text-[11px] leading-relaxed text-slate-400 h-48 overflow-y-auto border border-white/5 shadow-inner">
<!-- Logs will appear here -->
<!-- Empty state -->
<div id="feed-empty" class="text-center py-16 text-slate-600">
<i data-lucide="eye-off" class="w-12 h-12 mx-auto mb-3 opacity-50"></i>
<p class="text-sm">Scraper activity will appear here</p>
<p class="text-xs mt-1">Start a pipeline to see real-time scraping visualization</p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
@ -118,24 +175,264 @@
'error': 0
};
// Ordered list of pipeline stages shown in the stage diagram.
// - id:    matched as a substring of the lower-cased stage text reported by
//          the server (see renderStageDiagram); array order decides which
//          stages render as done (before the active one) or pending (after).
// - label: text displayed on the stage chip.
// - icon:  Lucide icon name used for stages that are neither done nor active.
const pipelineStages = [
{ id: 'configuring', label: 'Configuring', icon: 'settings' },
{ id: 'discovering', label: 'Discovering', icon: 'search' },
{ id: 'scraping', label: 'Scraping', icon: 'loader-2' },
{ id: 'fetching', label: 'Fetching', icon: 'download' },
{ id: 'tts', label: 'TTS', icon: 'volume-2' },
{ id: 'screenshots', label: 'Screenshots', icon: 'camera' },
{ id: 'background', label: 'Background', icon: 'image' },
{ id: 'chopping', label: 'Chopping', icon: 'scissors' },
{ id: 'creating', label: 'Creating', icon: 'film' },
{ id: 'rendering', label: 'Rendering', icon: 'sparkles' },
{ id: 'done', label: 'Complete', icon: 'check-circle' },
];
// --- helpers ---
// HTML-escape a value by letting the DOM serialize it as a text node.
// Falsy input (null, undefined, '', 0) yields an empty string.
function esc(s) {
    if (!s) {
        return '';
    }
    const scratch = document.createElement('div');
    scratch.appendChild(document.createTextNode(s));
    return scratch.innerHTML;
}
// Format an engagement count compactly: 1234 -> "1.2K", 2500000 -> "2.5M".
// Missing, empty, or non-numeric input renders as "0" (never "NaN").
function fmtMetric(n) {
    if (!n && n !== 0) return '0';
    const value = Number(n);
    if (!Number.isFinite(value)) return '0';
    if (value >= 1000000) return (value / 1000000).toFixed(1) + 'M';
    if (value >= 1000) {
        const thousands = (value / 1000).toFixed(1);
        // Values just under one million round up to "1000.0"; promote them
        // to the next unit instead of showing "1000.0K".
        return thousands === '1000.0' ? '1.0M' : thousands + 'K';
    }
    return value.toLocaleString();
}
// Render a Unix timestamp (seconds) as a short relative age such as
// "just now", "12s ago" or "3m ago". A falsy timestamp yields ''.
function fmtTime(ts) {
    if (!ts) {
        return '';
    }
    const ageSec = Math.floor((Date.now() / 1000) - ts);
    if (ageSec < 5) {
        return 'just now';
    }
    return ageSec < 60
        ? ageSec + 's ago'
        : Math.floor(ageSec / 60) + 'm ago';
}
// Refresh the "Elapsed" readout as MM:SS since the pipeline start time.
// Does nothing until startTime has been set.
function updateElapsedTime() {
    if (!startTime) return;
    // Clamp at zero so a clock adjustment cannot produce a negative readout.
    const diff = Math.max(0, Math.floor((new Date() - startTime) / 1000));
    const mins = Math.floor(diff / 60).toString().padStart(2, '0');
    const secs = (diff % 60).toString().padStart(2, '0');
    document.getElementById('elapsed-time').textContent = mins + ':' + secs;
}
// Map a stage message to a progress percentage using stageWeights.
// Every key contained in the lower-cased stage text is considered and the
// last matching entry wins; an unknown or missing stage yields 0.
function stageProgress(stage) {
    // Single, null-safe declaration (a second `const s` is a SyntaxError).
    const s = String(stage || '').toLowerCase();
    let pct = 0;
    for (const [key, val] of Object.entries(stageWeights)) {
        if (s.includes(key)) {
            pct = val;
        }
    }
    return pct;
}
// --- stage diagram ---
// Render the horizontal stage diagram into #stage-diagram.
// The active stage is the last pipelineStages entry whose id occurs in the
// lower-cased stage text; earlier stages render as done, later ones as
// pending. At most a window of stages around the active one is shown.
function renderStageDiagram(currentStage) {
    const container = document.getElementById('stage-diagram');
    const stageText = (currentStage || '').toLowerCase();

    let activeIdx = -1;
    pipelineStages.forEach(function(st, i) {
        if (stageText.includes(st.id)) {
            activeIdx = i;
        }
    });

    // Visible window: up to 4 stages before the active one, 8 chips in total.
    const start = Math.max(0, activeIdx - 4);
    const end = Math.min(pipelineStages.length - 1, Math.max(activeIdx, start + 7));
    const visible = pipelineStages.slice(start, end + 1);

    const baseCls = 'flex items-center gap-1.5 px-2.5 py-1.5 rounded-lg text-xs font-medium whitespace-nowrap shrink-0 transition-all duration-300 ';
    const parts = ['<div class="flex items-center gap-1 overflow-x-auto py-2">'];

    visible.forEach(function(st, idx) {
        const globalIdx = start + idx;
        const isDone = globalIdx < activeIdx;
        const isActive = globalIdx === activeIdx;

        let stateCls;
        let iconHtml;
        if (isActive) {
            stateCls = 'bg-indigo-600/30 text-indigo-300 ring-1 ring-indigo-500/50';
            iconHtml = '<i data-lucide="loader-2" class="w-3 h-3 text-indigo-300 animate-spin"></i>';
        } else if (isDone) {
            stateCls = 'bg-emerald-600/20 text-emerald-400';
            iconHtml = '<i data-lucide="check" class="w-3 h-3 text-emerald-400"></i>';
        } else {
            stateCls = 'bg-slate-700/50 text-slate-500';
            iconHtml = '<i data-lucide="' + st.icon + '" class="w-3 h-3"></i>';
        }

        parts.push(
            '<div class="' + baseCls + stateCls + '">' +
            iconHtml +
            '<span>' + st.label + '</span></div>'
        );

        // Connector chevron between chips (tinted once the stage is done).
        if (idx < visible.length - 1) {
            parts.push(
                '<div class="text-slate-600 shrink-0">' +
                (isDone
                    ? '<i data-lucide="chevron-right" class="w-3 h-3 text-emerald-400/50"></i>'
                    : '<i data-lucide="chevron-right" class="w-3 h-3"></i>') +
                '</div>'
            );
        }
    });

    parts.push('</div>');
    container.innerHTML = parts.join('');
    // Replace the freshly inserted <i data-lucide> placeholders with icons.
    lucide.createIcons();
}
// --- scraper event feed ---
// Build the HTML for one scraper event ({ type, data, ts }).
// Known types get a dedicated card or one-line row; unknown types fall back
// to a plain row showing data.message (or the type name).
// All payload values are escaped because the result is assigned to innerHTML.
function renderScraperEvent(event) {
    const { type, data, ts } = event || {};
    const d = data || {};

    // Escaped text for a payload field, with a fallback for missing/zero values.
    const text = (value, fallback) => esc(String(value || fallback));

    // Shared markup for the compact one-line rows (icon bubble + text).
    const row = (bubbleCls, icon, iconCls, innerHtml) =>
        '<div class="flex items-center gap-2 py-1.5 px-1">' +
            '<div class="' + bubbleCls + ' p-1 rounded-full">' +
                '<i data-lucide="' + icon + '" class="w-3 h-3 ' + iconCls + '"></i>' +
            '</div>' +
            '<span class="text-[11px] text-slate-400">' + innerHtml + '</span>' +
        '</div>';

    const strong = (innerHtml) => '<strong class="text-slate-300">' + innerHtml + '</strong>';

    const templates = {
        'post_discovered': () =>
            '<div class="bg-slate-700/30 rounded-lg p-3 border border-white/5 hover:border-indigo-500/30 transition-colors">' +
                '<div class="flex items-start gap-3">' +
                    '<div class="bg-indigo-600/20 p-1.5 rounded-full shrink-0">' +
                        '<i data-lucide="message-circle" class="w-3.5 h-3.5 text-indigo-400"></i>' +
                    '</div>' +
                    '<div class="flex-1 min-w-0">' +
                        '<div class="flex items-center gap-2 mb-1">' +
                            '<span class="text-xs font-semibold text-slate-200 truncate">' + esc(d.username || 'unknown') + '</span>' +
                            '<span class="text-[10px] text-slate-500">' + fmtTime(ts) + '</span>' +
                        '</div>' +
                        '<p class="text-[11px] text-slate-400 leading-relaxed line-clamp-2">' + esc(d.body || '') + '</p>' +
                        '<div class="flex items-center gap-3 mt-1.5 text-[10px] text-slate-500">' +
                            '<span>&#9829; ' + fmtMetric(d.likes) + '</span>' +
                            '<span>&#128172; ' + fmtMetric(d.replies) + '</span>' +
                            (d.reposts ? '<span>&#128259; ' + fmtMetric(d.reposts) + '</span>' : '') +
                        '</div>' +
                    '</div>' +
                '</div>' +
            '</div>',

        'feed_scroll': () => row('bg-cyan-600/20', 'mouse-pointer-2', 'text-cyan-400',
            'Scrolled ' + strong(text(d.scroll, '?') + '/' + text(d.max_scrolls, '?')) +
            ' &mdash; ' + strong(text(d.new_posts, 0)) + ' new,' +
            ' ' + strong(text(d.total_posts, 0)) + ' total posts'),

        'search_query': () => row('bg-purple-600/20', 'search', 'text-purple-400',
            'Searched "' + strong(esc(d.query || '')) + '"' +
            ' &mdash; ' + strong(text(d.posts_found, 0)) + ' posts found'),

        'filter_results': () => row('bg-amber-600/20', 'filter', 'text-amber-400',
            'Filtered ' + strong(text(d.before, 0)) + ' posts &rarr;' +
            ' ' + strong(text(d.after, 0)) + ' candidates' +
            (d.min_engagement ? ' (min ' + fmtMetric(d.min_engagement) + ' engagement)' : '')),

        'visiting_post': () => row('bg-rose-600/20', 'external-link', 'text-rose-400',
            // String() guards against a non-string body breaking .substring().
            'Examining candidate #' + text(d.attempt, '?') + ': "' + esc(String(d.body || '').substring(0, 40)) + '..."' +
            ' <span class="text-slate-500 ml-1">&#9829;' + fmtMetric(d.likes) + '</span>'),

        'replies_found': () => row('bg-emerald-600/20', 'message-square', 'text-emerald-400',
            // Fall back to 0 so a missing count never renders as "undefined".
            text(d.count, 0) + ' replies found' +
            (d.min_required ? ' (need ' + text(d.min_required, '') + ')' : '')),

        'post_selected': () =>
            '<div class="bg-emerald-600/10 rounded-lg p-3 border border-emerald-500/20">' +
                '<div class="flex items-start gap-3">' +
                    '<div class="bg-emerald-600/30 p-1.5 rounded-full shrink-0">' +
                        '<i data-lucide="check-circle" class="w-3.5 h-3.5 text-emerald-400"></i>' +
                    '</div>' +
                    '<div>' +
                        '<span class="text-xs font-semibold text-emerald-300">Post Selected!</span>' +
                        '<p class="text-[11px] text-slate-400 mt-0.5">' + esc(d.title || '') + '</p>' +
                        '<div class="flex gap-3 mt-1 text-[10px] text-slate-500">' +
                            '<span>&#9829; ' + fmtMetric(d.likes) + '</span>' +
                            '<span>&#128172; ' + text(d.replies_count, 0) + ' replies</span>' +
                        '</div>' +
                    '</div>' +
                '</div>' +
            '</div>',

        'login': () => row('bg-blue-600/20', 'log-in', 'text-blue-400', esc(d.message || '')),

        'browser_launch': () => row('bg-slate-600/20', 'globe', 'text-slate-400', esc(d.message || '')),
    };

    // Own-property check so a type such as "constructor" cannot resolve to an
    // inherited Object member.
    if (Object.prototype.hasOwnProperty.call(templates, type)) {
        return templates[type]();
    }
    return (
        '<div class="flex items-center gap-2 py-1.5 px-1">' +
            '<span class="text-[11px] text-slate-400">' + esc(d.message || type) + '</span>' +
        '</div>'
    );
}
// Redraw the scraper activity feed from the full event list.
// With no events the feed is cleared and the empty-state placeholder shown;
// otherwise the 50 most recent events are rendered and the feed is scrolled
// to the bottom.
function renderScraperFeed(events) {
    const feed = document.getElementById('scraper-feed');
    const placeholder = document.getElementById('feed-empty');

    const isEmpty = !events || events.length === 0;
    if (isEmpty) {
        feed.innerHTML = '';
        placeholder.classList.remove('hidden');
        return;
    }

    placeholder.classList.add('hidden');

    let markup = '';
    for (const ev of events.slice(-50)) {
        markup += renderScraperEvent(ev);
    }
    feed.innerHTML = markup;
    feed.scrollTop = feed.scrollHeight;
    lucide.createIcons();
}
// --- pipeline lifecycle ---
async function startPipeline() {
const btn = document.getElementById('create-btn');
const btnText = document.getElementById('btn-text');
@ -150,8 +447,22 @@
document.getElementById('done-area').classList.add('hidden');
document.getElementById('error-area').classList.add('hidden');
// Show empty feed state
document.getElementById('scraper-feed').innerHTML = '';
document.getElementById('feed-empty').classList.remove('hidden');
document.getElementById('stage-diagram').innerHTML = '';
// Reset progress
document.getElementById('progress-bar').classList.remove('progress-success', 'progress-error');
const keywords = document.getElementById('keywords-input').value.trim();
try {
const r = await fetch('/create', { method: 'POST' });
const r = await fetch('/create', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ search_queries: keywords || null }),
});
const data = await r.json();
if (data.status === 'started' || data.status === 'already_running') {
@ -181,15 +492,21 @@
stageText.textContent = state.stage || 'Running...';
const pct = stageProgress(state.stage || '');
progressBar.value = pct;
pctText.textContent = `${pct}%`;
pctText.textContent = pct + '%';
if (state.log && state.log.length > 0) {
logList.innerHTML = state.log.map(l =>
`<div class="py-0.5 border-b border-white/5 last:border-0">${l}</div>`
).join('');
logList.innerHTML = state.log.map(function(l) {
return '<div class="py-0.5 border-b border-white/5 last:border-0">' + esc(l) + '</div>';
}).join('');
logList.scrollTop = logList.scrollHeight;
}
// Render visualization panels
renderStageDiagram(state.stage);
if (state.scraper_events) {
renderScraperFeed(state.scraper_events);
}
if (!state.running) {
clearInterval(pollTimer);
clearInterval(elapsedTimer);
@ -218,6 +535,7 @@
}
}
// --- init ---
window.addEventListener('load', async function() {
lucide.createIcons();
try {
@ -234,11 +552,19 @@
startTime = new Date(); // Approximate
elapsedTimer = setInterval(updateElapsedTime, 1000);
pollTimer = setInterval(pollStatus, 2000);
// Re-render visualization from state
renderStageDiagram(state.stage);
if (state.scraper_events) {
renderScraperFeed(state.scraper_events);
}
} else {
btn.disabled = false;
document.getElementById('btn-text').textContent = 'Start Generation';
}
} catch (err) {
console.error("Initial status check failed:", err);
document.getElementById('create-btn').disabled = false;
document.getElementById('btn-text').textContent = 'Start Generation';
}
});
</script>

@ -127,6 +127,11 @@
<label class="label"><span class="label-text text-slate-400">Min Comments: <span class="val-display font-mono text-indigo-400"></span></span></label>
<input name="reddit.thread.min_comments" type="range" min="1" max="100" step="1" class="range range-xs range-indigo" value="{{ data['reddit.thread.min_comments'] }}">
</div>
<div class="form-control w-full">
<label class="label"><span class="label-text text-slate-400">Blocked Words</span></label>
<input name="reddit.thread.blocked_words" value="{{ data['reddit.thread.blocked_words'] }}" type="text" class="input input-bordered bg-slate-900 border-slate-700" placeholder="nsfw, spoiler, politics">
<label class="label"><span class="label-text-alt text-slate-500">Comma-separated. Posts and comments matching these words are skipped.</span></label>
</div>
</div>
<!-- Threads Options -->
@ -143,6 +148,11 @@
<label class="label"><span class="label-text text-slate-400">Min Replies: <span class="val-display font-mono text-indigo-400"></span></span></label>
<input name="threads.thread.min_replies" type="range" min="1" max="50" step="1" class="range range-xs range-indigo" value="{{ data['threads.thread.min_replies'] }}">
</div>
<div class="form-control w-full">
<label class="label"><span class="label-text text-slate-400">Blocked Words</span></label>
<input name="threads.thread.blocked_words" value="{{ data['threads.thread.blocked_words'] }}" type="text" class="input input-bordered bg-slate-900 border-slate-700" placeholder="nsfw, spoiler, politics">
<label class="label"><span class="label-text-alt text-slate-500">Comma-separated. Posts and replies matching these words are skipped.</span></label>
</div>
</div>
</div>

@ -9,7 +9,7 @@ from pathlib import Path
from playwright.sync_api import Browser, BrowserContext, Page, ViewportSize
from utils import settings
from utils.console import print_substep
from utils.console import emit_scraper_event, print_substep
THREADS_LOGIN_URL = "https://www.threads.net/login"
THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json"
@ -32,6 +32,7 @@ def login_to_threads(page: Page, _context: BrowserContext) -> None:
)
print_substep("Logging into Threads (via Instagram)...")
emit_scraper_event("login", {"message": "Logging into Threads (via Instagram)..."})
page.goto(THREADS_LOGIN_URL, timeout=0)
page.wait_for_load_state("networkidle")
@ -47,6 +48,7 @@ def login_to_threads(page: Page, _context: BrowserContext) -> None:
json.dump(cookies, f)
print_substep("Logged into Threads and saved session cookies.", style="bold green")
emit_scraper_event("login", {"message": "Logged in successfully"})
def ensure_authenticated_context(browser: Browser, **kwargs) -> BrowserContext:
@ -81,6 +83,7 @@ def ensure_authenticated_context(browser: Browser, **kwargs) -> BrowserContext:
saved_cookies = json.load(f)
context.add_cookies(saved_cookies)
print_substep("Loaded saved Threads session cookies.")
emit_scraper_event("login", {"message": "Loaded saved session cookies"})
except (json.JSONDecodeError, IOError):
print_substep("Saved cookies corrupted. Logging in fresh...")
page = context.new_page()

@ -12,7 +12,7 @@ from playwright.sync_api import BrowserContext, Locator, sync_playwright
from platforms.threads.auth import ensure_authenticated_context
from utils import settings
from utils.console import print_step, print_substep
from utils.console import emit_scraper_event, print_step, print_substep
from utils.voice import sanitize_text
from utils.videos import check_done_by_id
@ -124,6 +124,7 @@ def _extract_text_from_card(link: Locator) -> str:
def _scrape_feed_posts(context: BrowserContext, max_scrolls: int = MAX_FEED_SCROLLS) -> list[dict]:
"""Navigate to threads.net feed, scroll, extract post metadata with engagement metrics."""
print_step("Scraping Threads trending feed...")
emit_scraper_event("browser_launch", {"message": "Scraping Threads trending feed"})
page = context.new_page()
posts: list[dict] = []
seen_ids: set[str] = set()
@ -163,6 +164,15 @@ def _scrape_feed_posts(context: BrowserContext, max_scrolls: int = MAX_FEED_SCRO
})
new_found += 1
emit_scraper_event("post_discovered", {
"username": parsed["username"],
"body": parsed["body"][:100],
"likes": parsed["likes"],
"replies": parsed["replies"],
"reposts": parsed["reposts"],
"post_id": post_id,
})
if new_found > 0:
top = posts[-1]
print_substep(
@ -172,6 +182,13 @@ def _scrape_feed_posts(context: BrowserContext, max_scrolls: int = MAX_FEED_SCRO
style="dim",
)
emit_scraper_event("feed_scroll", {
"scroll": i + 1,
"new_posts": new_found,
"total_posts": len(posts),
"max_scrolls": max_scrolls,
})
if new_found == 0 and i > 5:
break
@ -196,6 +213,7 @@ def _scrape_search_page(context: BrowserContext, query: str, max_scrolls: int =
Uses the same card extraction as the main feed.
"""
print_step(f"Scraping Threads search: '{query}'...")
emit_scraper_event("search_query", {"query": query, "posts_found": 0})
page = context.new_page()
posts: list[dict] = []
seen_ids: set[str] = set()
@ -244,6 +262,7 @@ def _scrape_search_page(context: BrowserContext, query: str, max_scrolls: int =
page.close()
print_substep(f"Search '{query}': {len(posts)} posts.", style="dim")
emit_scraper_event("search_query", {"query": query, "posts_found": len(posts)})
return posts
@ -331,6 +350,13 @@ def _filter_candidates(posts: list[dict]) -> list[dict]:
# Sort by engagement descending — most viral first
candidates.sort(key=lambda p: p.get("_total_engagement", 0), reverse=True)
emit_scraper_event("filter_results", {
"before": len(posts),
"after": len(candidates),
"min_engagement": min_engagement,
"max_age_hours": max_age_hours,
})
age_str = f", max age ≤{max_age_hours}h" if max_age_hours else ""
if min_engagement > 0:
print_substep(
@ -555,8 +581,21 @@ def get_trending_threads_content(POST_ID: Optional[str] = None) -> dict:
f"'{candidate['body'][:60]}...'",
style="dim",
)
emit_scraper_event("visiting_post", {
"post_id": candidate["post_id"],
"url": candidate["url"],
"engagement": eng,
"likes": candidate.get("likes", 0),
"body": candidate.get("body", "")[:60],
"attempt": i + 1,
})
try:
replies = _scrape_post_replies(context, candidate["url"])
emit_scraper_event("replies_found", {
"post_id": candidate["post_id"],
"count": len(replies),
"min_required": min_replies,
})
if len(replies) >= min_replies:
if not candidate.get("body") or len(candidate.get("body", "")) < 50:
full_text = _scrape_main_post_text(context, candidate["url"])
@ -569,6 +608,13 @@ def get_trending_threads_content(POST_ID: Optional[str] = None) -> dict:
f"{candidate['likes']:,} 💬{len(content['comments'])} replies",
style="bold green",
)
emit_scraper_event("post_selected", {
"title": content["thread_title"][:80],
"post_id": candidate["post_id"],
"likes": candidate["likes"],
"replies_count": len(content["comments"]),
"url": candidate["url"],
})
return content
print_substep(
f" Only {len(replies)} replies (need {min_replies}). Trying next...",

@ -19,6 +19,20 @@ def set_progress_callback(cb):
_progress_callback = cb
def emit_scraper_event(event_type: str, data: dict = None):
    """Forward a structured scraper event to the registered progress callback.

    Platform scrapers call this to stream scraping activity to the web UI.
    When no progress callback has been registered the call does nothing.

    Known event types: browser_launch, login, feed_scroll, post_discovered,
    search_query, filter_results, visiting_post, replies_found,
    post_selected, general.

    Args:
        event_type: Name of the event being emitted.
        data: Optional payload; a missing or empty payload is sent as ``{}``.
    """
    if not _progress_callback:
        return
    payload = data if data else {}
    _progress_callback(event=event_type, data=payload)
def print_markdown(text) -> None:
"""Prints a rich info message. Support Markdown syntax."""

Loading…
Cancel
Save