fix: Threads comment screenshots, TTS count, background encode performance

- Reply screenshots now locate each reply by its comment_id instead of taking the first post link on the page (which captured the main post)
- TTS engine returns actual count (idx+1) instead of last index
- Background chop uses ffmpeg stream-copy instead of moviepy re-encode
- Merged prepare_background crop+scale into overlay filter graph (single encode pass)
- Added -preset veryfast -crf 23 to overlay renders
- Platform-conditional title image (no Reddit template on Threads)

Co-Authored-By: RuFlo <ruv@ruv.net>
pull/2551/head
Hong Phuc 4 weeks ago
parent 9e219ebfbd
commit 263e2784f0

@ -104,7 +104,7 @@ class TTSEngine:
self.call_tts(f"{idx}", process_text(comment["comment_body"])) self.call_tts(f"{idx}", process_text(comment["comment_body"]))
print_substep("Saved Text to MP3 files successfully.", style="bold green") print_substep("Saved Text to MP3 files successfully.", style="bold green")
return self.length, idx return self.length, idx + 1 # count, not last index
def split_post(self, text: str, idx): def split_post(self, text: str, idx):
split_files = [] split_files = []

@ -102,8 +102,10 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int)
page.wait_for_timeout(2000) page.wait_for_timeout(2000)
# Threads.net uses div-based cards for replies too. # Threads.net uses div-based cards for replies too.
# Find the first post link and screenshot its card container. # Target the specific reply by its comment_id in the URL.
reply_link = page.locator('a[href*="/post/"]').first # Using .first would pick the main post (appears first in DOM).
reply_id = comment["comment_id"]
reply_link = page.locator(f'a[href*="/{reply_id}"]').first
if reply_link.count() and reply_link.is_visible(): if reply_link.count() and reply_link.is_visible():
card = reply_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]') card = reply_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]')
reply_locator = card.first if card.count() else reply_link reply_locator = card.first if card.count() else reply_link

@ -1,18 +1,26 @@
import json import json
import random import random
import re import re
import subprocess
from pathlib import Path from pathlib import Path
from random import randrange from random import randrange
from typing import Any, Dict, Tuple from typing import Any, Dict, Tuple
import av
import yt_dlp import yt_dlp
from moviepy import AudioFileClip, VideoFileClip from moviepy import AudioFileClip
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from utils import settings from utils import settings
from utils.console import print_step, print_substep from utils.console import print_step, print_substep
def _probe_duration(path: str) -> float:
    """Return the duration of the media file at *path* in seconds, using PyAV.

    The duration of the first stream is preferred. Some containers do not
    record a per-stream duration, in which case PyAV reports ``None`` for it;
    the container-level duration is used as a fallback so such files do not
    crash with a ``TypeError``.

    Args:
        path: Filesystem path of the media file to inspect.

    Returns:
        The duration in seconds.

    Raises:
        ValueError: If neither the first stream nor the container reports a
            duration.
    """
    with av.open(path) as container:
        stream = container.streams[0]
        if stream.duration is not None and stream.time_base is not None:
            # Stream duration is expressed in units of the stream's time_base.
            return float(stream.duration * stream.time_base)
        if container.duration is not None:
            # Container duration is expressed in av.time_base units.
            return float(container.duration) / av.time_base
    raise ValueError(f"Could not determine media duration for {path!r}")
def load_background_options(): def load_background_options():
_background_options = {} _background_options = {}
# Load background videos # Load background videos
@ -144,24 +152,19 @@ def chop_background(background_config: Dict[str, Tuple], video_length: int, redd
print_step("Finding a spot in the backgrounds video to chop...✂️") print_step("Finding a spot in the backgrounds video to chop...✂️")
video_choice = f"{background_config['video'][2]}-{background_config['video'][1]}" video_choice = f"{background_config['video'][2]}-{background_config['video'][1]}"
background_video = VideoFileClip(f"assets/backgrounds/video/{video_choice}") src = f"assets/backgrounds/video/{video_choice}"
out = f"assets/temp/{thread_id}/background.mp4"
start_time_video, end_time_video = get_start_and_end_times( start_time_video, end_time_video = get_start_and_end_times(
video_length, background_video.duration video_length, _probe_duration(src)
) )
# Extract video subclip # ffmpeg stream-copy (fast) instead of moviepy re-encode
try: result = subprocess.run([
with VideoFileClip(f"assets/backgrounds/video/{video_choice}") as video: "ffmpeg", "-y", "-ss", str(start_time_video), "-to", str(end_time_video),
new = video.subclipped(start_time_video, end_time_video) "-i", src, "-c", "copy", "-avoid_negative_ts", "make_zero", out,
new.write_videofile(f"assets/temp/{thread_id}/background.mp4") ], capture_output=True)
if result.returncode != 0:
except (OSError, IOError): # ffmpeg issue see #348 stderr = result.stderr.decode("utf-8", errors="replace")
print_substep("FFMPEG issue. Trying again...") raise RuntimeError(f"ffmpeg background extraction failed: {stderr[-500:]}")
ffmpeg_extract_subclip(
f"assets/backgrounds/video/{video_choice}",
start_time_video,
end_time_video,
outputfile=f"assets/temp/{thread_id}/background.mp4",
)
print_substep("Background video chopped successfully!", style="bold green") print_substep("Background video chopped successfully!", style="bold green")
return background_config["video"][2] return background_config["video"][2]

@ -1,5 +1,4 @@
import json import json
import multiprocessing
import os import os
import re import re
import subprocess import subprocess
@ -105,21 +104,6 @@ def name_normalize(name: str) -> str:
return name return name
def prepare_background(reddit_id: str, W: int, H: int) -> str:
    """Build a cropped, scaled, audio-free copy of the chopped background clip.

    The ffmpeg arguments select ``assets/temp/<reddit_id>/background.mp4`` as
    input, crop it to the W:H aspect ratio (keeping the full input height),
    scale it to W x H, encode with libx264 at a 20M video bitrate, and drop
    the audio track (``-an``). The argument list is handed to ``_run_ffmpeg``
    (defined elsewhere in this file) under the label ``"prepare_background"``.

    Args:
        reddit_id: Identifier naming the per-thread temp directory.
        W: Target width in pixels.
        H: Target height in pixels.

    Returns:
        The path of the output file, ``background_noaudio.mp4`` in the same
        temp directory.
    """
    temp_dir = f"assets/temp/{reddit_id}"
    source = f"{temp_dir}/background.mp4"
    destination = f"{temp_dir}/background_noaudio.mp4"

    video_filter = f"crop=ih*({W}/{H}):ih,scale={W}:{H}"
    thread_count = str(multiprocessing.cpu_count())

    ffmpeg_args = ["-i", source, "-vf", video_filter]
    ffmpeg_args += ["-c:v", "libx264", "-b:v", "20M", "-an"]
    ffmpeg_args += ["-threads", thread_count, destination]

    _run_ffmpeg(ffmpeg_args, "prepare_background")
    return destination
def get_text_height(draw, text, font, max_width): def get_text_height(draw, text, font, max_width):
lines = textwrap.wrap(text, width=max_width) lines = textwrap.wrap(text, width=max_width)
total_height = 0 total_height = 0
@ -202,13 +186,17 @@ def _build_audio_concat_list(input_paths: list[str], list_path: str) -> None:
def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) -> str: def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) -> str:
"""Build a ffmpeg filter_complex string for overlaying images on background. """Build a ffmpeg filter_complex string for overlaying images on background.
Prepends crop+scale on [0:v] so raw background.mp4 can be used directly
(no separate prepare_background encode pass needed).
Each overlay item: {path, start_time, duration, opacity, scale_w, scale_h} Each overlay item: {path, start_time, duration, opacity, scale_w, scale_h}
""" """
parts = [] parts = []
prev_label = "0:v" # background is the first input # Crop background to target aspect ratio and scale — merged from prepare_background
parts.append(f"[0:v]crop=ih*({W}/{H}):ih,scale={W}:{H}[bg];")
prev_label = "bg"
for i, item in enumerate(overlay_items): for i, item in enumerate(overlay_items):
ov_label = f"ov{i}"
scaled_label = f"sc{i}" scaled_label = f"sc{i}"
faded_label = f"fd{i}" faded_label = f"fd{i}"
@ -222,7 +210,7 @@ def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) ->
) )
# Overlay with timing # Overlay with timing
enable = f"between(t,{item['start_time']},{item['start_time'] + item['duration']})" enable = f"between(t,{item['start_time']},{item['start_time'] + item['duration']})"
next_label = f"out{i}" if i < len(overlay_items) - 1 else "outv" next_label = f"out{i}" if i < len(overlay_items) - 1 else "final"
parts.append( parts.append(
f"[{prev_label}][{faded_label}]overlay=" f"[{prev_label}][{faded_label}]overlay="
f"x=(main_w-overlay_w)/2:y=(main_h-overlay_h)/2:" f"x=(main_w-overlay_w)/2:y=(main_h-overlay_h)/2:"
@ -230,11 +218,8 @@ def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) ->
) )
if i < len(overlay_items) - 1: if i < len(overlay_items) - 1:
parts.append(";") parts.append(";")
ov_label = ov_label # unused, keeps naming consistent
prev_label = next_label prev_label = next_label
# Final scale
parts.append(f";[{prev_label}]scale={W}:{H}[final]")
return "".join(parts) return "".join(parts)
@ -257,8 +242,8 @@ def make_final_video(
print_step("Creating the final video 🎥") print_step("Creating the final video 🎥")
# --- Step 1: Prepare background --- # --- Step 1: Background path (crop+scale merged into overlay filter) ---
background_path = prepare_background(reddit_id, W=W, H=H) background_path = f"assets/temp/{reddit_id}/background.mp4"
# --- Step 2: Concatenate all TTS audio clips --- # --- Step 2: Concatenate all TTS audio clips ---
audio_clip_paths = [] audio_clip_paths = []
@ -274,7 +259,7 @@ def make_final_video(
] ]
else: else:
audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"] audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"]
for i in range(number_of_clips + 1): for i in range(number_of_clips):
audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
else: else:
audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"] audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"]
@ -303,17 +288,22 @@ def make_final_video(
screenshot_width = int((W * 45) // 100) screenshot_width = int((W * 45) // 100)
Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True) Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True)
title_template = Image.open("assets/title_template.png") platform = settings.config["settings"].get("platform", "reddit")
title = reddit_obj["thread_title"]
title = name_normalize(title) # Use actual screenshot for non-Reddit platforms (Threads etc.), Reddit template for Reddit
title_img = create_fancy_thumbnail(title_template, title, "#000000", 5) title_img_path = f"assets/temp/{reddit_id}/png/title.png"
title_img.save(f"assets/temp/{reddit_id}/png/title.png") if platform == "reddit":
title_template = Image.open("assets/title_template.png")
title = reddit_obj["thread_title"]
title = name_normalize(title)
title_img = create_fancy_thumbnail(title_template, title, "#000000", 5)
title_img.save(title_img_path)
overlay_items = [] overlay_items = []
current_time = 0.0 current_time = 0.0
overlay_items.append({ overlay_items.append({
"path": f"assets/temp/{reddit_id}/png/title.png", "path": title_img_path,
"start_time": current_time, "start_time": current_time,
"duration": audio_clips_durations[0], "duration": audio_clips_durations[0],
"opacity": opacity, "opacity": opacity,
@ -335,7 +325,7 @@ def make_final_video(
"scale_h": -1, "scale_h": -1,
}) })
elif settings.config["settings"]["storymodemethod"] == 1: elif settings.config["settings"]["storymodemethod"] == 1:
for i in range(number_of_clips + 1): for i in range(number_of_clips):
img_path = f"assets/temp/{reddit_id}/png/img{i}.png" img_path = f"assets/temp/{reddit_id}/png/img{i}.png"
if not os.path.exists(img_path): if not os.path.exists(img_path):
continue continue
@ -352,21 +342,21 @@ def make_final_video(
}) })
current_time += audio_clips_durations[dur_idx] current_time += audio_clips_durations[dur_idx]
else: else:
for i in range(number_of_clips + 1): for i in range(number_of_clips):
img_path = f"assets/temp/{reddit_id}/png/comment_{i}.png" dur_idx = i + 1 # audio_clips_durations[0] is title, [1..N] are comments
if not os.path.exists(img_path): if dur_idx >= len(audio_clips_durations):
continue
if i >= len(audio_clips_durations):
break break
overlay_items.append({ img_path = f"assets/temp/{reddit_id}/png/comment_{i}.png"
"path": img_path, if os.path.exists(img_path):
"start_time": current_time, overlay_items.append({
"duration": audio_clips_durations[i], "path": img_path,
"opacity": opacity, "start_time": current_time,
"scale_w": screenshot_width, "duration": audio_clips_durations[dur_idx],
"scale_h": -1, "opacity": opacity,
}) "scale_w": screenshot_width,
current_time += audio_clips_durations[i] "scale_h": -1,
})
current_time += audio_clips_durations[dur_idx]
# --- Step 5: Build filter_complex and render --- # --- Step 5: Build filter_complex and render ---
filter_complex = _build_overlay_filter_complex(overlay_items, W, H) filter_complex = _build_overlay_filter_complex(overlay_items, W, H)
@ -436,9 +426,8 @@ def make_final_video(
ffmpeg_inputs + [ ffmpeg_inputs + [
"-filter_complex", filter_complex, "-filter_complex", filter_complex,
"-map", "[final]", "-map", "[final]",
"-c:v", "libx264", "-b:v", "20M", "-c:v", "libx264", "-preset", "veryfast", "-crf", "23",
"-pix_fmt", "yuv420p", "-pix_fmt", "yuv420p",
"-threads", str(multiprocessing.cpu_count()),
"-progress", progress.output_file.name, "-progress", progress.output_file.name,
video_only_path, video_only_path,
], ],
@ -469,9 +458,8 @@ def make_final_video(
ffmpeg_inputs + [ ffmpeg_inputs + [
"-filter_complex", filter_complex, "-filter_complex", filter_complex,
"-map", "[final]", "-map", "[final]",
"-c:v", "libx264", "-b:v", "20M", "-c:v", "libx264", "-preset", "veryfast", "-crf", "23",
"-pix_fmt", "yuv420p", "-pix_fmt", "yuv420p",
"-threads", str(multiprocessing.cpu_count()),
"-progress", progress2.output_file.name, "-progress", progress2.output_file.name,
only_tts_video, only_tts_video,
], ],

Loading…
Cancel
Save