fix: Threads comment screenshots, TTS count, background encode performance

- Reply screenshots now target by comment_id instead of .first (was capturing main post)
- TTS engine returns actual count (idx+1) instead of last index
- Background chop uses ffmpeg stream-copy instead of moviepy re-encode
- Merged prepare_background crop+scale into overlay filter graph (single encode pass)
- Added -preset veryfast -crf 23 to overlay renders
- Platform-conditional title image (no Reddit template on Threads)

Co-Authored-By: RuFlo <ruv@ruv.net>
3 months ago · 263e2784f0
parent 9e219ebfbd
commit 263e2784f0
4 changed files with 64 additions and 71 deletions
--- a/TTS/engine_wrapper.py
+++ b/TTS/engine_wrapper.py
@ -104,7 +104,7 @@ class TTSEngine:
                    self.call_tts(f"{idx}", process_text(comment["comment_body"]))

        print_substep("Saved Text to MP3 files successfully.", style="bold green")
-        return self.length, idx
+        return self.length, idx + 1  # count, not last index

    def split_post(self, text: str, idx):
        split_files = []
--- a/platforms/threads/screenshot.py
+++ b/platforms/threads/screenshot.py
@ -102,8 +102,10 @@ def get_screenshots_of_threads_posts(content_object: dict, screenshot_num: int)
                    page.wait_for_timeout(2000)

                    # Threads.net uses div-based cards for replies too.
-                    # Find the first post link and screenshot its card container.
-                    reply_link = page.locator('a[href*="/post/"]').first
+                    # Target the specific reply by its comment_id in the URL.
+                    # Using .first would pick the main post (appears first in DOM).
+                    reply_id = comment["comment_id"]
+                    reply_link = page.locator(f'a[href*="/{reply_id}"]').first
                    if reply_link.count() and reply_link.is_visible():
                        card = reply_link.locator('xpath=ancestor::div[contains(@class, "x1a2a7pz")][1]')
                        reply_locator = card.first if card.count() else reply_link
--- a/video_creation/background.py
+++ b/video_creation/background.py
@ -1,18 +1,26 @@
 import json
 import random
 import re
+import subprocess
 from pathlib import Path
 from random import randrange
 from typing import Any, Dict, Tuple

+import av
 import yt_dlp
-from moviepy import AudioFileClip, VideoFileClip
-from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
+from moviepy import AudioFileClip

 from utils import settings
 from utils.console import print_step, print_substep


+def _probe_duration(path: str) -> float:
+    """Get media duration in seconds using PyAV."""
+    with av.open(path) as container:
+        stream = container.streams[0]
+        return float(stream.duration * stream.time_base)
+
+
 def load_background_options():
    _background_options = {}
    # Load background videos
@ -144,24 +152,19 @@ def chop_background(background_config: Dict[str, Tuple], video_length: int, redd

    print_step("Finding a spot in the backgrounds video to chop...✂️")
    video_choice = f"{background_config['video'][2]}-{background_config['video'][1]}"
-    background_video = VideoFileClip(f"assets/backgrounds/video/{video_choice}")
+    src = f"assets/backgrounds/video/{video_choice}"
+    out = f"assets/temp/{thread_id}/background.mp4"
    start_time_video, end_time_video = get_start_and_end_times(
-        video_length, background_video.duration
+        video_length, _probe_duration(src)
    )
-    # Extract video subclip
-    try:
-        with VideoFileClip(f"assets/backgrounds/video/{video_choice}") as video:
-            new = video.subclipped(start_time_video, end_time_video)
-            new.write_videofile(f"assets/temp/{thread_id}/background.mp4")
-
-    except (OSError, IOError):  # ffmpeg issue see #348
-        print_substep("FFMPEG issue. Trying again...")
-        ffmpeg_extract_subclip(
-            f"assets/backgrounds/video/{video_choice}",
-            start_time_video,
-            end_time_video,
-            outputfile=f"assets/temp/{thread_id}/background.mp4",
-        )
+    # ffmpeg stream-copy (fast) instead of moviepy re-encode
+    result = subprocess.run([
+        "ffmpeg", "-y", "-ss", str(start_time_video), "-to", str(end_time_video),
+        "-i", src, "-c", "copy", "-avoid_negative_ts", "make_zero", out,
+    ], capture_output=True)
+    if result.returncode != 0:
+        stderr = result.stderr.decode("utf-8", errors="replace")
+        raise RuntimeError(f"ffmpeg background extraction failed: {stderr[-500:]}")
    print_substep("Background video chopped successfully!", style="bold green")
    return background_config["video"][2]

--- a/video_creation/final_video.py
+++ b/video_creation/final_video.py
@ -1,5 +1,4 @@
 import json
-import multiprocessing
 import os
 import re
 import subprocess
@ -105,21 +104,6 @@ def name_normalize(name: str) -> str:
    return name


-def prepare_background(reddit_id: str, W: int, H: int) -> str:
-    """Crop background video to match target aspect ratio, re-encode without audio."""
-    input_path = f"assets/temp/{reddit_id}/background.mp4"
-    output_path = f"assets/temp/{reddit_id}/background_noaudio.mp4"
-    _run_ffmpeg([
-        "-i", input_path,
-        "-vf", f"crop=ih*({W}/{H}):ih,scale={W}:{H}",
-        "-c:v", "libx264", "-b:v", "20M",
-        "-an",
-        "-threads", str(multiprocessing.cpu_count()),
-        output_path,
-    ], "prepare_background")
-    return output_path
-
-
 def get_text_height(draw, text, font, max_width):
    lines = textwrap.wrap(text, width=max_width)
    total_height = 0
@ -202,13 +186,17 @@ def _build_audio_concat_list(input_paths: list[str], list_path: str) -> None:
 def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) -> str:
    """Build a ffmpeg filter_complex string for overlaying images on background.

+    Prepends crop+scale on [0:v] so raw background.mp4 can be used directly
+    (no separate prepare_background encode pass needed).
+
    Each overlay item: {path, start_time, duration, opacity, scale_w, scale_h}
    """
    parts = []
-    prev_label = "0:v"  # background is the first input
+    # Crop background to target aspect ratio and scale — merged from prepare_background
+    parts.append(f"[0:v]crop=ih*({W}/{H}):ih,scale={W}:{H}[bg];")
+    prev_label = "bg"

    for i, item in enumerate(overlay_items):
-        ov_label = f"ov{i}"
        scaled_label = f"sc{i}"
        faded_label = f"fd{i}"

@ -222,7 +210,7 @@ def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) ->
        )
        # Overlay with timing
        enable = f"between(t,{item['start_time']},{item['start_time'] + item['duration']})"
-        next_label = f"out{i}" if i < len(overlay_items) - 1 else "outv"
+        next_label = f"out{i}" if i < len(overlay_items) - 1 else "final"
        parts.append(
            f"[{prev_label}][{faded_label}]overlay="
            f"x=(main_w-overlay_w)/2:y=(main_h-overlay_h)/2:"
@ -230,11 +218,8 @@ def _build_overlay_filter_complex(overlay_items: list[dict], W: int, H: int) ->
        )
        if i < len(overlay_items) - 1:
            parts.append(";")
-        ov_label = ov_label  # unused, keeps naming consistent
        prev_label = next_label

-    # Final scale
-    parts.append(f";[{prev_label}]scale={W}:{H}[final]")
    return "".join(parts)


@ -257,8 +242,8 @@ def make_final_video(

    print_step("Creating the final video 🎥")

-    # --- Step 1: Prepare background ---
-    background_path = prepare_background(reddit_id, W=W, H=H)
+    # --- Step 1: Background path (crop+scale merged into overlay filter) ---
+    background_path = f"assets/temp/{reddit_id}/background.mp4"

    # --- Step 2: Concatenate all TTS audio clips ---
    audio_clip_paths = []
@ -274,7 +259,7 @@ def make_final_video(
            ]
        else:
            audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"]
-            for i in range(number_of_clips + 1):
+            for i in range(number_of_clips):
                audio_clip_paths.append(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
    else:
        audio_clip_paths = [f"assets/temp/{reddit_id}/mp3/title.mp3"]
@ -303,17 +288,22 @@ def make_final_video(
    screenshot_width = int((W * 45) // 100)
    Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True)

-    title_template = Image.open("assets/title_template.png")
-    title = reddit_obj["thread_title"]
-    title = name_normalize(title)
-    title_img = create_fancy_thumbnail(title_template, title, "#000000", 5)
-    title_img.save(f"assets/temp/{reddit_id}/png/title.png")
+    platform = settings.config["settings"].get("platform", "reddit")
+
+    # Use actual screenshot for non-Reddit platforms (Threads etc.), Reddit template for Reddit
+    title_img_path = f"assets/temp/{reddit_id}/png/title.png"
+    if platform == "reddit":
+        title_template = Image.open("assets/title_template.png")
+        title = reddit_obj["thread_title"]
+        title = name_normalize(title)
+        title_img = create_fancy_thumbnail(title_template, title, "#000000", 5)
+        title_img.save(title_img_path)

    overlay_items = []
    current_time = 0.0

    overlay_items.append({
-        "path": f"assets/temp/{reddit_id}/png/title.png",
+        "path": title_img_path,
        "start_time": current_time,
        "duration": audio_clips_durations[0],
        "opacity": opacity,
@ -335,7 +325,7 @@ def make_final_video(
                    "scale_h": -1,
                })
        elif settings.config["settings"]["storymodemethod"] == 1:
-            for i in range(number_of_clips + 1):
+            for i in range(number_of_clips):
                img_path = f"assets/temp/{reddit_id}/png/img{i}.png"
                if not os.path.exists(img_path):
                    continue
@ -352,21 +342,21 @@ def make_final_video(
                })
                current_time += audio_clips_durations[dur_idx]
    else:
-        for i in range(number_of_clips + 1):
-            img_path = f"assets/temp/{reddit_id}/png/comment_{i}.png"
-            if not os.path.exists(img_path):
-                continue
-            if i >= len(audio_clips_durations):
+        for i in range(number_of_clips):
+            dur_idx = i + 1  # audio_clips_durations[0] is title, [1..N] are comments
+            if dur_idx >= len(audio_clips_durations):
                break
-            overlay_items.append({
-                "path": img_path,
-                "start_time": current_time,
-                "duration": audio_clips_durations[i],
-                "opacity": opacity,
-                "scale_w": screenshot_width,
-                "scale_h": -1,
-            })
-            current_time += audio_clips_durations[i]
+            img_path = f"assets/temp/{reddit_id}/png/comment_{i}.png"
+            if os.path.exists(img_path):
+                overlay_items.append({
+                    "path": img_path,
+                    "start_time": current_time,
+                    "duration": audio_clips_durations[dur_idx],
+                    "opacity": opacity,
+                    "scale_w": screenshot_width,
+                    "scale_h": -1,
+                })
+            current_time += audio_clips_durations[dur_idx]

    # --- Step 5: Build filter_complex and render ---
    filter_complex = _build_overlay_filter_complex(overlay_items, W, H)
@ -436,9 +426,8 @@ def make_final_video(
            ffmpeg_inputs + [
                "-filter_complex", filter_complex,
                "-map", "[final]",
-                "-c:v", "libx264", "-b:v", "20M",
+                "-c:v", "libx264", "-preset", "veryfast", "-crf", "23",
                "-pix_fmt", "yuv420p",
-                "-threads", str(multiprocessing.cpu_count()),
                "-progress", progress.output_file.name,
                video_only_path,
            ],
@ -469,9 +458,8 @@ def make_final_video(
                ffmpeg_inputs + [
                    "-filter_complex", filter_complex,
                    "-map", "[final]",
-                    "-c:v", "libx264", "-b:v", "20M",
+                    "-c:v", "libx264", "-preset", "veryfast", "-crf", "23",
                    "-pix_fmt", "yuv420p",
-                    "-threads", str(multiprocessing.cpu_count()),
                    "-progress", progress2.output_file.name,
                    only_tts_video,
                ],