From 22868d5759efb4bdb24e7dfd6da9cbdd63fbd9e3 Mon Sep 17 00:00:00 2001 From: Hong Phuc Date: Tue, 5 May 2026 22:32:33 +0700 Subject: [PATCH] =?UTF-8?q?fix:=20re-review=20findings=20=E2=80=94=20os.sy?= =?UTF-8?q?stem,=20truncation,=20recursion,=20swallowed=20errors?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace missed os.system() in posttextparser with subprocess.run - Add truncate() after json.dump in videos.py save_data (trailing bytes bug) - Replace exit() with sys.exit() in subreddit.py - Add retry depth limit (50) to prevent infinite recursion in get_subreddit_threads - Log search-scraping errors instead of silent except: pass in scraper.py Co-Authored-By: RuFlo --- platforms/threads/scraper.py | 4 ++-- reddit/subreddit.py | 14 ++++++++++++-- utils/posttextparser.py | 7 ++++++- utils/videos.py | 1 + 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/platforms/threads/scraper.py b/platforms/threads/scraper.py index dee8005..d7728c0 100644 --- a/platforms/threads/scraper.py +++ b/platforms/threads/scraper.py @@ -559,8 +559,8 @@ def get_trending_threads_content(POST_ID: Optional[str] = None) -> dict: for sp in search_posts: if sp["post_id"] not in existing_ids: posts.append(sp) - except Exception: - pass + except Exception as e: + print_substep(f"Search query failed: {e}", "yellow") if not posts: raise RuntimeError("No posts found in feed. Try again later.") diff --git a/reddit/subreddit.py b/reddit/subreddit.py index 25427c5..04ca7cf 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -1,4 +1,5 @@ import re +import sys import praw from praw.models import MoreComments @@ -105,11 +106,20 @@ def get_subreddit_threads(POST_ID: str): submission = get_subreddit_undone(threads, subreddit) if submission is None: - return get_subreddit_threads(POST_ID) # submission already done. rerun + # submission already done — retry with depth limit to prevent infinite recursion + if not hasattr(get_subreddit_threads, "_retry_depth"): + get_subreddit_threads._retry_depth = 0 + get_subreddit_threads._retry_depth += 1 + if get_subreddit_threads._retry_depth > 50: + raise RuntimeError("Exceeded retry limit (50) looking for an undone submission") + try: + return get_subreddit_threads(POST_ID) + finally: + get_subreddit_threads._retry_depth -= 1 elif not submission.num_comments and settings.config["settings"]["storymode"] == "false": print_substep("No comments found. Skipping.") - exit() + sys.exit() submission = check_done(submission) # double-checking diff --git a/utils/posttextparser.py b/utils/posttextparser.py index b26ab0f..7a085af 100644 --- a/utils/posttextparser.py +++ b/utils/posttextparser.py @@ -1,5 +1,7 @@ import os import re +import subprocess +import sys import time from typing import List @@ -30,7 +32,10 @@ def posttextparser(obj, *, tried: bool = False) -> List[str]: nlp = spacy.load("en_core_web_sm") except OSError as e: if not tried: - os.system("python -m spacy download en_core_web_sm") + subprocess.run( + [sys.executable, "-m", "spacy", "download", "en_core_web_sm"], + check=False, + ) time.sleep(5) return posttextparser(obj, tried=True) print_step( diff --git a/utils/videos.py b/utils/videos.py index b352968..2c109cf 100755 --- a/utils/videos.py +++ b/utils/videos.py @@ -60,6 +60,7 @@ def save_data(subreddit: str, filename: str, reddit_title: str, reddit_id: str, done_vids.append(payload) raw_vids.seek(0) json.dump(done_vids, raw_vids, ensure_ascii=False, indent=4) + raw_vids.truncate() def check_done_by_id(post_id: str) -> bool: