diff --git a/platforms/threads/scraper.py b/platforms/threads/scraper.py index dee8005..d7728c0 100644 --- a/platforms/threads/scraper.py +++ b/platforms/threads/scraper.py @@ -559,8 +559,8 @@ def get_trending_threads_content(POST_ID: Optional[str] = None) -> dict: for sp in search_posts: if sp["post_id"] not in existing_ids: posts.append(sp) - except Exception: - pass + except Exception as e: + print_substep(f"Search query failed: {e}", "yellow") if not posts: raise RuntimeError("No posts found in feed. Try again later.") diff --git a/reddit/subreddit.py b/reddit/subreddit.py index 25427c5..04ca7cf 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -1,4 +1,5 @@ import re +import sys import praw from praw.models import MoreComments @@ -105,11 +106,20 @@ def get_subreddit_threads(POST_ID: str): submission = get_subreddit_undone(threads, subreddit) if submission is None: - return get_subreddit_threads(POST_ID) # submission already done. rerun + # submission already done — retry with depth limit to prevent infinite recursion + if not hasattr(get_subreddit_threads, "_retry_depth"): + get_subreddit_threads._retry_depth = 0 + get_subreddit_threads._retry_depth += 1 + if get_subreddit_threads._retry_depth > 50: + raise RuntimeError("Exceeded retry limit (50) looking for an undone submission") + try: + return get_subreddit_threads(POST_ID) + finally: + get_subreddit_threads._retry_depth -= 1 elif not submission.num_comments and settings.config["settings"]["storymode"] == "false": print_substep("No comments found. Skipping.") - exit() + sys.exit() submission = check_done(submission) # double-checking diff --git a/utils/posttextparser.py b/utils/posttextparser.py index b26ab0f..7a085af 100644 --- a/utils/posttextparser.py +++ b/utils/posttextparser.py @@ -1,5 +1,7 @@ import os import re +import subprocess +import sys import time from typing import List @@ -30,7 +32,10 @@ def posttextparser(obj, *, tried: bool = False) -> List[str]: nlp = spacy.load("en_core_web_sm") except OSError as e: if not tried: - os.system("python -m spacy download en_core_web_sm") + subprocess.run( + [sys.executable, "-m", "spacy", "download", "en_core_web_sm"], + check=False, + ) time.sleep(5) return posttextparser(obj, tried=True) print_step( diff --git a/utils/videos.py b/utils/videos.py index b352968..2c109cf 100755 --- a/utils/videos.py +++ b/utils/videos.py @@ -60,6 +60,7 @@ def save_data(subreddit: str, filename: str, reddit_title: str, reddit_id: str, done_vids.append(payload) raw_vids.seek(0) json.dump(done_vids, raw_vids, ensure_ascii=False, indent=4) + raw_vids.truncate() def check_done_by_id(post_id: str) -> bool: