"""Fetches content from Meta Threads via the Graph API.""" import requests from typing import Optional from utils import settings from utils.console import print_step, print_substep from utils.voice import sanitize_text from utils.videos import check_done_by_id GRAPH_API_BASE = "https://graph.threads.net/v1.0" def _get_headers() -> dict: """Returns HTTP headers with Bearer token for Graph API requests.""" token = settings.config["threads"]["creds"]["access_token"] if not token: raise RuntimeError( "Threads API: access_token is required. " "Set it in config.toml under [threads.creds]." ) return {"Authorization": f"Bearer {token}"} def _api_get(url: str, params: dict = None) -> dict: """Makes a GET request to Threads Graph API with error handling.""" try: resp = requests.get(url, headers=_get_headers(), params=params or {}, timeout=15) resp.raise_for_status() return resp.json() except requests.exceptions.HTTPError as e: if e.response.status_code == 401: raise RuntimeError( "Threads API: Invalid or expired access_token. " "Tokens are valid for 60 days. Refresh at: " "https://developers.facebook.com/tools/explorer/" ) from e if e.response.status_code == 400: error_msg = e.response.json().get("error", {}).get("message", str(e)) raise RuntimeError(f"Threads API: Bad request — {error_msg}") from e raise RuntimeError(f"Threads API: HTTP {e.response.status_code}") from e except requests.exceptions.ConnectionError as e: raise RuntimeError("Threads API: Cannot connect. Check internet connection.") from e except requests.exceptions.Timeout as e: raise RuntimeError("Threads API: Request timed out.") from e def _fetch_post(post_id: str) -> dict: """Fetches a single Threads post by ID.""" url = f"{GRAPH_API_BASE}/{post_id}" params = {"fields": "id,text,timestamp,permalink,is_quote_post,media_type"} return _api_get(url, params) def _fetch_replies(post_id: str, limit: int = 50) -> list: """Fetches all replies to a Threads post, handling pagination.""" url = f"{GRAPH_API_BASE}/{post_id}/replies" params = { "fields": "id,text,timestamp,username,permalink", "limit": limit, } results = [] while url: data = _api_get(url, params) results.extend(data.get("data", [])) # Handle pagination — next URL is provided in paging.next url = data.get("paging", {}).get("next") params = {} # Next URL already includes all params return results def _pick_best_post() -> tuple: """ Fetches recent posts from the user and returns the first one with enough replies that hasn't been processed yet. Returns: tuple: (post_dict, replies_list) Raises: RuntimeError: If no eligible posts are found. """ user_id = settings.config["threads"]["creds"]["user_id"] if not user_id: raise RuntimeError( "Threads API: user_id is required. " "Set it in config.toml under [threads.creds]." ) url = f"{GRAPH_API_BASE}/{user_id}/threads" params = {"fields": "id,text,timestamp,permalink,media_type", "limit": 25} data = _api_get(url, params) posts = data.get("data", []) min_replies = settings.config["threads"]["thread"]["min_replies"] for post in posts: if check_done_by_id(post["id"]): continue replies = _fetch_replies(post["id"]) if len(replies) >= min_replies: return post, replies raise RuntimeError( f"No eligible Threads posts found. " f"Ensure you have posts with at least {min_replies} replies." ) def get_threads_content(POST_ID: str = None) -> dict: """ Fetches Threads content (post + replies) and returns it in the standard content_object format. Args: POST_ID (str, optional): Specific post ID to fetch. If None, auto-selects. Returns: dict: Standard content_object matching the pipeline contract. Raises: RuntimeError: On API errors or if no eligible content found. """ print_step("Fetching Threads content...") # Determine which post to fetch if POST_ID: post = _fetch_post(POST_ID) replies = _fetch_replies(POST_ID) elif settings.config["threads"]["thread"].get("post_id"): post_id = settings.config["threads"]["thread"]["post_id"] post = _fetch_post(post_id) replies = _fetch_replies(post_id) else: post, replies = _pick_best_post() # Load content filters from config max_len = settings.config["threads"]["thread"]["max_reply_length"] min_len = settings.config["threads"]["thread"]["min_reply_length"] blocked_raw = settings.config["threads"]["thread"].get("blocked_words", "") blocked = [w.strip().lower() for w in blocked_raw.split(",") if w.strip()] # Build content object in standard format content = { "thread_id": post["id"], "thread_title": (post.get("text") or "")[:280], # Threads has no separate title "thread_url": post["permalink"], "is_nsfw": False, # Threads API doesn't provide NSFW flag "thread_category": "threads", # Generic field for output folder naming "comments": [], } # Filter and add replies for reply in replies: body = reply.get("text", "").strip() if not body: continue # Check blocked words if any(w in body.lower() for w in blocked): continue # Check length constraints if not (min_len <= len(body) <= max_len): continue # Sanitize text sanitised = sanitize_text(body) if not sanitised: continue content["comments"].append({ "comment_body": body, "comment_url": reply["permalink"], "comment_id": reply["id"], }) # Log summary title_preview = content["thread_title"][:60] print_substep( f"Fetched Threads post '{title_preview}...' " f"with {len(content['comments'])} replies.", style="bold green", ) return content