You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
191 lines
6.2 KiB
191 lines
6.2 KiB
"""Fetches content from Meta Threads via the Graph API."""
|
|
|
|
import requests
|
|
from typing import Optional
|
|
|
|
from utils import settings
|
|
from utils.console import print_step, print_substep
|
|
from utils.voice import sanitize_text
|
|
from utils.videos import check_done_by_id
|
|
|
|
|
|
GRAPH_API_BASE = "https://graph.threads.net/v1.0"
|
|
|
|
|
|
def _get_headers() -> dict:
|
|
"""Returns HTTP headers with Bearer token for Graph API requests."""
|
|
token = settings.config["threads"]["creds"]["access_token"]
|
|
if not token:
|
|
raise RuntimeError(
|
|
"Threads API: access_token is required. "
|
|
"Set it in config.toml under [threads.creds]."
|
|
)
|
|
return {"Authorization": f"Bearer {token}"}
|
|
|
|
|
|
def _api_get(url: str, params: dict = None) -> dict:
|
|
"""Makes a GET request to Threads Graph API with error handling."""
|
|
try:
|
|
resp = requests.get(url, headers=_get_headers(), params=params or {}, timeout=15)
|
|
resp.raise_for_status()
|
|
return resp.json()
|
|
except requests.exceptions.HTTPError as e:
|
|
if e.response.status_code == 401:
|
|
raise RuntimeError(
|
|
"Threads API: Invalid or expired access_token. "
|
|
"Tokens are valid for 60 days. Refresh at: "
|
|
"https://developers.facebook.com/tools/explorer/"
|
|
) from e
|
|
if e.response.status_code == 400:
|
|
error_msg = e.response.json().get("error", {}).get("message", str(e))
|
|
raise RuntimeError(f"Threads API: Bad request — {error_msg}") from e
|
|
raise RuntimeError(f"Threads API: HTTP {e.response.status_code}") from e
|
|
except requests.exceptions.ConnectionError as e:
|
|
raise RuntimeError("Threads API: Cannot connect. Check internet connection.") from e
|
|
except requests.exceptions.Timeout as e:
|
|
raise RuntimeError("Threads API: Request timed out.") from e
|
|
|
|
|
|
def _fetch_post(post_id: str) -> dict:
|
|
"""Fetches a single Threads post by ID."""
|
|
url = f"{GRAPH_API_BASE}/{post_id}"
|
|
params = {"fields": "id,text,timestamp,permalink,is_quote_post,media_type"}
|
|
return _api_get(url, params)
|
|
|
|
|
|
def _fetch_replies(post_id: str, limit: int = 50) -> list:
|
|
"""Fetches all replies to a Threads post, handling pagination."""
|
|
url = f"{GRAPH_API_BASE}/{post_id}/replies"
|
|
params = {
|
|
"fields": "id,text,timestamp,username,permalink",
|
|
"limit": limit,
|
|
}
|
|
results = []
|
|
|
|
while url:
|
|
data = _api_get(url, params)
|
|
results.extend(data.get("data", []))
|
|
# Handle pagination — next URL is provided in paging.next
|
|
url = data.get("paging", {}).get("next")
|
|
params = {} # Next URL already includes all params
|
|
|
|
return results
|
|
|
|
|
|
def _pick_best_post() -> tuple:
|
|
"""
|
|
Fetches recent posts from the user and returns the first one
|
|
with enough replies that hasn't been processed yet.
|
|
|
|
Returns:
|
|
tuple: (post_dict, replies_list)
|
|
|
|
Raises:
|
|
RuntimeError: If no eligible posts are found.
|
|
"""
|
|
user_id = settings.config["threads"]["creds"]["user_id"]
|
|
if not user_id:
|
|
raise RuntimeError(
|
|
"Threads API: user_id is required. "
|
|
"Set it in config.toml under [threads.creds]."
|
|
)
|
|
|
|
url = f"{GRAPH_API_BASE}/{user_id}/threads"
|
|
params = {"fields": "id,text,timestamp,permalink,media_type", "limit": 25}
|
|
|
|
data = _api_get(url, params)
|
|
posts = data.get("data", [])
|
|
|
|
min_replies = settings.config["threads"]["thread"]["min_replies"]
|
|
|
|
for post in posts:
|
|
if check_done_by_id(post["id"]):
|
|
continue
|
|
|
|
replies = _fetch_replies(post["id"])
|
|
if len(replies) >= min_replies:
|
|
return post, replies
|
|
|
|
raise RuntimeError(
|
|
f"No eligible Threads posts found. "
|
|
f"Ensure you have posts with at least {min_replies} replies."
|
|
)
|
|
|
|
|
|
def get_threads_content(POST_ID: str = None) -> dict:
|
|
"""
|
|
Fetches Threads content (post + replies) and returns it in the standard content_object format.
|
|
|
|
Args:
|
|
POST_ID (str, optional): Specific post ID to fetch. If None, auto-selects.
|
|
|
|
Returns:
|
|
dict: Standard content_object matching the pipeline contract.
|
|
|
|
Raises:
|
|
RuntimeError: On API errors or if no eligible content found.
|
|
"""
|
|
print_step("Fetching Threads content...")
|
|
|
|
# Determine which post to fetch
|
|
if POST_ID:
|
|
post = _fetch_post(POST_ID)
|
|
replies = _fetch_replies(POST_ID)
|
|
elif settings.config["threads"]["thread"].get("post_id"):
|
|
post_id = settings.config["threads"]["thread"]["post_id"]
|
|
post = _fetch_post(post_id)
|
|
replies = _fetch_replies(post_id)
|
|
else:
|
|
post, replies = _pick_best_post()
|
|
|
|
# Load content filters from config
|
|
max_len = settings.config["threads"]["thread"]["max_reply_length"]
|
|
min_len = settings.config["threads"]["thread"]["min_reply_length"]
|
|
blocked_raw = settings.config["threads"]["thread"].get("blocked_words", "")
|
|
blocked = [w.strip().lower() for w in blocked_raw.split(",") if w.strip()]
|
|
|
|
# Build content object in standard format
|
|
content = {
|
|
"thread_id": post["id"],
|
|
"thread_title": (post.get("text") or "")[:280], # Threads has no separate title
|
|
"thread_url": post["permalink"],
|
|
"is_nsfw": False, # Threads API doesn't provide NSFW flag
|
|
"thread_category": "threads", # Generic field for output folder naming
|
|
"comments": [],
|
|
}
|
|
|
|
# Filter and add replies
|
|
for reply in replies:
|
|
body = reply.get("text", "").strip()
|
|
if not body:
|
|
continue
|
|
|
|
# Check blocked words
|
|
if any(w in body.lower() for w in blocked):
|
|
continue
|
|
|
|
# Check length constraints
|
|
if not (min_len <= len(body) <= max_len):
|
|
continue
|
|
|
|
# Sanitize text
|
|
sanitised = sanitize_text(body)
|
|
if not sanitised:
|
|
continue
|
|
|
|
content["comments"].append({
|
|
"comment_body": body,
|
|
"comment_url": reply["permalink"],
|
|
"comment_id": reply["id"],
|
|
})
|
|
|
|
# Log summary
|
|
title_preview = content["thread_title"][:60]
|
|
print_substep(
|
|
f"Fetched Threads post '{title_preview}...' "
|
|
f"with {len(content['comments'])} replies.",
|
|
style="bold green",
|
|
)
|
|
|
|
return content
|