You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
177 lines
6.5 KiB
177 lines
6.5 KiB
"""Shared Playwright authentication for Threads.
|
|
|
|
Used by both the screenshotter (screenshot.py) and the web scraper (scraper.py).
|
|
"""
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
from playwright.sync_api import Browser, BrowserContext, Page, TimeoutError, ViewportSize
|
|
|
|
from utils import settings
|
|
from utils.console import emit_scraper_event, print_substep
|
|
|
|
THREADS_LOGIN_URL = "https://www.threads.com/login"
|
|
THREADS_AUTH_CHECK_URL = "https://www.threads.net/"
|
|
THREADS_COOKIE_FILE = "./video_creation/data/cookie-threads.json"
|
|
DEFAULT_USER_AGENT = (
|
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
|
"Chrome/120.0.0.0 Safari/537.36"
|
|
)
|
|
THREADS_HOSTS = {
|
|
"threads.com",
|
|
"www.threads.com",
|
|
"threads.net",
|
|
"www.threads.net",
|
|
}
|
|
THREADS_NON_AUTH_PATH_PREFIXES = (
|
|
"/login",
|
|
"/challenge",
|
|
"/checkpoint",
|
|
"/consent",
|
|
"/accountsuspended",
|
|
"/suspended",
|
|
)
|
|
|
|
|
|
def _is_logged_in_threads_url(url: str) -> bool:
|
|
"""Return True once Threads has navigated away from the login page."""
|
|
parsed = urlparse(url)
|
|
hostname = (parsed.hostname or "").casefold()
|
|
path = (parsed.path or "/").lower()
|
|
return parsed.scheme == "https" and hostname in THREADS_HOSTS and not any(
|
|
path.startswith(prefix) for prefix in THREADS_NON_AUTH_PATH_PREFIXES
|
|
)
|
|
|
|
|
|
def _has_login_gate(page: Page) -> bool:
|
|
"""Detect Threads login modal overlaying the page."""
|
|
gate_selectors = [
|
|
'input[autocomplete="username"]',
|
|
'input[autocomplete="current-password"]',
|
|
'div[role="dialog"] button:has-text("Log in")',
|
|
]
|
|
for selector in gate_selectors:
|
|
try:
|
|
if page.locator(selector).first.is_visible():
|
|
return True
|
|
except Exception:
|
|
continue
|
|
return False
|
|
|
|
|
|
def login_to_threads(page: Page, _context: BrowserContext) -> None:
|
|
"""Log into Threads via Instagram credentials and persist session cookies."""
|
|
username = settings.config["threads"]["creds"].get("username", "").strip()
|
|
password = settings.config["threads"]["creds"].get("password", "").strip()
|
|
|
|
if not username or not password:
|
|
raise RuntimeError(
|
|
"Threads login requires credentials. "
|
|
"Set threads.creds.username and threads.creds.password in config.toml"
|
|
)
|
|
|
|
print_substep("Logging into Threads (via Instagram)...")
|
|
emit_scraper_event("login", {"message": "Logging into Threads (via Instagram)..."})
|
|
page.goto(THREADS_LOGIN_URL, timeout=0)
|
|
page.wait_for_load_state("domcontentloaded")
|
|
|
|
username_input = page.locator('input[autocomplete="username"]')
|
|
password_input = page.locator('input[autocomplete="current-password"]')
|
|
username_input.fill(username)
|
|
password_input.fill(password)
|
|
|
|
# CloakBrowser can be slightly flaky with the humanized button click even
|
|
# when the credentials are correct. Try the visible login button first,
|
|
# then fall back to pressing Enter in the password field.
|
|
submit_attempts = [
|
|
("button click", lambda: page.get_by_role("button", name="Log in", exact=True).first.click()),
|
|
("password Enter", lambda: password_input.press("Enter")),
|
|
]
|
|
last_error = None
|
|
for index, (label, submit) in enumerate(submit_attempts):
|
|
try:
|
|
submit()
|
|
# Threads currently redirects to threads.com on successful login,
|
|
# but older saved sessions and some flows may still hit threads.net.
|
|
page.wait_for_url(_is_logged_in_threads_url, timeout=30000 if index == 0 else 15000)
|
|
break
|
|
except TimeoutError as error:
|
|
last_error = error
|
|
if index == len(submit_attempts) - 1:
|
|
raise
|
|
print_substep(f"Threads login via {label} timed out. Retrying...", style="yellow")
|
|
|
|
page.wait_for_load_state("domcontentloaded")
|
|
|
|
cookies = _context.cookies()
|
|
Path(THREADS_COOKIE_FILE).parent.mkdir(parents=True, exist_ok=True)
|
|
with open(THREADS_COOKIE_FILE, "w") as f:
|
|
json.dump(cookies, f)
|
|
|
|
print_substep("Logged into Threads and saved session cookies.", style="bold green")
|
|
emit_scraper_event("login", {"message": "Logged in successfully"})
|
|
|
|
|
|
def ensure_authenticated_context(browser: Browser, **kwargs) -> BrowserContext:
|
|
"""Create a Playwright browser context with Threads session cookies loaded.
|
|
|
|
Loads saved cookies from cookie-threads.json. If no valid session exists,
|
|
performs a fresh login and persists the cookies.
|
|
|
|
Keyword arguments override defaults for locale, viewport, device_scale_factor,
|
|
color_scheme, and user_agent.
|
|
"""
|
|
theme = settings.config["settings"]["theme"]
|
|
W = int(settings.config["settings"]["resolution_w"])
|
|
H = int(settings.config["settings"]["resolution_h"])
|
|
dsf = (W // 600) + 1
|
|
|
|
defaults = {
|
|
"locale": "en-US",
|
|
"color_scheme": "dark" if theme == "dark" else "light",
|
|
"viewport": ViewportSize(width=W, height=H),
|
|
"device_scale_factor": dsf,
|
|
"user_agent": DEFAULT_USER_AGENT,
|
|
}
|
|
defaults.update(kwargs)
|
|
|
|
context = browser.new_context(**defaults)
|
|
|
|
cookie_path = Path(THREADS_COOKIE_FILE)
|
|
if cookie_path.exists():
|
|
try:
|
|
with open(cookie_path, encoding="utf-8") as f:
|
|
saved_cookies = json.load(f)
|
|
context.add_cookies(saved_cookies)
|
|
print_substep("Loaded saved Threads session cookies.")
|
|
emit_scraper_event("login", {"message": "Loaded saved session cookies"})
|
|
|
|
# Verify loaded cookies are still valid
|
|
page = context.new_page()
|
|
try:
|
|
page.goto(THREADS_AUTH_CHECK_URL, timeout=0)
|
|
page.wait_for_load_state("domcontentloaded")
|
|
page.wait_for_timeout(1500)
|
|
if _has_login_gate(page):
|
|
print_substep("Saved Threads cookies expired. Re-logging in...", style="yellow")
|
|
context.clear_cookies()
|
|
cookie_path.unlink(missing_ok=True)
|
|
login_to_threads(page, context)
|
|
finally:
|
|
page.close()
|
|
except (json.JSONDecodeError, IOError):
|
|
print_substep("Saved cookies corrupted. Logging in fresh...")
|
|
page = context.new_page()
|
|
login_to_threads(page, context)
|
|
page.close()
|
|
else:
|
|
print_substep("No saved cookies found. Logging in...")
|
|
page = context.new_page()
|
|
login_to_threads(page, context)
|
|
page.close()
|
|
|
|
return context
|