Feat: Add Gemini AI for Reddit thread summarization

This commit introduces the capability to summarize Reddit thread content (title + selftext) using the Google Gemini API.

Key changes:
- Added `google-generativeai` to `requirements.txt`.
- Updated `utils/.config.template.toml` and `utils/settings.py` to include a new `[gemini]` configuration section for `api_key` and `enable_summary`. Includes an interactive prompt for the API key if summarization is enabled but the key is missing.
- Created `utils/gemini_client.py` module:
    - `initialize_gemini()`: Configures the Gemini client with the API key and initializes the model named by the `model_name` setting in `[gemini]` (default: `gemini-1.5-pro-latest`).
    - `summarize_text_with_gemini()`: Takes text, generates a prompt, calls the Gemini API, and returns the summary. Includes error handling for API calls and content safety.
- Integrated into `main.py`:
    - Gemini client is initialized at startup if enabled.
    - In `generate_audio_and_screenshots()`, if summarization is enabled, the thread's selftext is replaced with the Gemini-generated summary before being passed to TTS. Falls back to original text on error.
- Added relevant logging and documentation for the new feature.
pull/2364/head
google-labs-jules[bot] 2 months ago
parent 47612a09ac
commit 1cb4be234a

@ -29,6 +29,7 @@ from utils.ffmpeg_install import ffmpeg_install
# Assuming it was for generating a unique ID from the reddit object,
# this functionality will be implicitly handled by using reddit_object["thread_id"]
from utils.version import checkversion
from utils.gemini_client import summarize_text_with_gemini # Added for summarization
from video_creation.background import (
chop_background,
download_background_audio,
@ -140,9 +141,45 @@ def get_reddit_data(post_id_override: str = None) -> Dict[str, Any]:
return reddit_object
def generate_audio_and_screenshots(reddit_object: Dict[str, Any]) -> Tuple[int, int]:
"""Generates TTS audio for the reddit content and takes screenshots."""
logging.info("Generating audio and screenshots...")
length, number_of_comments = save_text_to_mp3(reddit_object)
"""
Generates TTS audio for the Reddit content (potentially summarized) and takes screenshots.
"""
logging.info("Preparing content for audio and screenshots...")
# --- Gemini Summarization Step ---
if settings.config.get("gemini", {}).get("enable_summary"):
logging.info("Gemini summarization enabled. Attempting to summarize thread content.")
# Construct text for summarization: title + selftext
# Ensure 'thread_title' and 'thread_selftext' exist. 'thread_selftext' might be empty for image/link posts.
title = reddit_object.get("thread_title", "")
selftext = reddit_object.get("thread_selftext", "") # This is what TTSEngine uses for the main post
if selftext and selftext.strip(): # Only summarize if there's actual selftext
text_to_summarize = f"Titel: {title}\n\nBericht:\n{selftext}"
logging.debug(f"Text to summarize (first 200 chars): {text_to_summarize[:200]}")
summary = summarize_text_with_gemini(text_to_summarize)
if summary:
logging.info("Successfully summarized thread content with Gemini.")
# Replace the original selftext with the summary for TTS
# The TTSEngine in voices.py specifically looks for 'thread_selftext' for the main post.
reddit_object["thread_selftext"] = summary
# The title is usually read out separately by TTSEngine.
# If the summary should also include/replace the title for TTS, this logic might need adjustment
# or the prompt to Gemini could be to make the summary inclusive of the title's context.
# Current prompt: "Vat de volgende Reddit-thread samen in een boeiend en beknopt verhaal..."
# This implies the summary might naturally incorporate the title's essence.
logging.debug(f"Using Gemini summary for TTS (first 100 chars): {summary[:100]}")
else:
logging.warning("Failed to get summary from Gemini, or summary was empty. Using original selftext.")
else:
logging.info("No selftext found or selftext is empty. Skipping Gemini summarization for this post.")
else:
logging.info("Gemini summarization is not enabled.")
# --- End Gemini Summarization ---
logging.info("Proceeding to generate TTS audio and screenshots with (potentially summarized) content...")
length, number_of_comments = save_text_to_mp3(reddit_object) # save_text_to_mp3 uses reddit_object["thread_selftext"]
final_length = math.ceil(length)
get_screenshots_of_reddit_posts(reddit_object, number_of_comments)
logging.info("Audio and screenshots generated.")
@ -238,6 +275,20 @@ if __name__ == "__main__":
app_config = initialize_app_checks_and_config()
# Initialize Gemini client if enabled
if app_config.get("gemini", {}).get("enable_summary"):
try:
from utils.gemini_client import initialize_gemini
if not initialize_gemini():
logging.warning("Gemini client initialization failed or was skipped. Summarization will not be available.")
else:
logging.info("Gemini client initialized for summarization.")
except ImportError:
logging.error("Failed to import gemini_client. Summarization will not be available. Ensure google-generativeai is installed.")
except Exception as e:
logging.error(f"An unexpected error occurred during Gemini initialization: {e}", exc_info=True)
try:
post_ids_str = app_config.get("reddit", {}).get("thread", {}).get("post_id")
times_to_run = app_config.get("settings", {}).get("times_to_run")

@ -26,3 +26,4 @@ tiktok-uploader==1.1.1
google-api-python-client==2.171.0
google-auth-oauthlib==1.2.2
google-auth-httplib2==0.2.0
google-generativeai>=0.3.0 # Added for Gemini

@ -56,3 +56,8 @@ python_voice = { optional = false, default = "1", example = "1", explanation = "
py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" }
silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" }
no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" }
[gemini] # New section for Gemini AI settings
api_key = { optional = true, default = "", type = "str", explanation = "Your Google Gemini API Key. Required if enable_summary is true.", example = "AIzaxxxxxxxxxxxxxxxxxxxxxxx" } # Ensure type is str
enable_summary = { optional = true, type = "bool", default = false, example = false, options = [true, false,], explanation = "Enable summarizing Reddit thread selftext using Gemini AI." }
# Future Gemini settings could go here, e.g., model_name, specific prompts, etc.

@ -0,0 +1,158 @@
# utils/gemini_client.py
"""
Client module for interacting with the Google Gemini API.
Provides functionalities to initialize the Gemini client and to perform
specific generative AI tasks, such as text summarization.
The API key and model preferences are typically read from the application settings.
"""
import logging
import google.generativeai as genai
from google.api_core import exceptions as google_exceptions # For more specific error handling
from utils import settings # To access API key and other settings
logger = logging.getLogger(__name__)
# Module-level handle to the Gemini model. It is created by initialize_gemini()
# and reused by summarize_text_with_gemini(), so the model object is built once
# rather than on every summarization call. None until initialization succeeds.
_gemini_model = None
# True only after initialize_gemini() has configured the API and created the
# model; initialize_gemini() resets it to False if that setup raises.
_gemini_initialized = False
def initialize_gemini() -> bool:
    """
    Set up the module-level Gemini model from the application settings.

    Reads the ``gemini`` section of ``settings.config``. When ``enable_summary``
    is truthy and an ``api_key`` is present, configures ``google.generativeai``
    with that key and creates the model named by ``model_name`` (default
    ``gemini-1.5-pro-latest``). Meant to be called once at application start;
    once initialization has succeeded, later calls return immediately.

    Returns:
        bool: True if the client is (or already was) initialized. False if
        summarization is disabled, the API key is missing, or setup raised.
    """
    global _gemini_model, _gemini_initialized

    if _gemini_initialized:
        logger.debug("Gemini client already initialized.")
        return True

    gemini_cfg = settings.config.get("gemini", {})

    summary_enabled = gemini_cfg.get("enable_summary", False)
    if not summary_enabled:
        logger.info("Gemini summary is not enabled in settings. Skipping Gemini client initialization.")
        # Disabled is not an error, but the client is not usable for summaries.
        return False

    key = gemini_cfg.get("api_key")
    if not key:
        logger.error("Gemini API key not found in settings. Cannot initialize Gemini client.")
        return False

    try:
        logger.info("Configuring Google Gemini API...")
        genai.configure(api_key=key)

        # The model is overridable through settings; this is only the fallback.
        model_name = gemini_cfg.get("model_name", "gemini-1.5-pro-latest")
        logger.info(f"Attempting to initialize Gemini model: {model_name}")

        _gemini_model = genai.GenerativeModel(model_name)
        _gemini_initialized = True
        logger.info(f"Google Gemini client initialized successfully with model: {model_name}.")
        return True
    except Exception as exc:
        logger.error(f"Failed to initialize Google Gemini client: {exc}", exc_info=True)
        # Leave the module in a clean "not initialized" state after a failure.
        _gemini_model = None
        _gemini_initialized = False
        return False
def summarize_text_with_gemini(text_to_summarize: str) -> str | None:
    """
    Summarize the given text with the Gemini model created by ``initialize_gemini()``.

    The model is whichever one ``initialize_gemini()`` built (it is not chosen
    here). The text is embedded in a Dutch-language prompt that asks for an
    engaging, concise summary with no introductory or closing sentences.

    Args:
        text_to_summarize (str): The text content to be summarized.

    Returns:
        Optional[str]: The summary text, or None when the client has not been
        initialized, the input is empty/whitespace, the response carries no
        usable content (for example because it was blocked), or the API call
        raised. Errors are logged and never propagated to the caller.
    """
    # Only reads the module-level state, so no `global` declaration is needed.
    if not _gemini_initialized or _gemini_model is None:
        # Initialization is deliberately not attempted here: it should happen
        # once, explicitly, at application start.
        logger.warning("Gemini client not initialized or model not available. Call initialize_gemini() first.")
        return None

    if not text_to_summarize or not text_to_summarize.strip():
        logger.warning("Text to summarize is empty. Skipping Gemini call.")
        return None

    # TODO: very long inputs may exceed the model's input token limit; the
    # whole text is sent as-is for now (no truncation or chunking).
    prompt = f"Vat de volgende Reddit-thread samen in een boeiend en beknopt verhaal. Geef alleen de samenvatting terug, zonder extra inleidende of afsluitende zinnen:\n\n{text_to_summarize}"

    logger.info(f"Sending text (length: {len(text_to_summarize)}) to Gemini for summarization.")
    logger.debug(f"Gemini prompt (first 100 chars of text): Vat de volgende Reddit-thread samen... {text_to_summarize[:100]}...")

    try:
        # Custom safety settings could be passed to generate_content() here if
        # the defaults prove too strict for Reddit content.
        response = _gemini_model.generate_content(prompt)

        # NOTE(review): per the SDK docs the `response.parts` / `response.text`
        # quick accessors raise ValueError when no candidate was returned (e.g.
        # a blocked prompt). Checking `candidates` first keeps that case on the
        # "blocked" logging path instead of the generic error handler.
        if response.candidates and response.parts:
            summary = response.text  # .text joins parts automatically
            logger.info("Successfully received summary from Gemini.")
            logger.debug(f"Gemini summary: {summary[:100]}...")  # Log beginning of summary
            return summary

        logger.warning("Gemini response contained no parts (summary might be empty or blocked).")
        feedback = response.prompt_feedback
        if feedback and feedback.block_reason:
            # `block_reason_message` is not guaranteed to exist on the feedback
            # object, so read it defensively and fall back to the reason code.
            reason = getattr(feedback, "block_reason_message", None) or feedback.block_reason
            logger.warning(f"Gemini content generation blocked. Reason: {reason}")
        return None
    except google_exceptions.RetryError as e:
        logger.error(f"Gemini API request failed after retries (RetryError): {e}", exc_info=True)
    except google_exceptions.GoogleAPIError as e:  # General Google API error
        logger.error(f"Gemini API request failed (GoogleAPIError): {e}", exc_info=True)
    except Exception as e:  # Last-resort boundary: summarization must never crash the caller
        logger.error(f"An unexpected error occurred while calling Gemini API: {e}", exc_info=True)
    return None
# Manual smoke-test entry point for running this module directly.
if __name__ == "__main__":
    # Placeholder only: a real run needs `settings.config` to be populated
    # (or mocked) with a [gemini] section before anything can be called.
    print("Testing gemini_client.py (requires manual setup of config or mocking)")
    # Sketch of a manual test, left disabled on purpose:
    #
    # settings.config = {
    #     "gemini": {
    #         "enable_summary": True,
    #         "api_key": "YOUR_ACTUAL_API_KEY_FOR_TESTING_ONLY",
    #     }
    # }
    # if initialize_gemini():
    #     sample_text = (
    #         "This is a long test text about a very interesting Reddit thread. "
    #         "It was about cats and dogs and how they sometimes do and sometimes "
    #         "do not get along. The user asked for advice because his cat and dog "
    #         "were constantly fighting. There were many comments with good tips."
    #     )
    #     summary = summarize_text_with_gemini(sample_text)
    #     if summary:
    #         print("\n--- Summary ---")
    #         print(summary)
    #     else:
    #         print("\nFailed to get summary.")
    # else:
    #     print("Failed to initialize Gemini for testing.")

@ -260,9 +260,43 @@ Overwrite with a fresh configuration based on the template? (y/n)[/blue]""")
If you see any prompts, that means that you have unset/incorrectly set variables, please input the correct values.\
"""
)
crawl(template, check_vars)
with open(config_file, "w") as f:
crawl(template, check_vars) # Populates global `config` by validating against template
# --- Custom validation for Gemini settings ---
if "gemini" in config and isinstance(config["gemini"], dict):
gemini_settings = config["gemini"]
if gemini_settings.get("enable_summary") is True:
if not gemini_settings.get("api_key"):
logger.warning("Gemini summary is enabled, but API key is missing.")
# Prompt user for API key if missing and summary is enabled
# This uses the `handle_input` which is part of `utils.console`
# and uses Rich Console for prompting.
gemini_api_key_checks = template.get("gemini", {}).get("api_key", {})
# Ensure the 'type' is 'str' for handle_input if not otherwise specified for this direct call
gemini_api_key_checks.setdefault("type", "str")
# We need a local Rich Console instance if handle_input relies on a global one not set here.
# However, handle_input itself creates a Console instance.
api_key_value = handle_input(
message="[#C0CAF5 bold]Gemini API Key ([red]required as summary is enabled[/#C0CAF5 bold]): ",
extra_info=gemini_api_key_checks.get("explanation", "Enter your Google Gemini API Key."),
check_type=_get_safe_type_converter(gemini_api_key_checks.get("type", "str")), # Pass callable
optional=False, # It's not optional if enable_summary is true
err_message="API Key cannot be empty when summary is enabled."
# Potentially add nmin for basic validation if desired
)
config["gemini"]["api_key"] = api_key_value
if not api_key_value: # Double check if user somehow bypassed
logger.error("Gemini API Key is required when enable_summary is true. Configuration incomplete.")
return False # Indicate failure
else:
logger.debug("Gemini summary enabled and API key is present.")
# --- End custom validation ---
with open(config_file, "w", encoding="utf-8") as f: # Ensure encoding
toml.dump(config, f)
logger.info(f"Configuration successfully checked and saved to {config_file}")
return config

Loading…
Cancel
Save