diff --git a/.dockerignore b/.dockerignore index 1d1fe94..1653ff2 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,2 @@ -Dockerfile \ No newline at end of file +Dockerfile +results \ No newline at end of file diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..c8cfe39 --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.10 diff --git a/README.md b/README.md index d39a114..8042755 100644 --- a/README.md +++ b/README.md @@ -37,27 +37,61 @@ The only original thing being done is the editing and gathering of all materials ## Installation 👩‍💻 -1. Clone this repository -2. Run `pip install -r requirements.txt` -3. Run `python -m playwright install` and `python -m playwright install-deps` +1. Clone this repository: + ```sh + git clone https://github.com/elebumm/RedditVideoMakerBot.git + cd RedditVideoMakerBot + ``` + +2. Create and activate a virtual environment: + - On **Windows**: + ```sh + python -m venv ./venv + .\venv\Scripts\activate + ``` + - On **macOS and Linux**: + ```sh + python3 -m venv ./venv + source ./venv/bin/activate + ``` + +3. Install the required dependencies: + ```sh + pip install -r requirements.txt + ``` + +4. Install Playwright and its dependencies: + ```sh + python -m playwright install + python -m playwright install-deps + ``` + +--- **EXPERIMENTAL!!!!** -On macOS and Linux (debian, arch, fedora and centos, and based on those), you can run an install script that will automatically install steps 1 to 3. (requires bash) + - On macOS and Linux (Debian, Arch, Fedora, CentOS, and based on those), you can run an installation script that will automatically install steps 1 to 3. (requires bash) + - `bash <(curl -sL https://raw.githubusercontent.com/elebumm/RedditVideoMakerBot/master/install.sh)` + - This can also be used to update the installation -`bash <(curl -sL https://raw.githubusercontent.com/elebumm/RedditVideoMakerBot/master/install.sh)` +--- -This can also be used to update the installation +5. Run the bot: + ```sh + python main.py + ``` -4. Run `python main.py` -5. Visit [the Reddit Apps page.](https://www.reddit.com/prefs/apps), and set up an app that is a "script". Paste any URL in redirect URL. Ex:`https://jasoncameron.dev` -6. The bot will ask you to fill in your details to connect to the Reddit API, and configure the bot to your liking -7. Enjoy 😎 -8. If you need to reconfigure the bot, simply open the `config.toml` file and delete the lines that need to be changed. On the next run of the bot, it will help you reconfigure those options. +6. Visit [the Reddit Apps page](https://www.reddit.com/prefs/apps), and set up an app that is a "script". Paste any URL in the redirect URL field, for example: `https://jasoncameron.dev`. -(Note if you got an error installing or running the bot try first rerunning the command with a three after the name e.g. python3 or pip3) +7. The bot will prompt you to fill in your details to connect to the Reddit API and configure the bot to your liking. -If you want to read more detailed guide about the bot, please refer to the [documentation](https://reddit-video-maker-bot.netlify.app/) +8. Enjoy 😎 + +9. If you need to reconfigure the bot, simply open the `config.toml` file and delete the lines that need to be changed. On the next run of the bot, it will help you reconfigure those options. + +(Note: If you encounter any errors installing or running the bot, try using `python3` or `pip3` instead of `python` or `pip`.) + +For a more detailed guide about the bot, please refer to the [documentation](https://reddit-video-maker-bot.netlify.app/). ## Video diff --git a/TTS/GTTS.py b/TTS/GTTS.py index bff100f..2e2d530 100644 --- a/TTS/GTTS.py +++ b/TTS/GTTS.py @@ -10,7 +10,7 @@ class GTTS: self.max_chars = 5000 self.voices = [] - def run(self, text, filepath): + def run(self, text, filepath, random_voice: bool = False): tts = gTTS( text=text, lang=settings.config["reddit"]["thread"]["post_lang"] or "en", diff --git a/TTS/elevenlabs.py b/TTS/elevenlabs.py index e896621..c1f478e 100644 --- a/TTS/elevenlabs.py +++ b/TTS/elevenlabs.py @@ -35,4 +35,4 @@ class elevenlabs: def randomvoice(self): if self.client is None: self.initialize() - return random.choice(self.client.voices.get_all().voices).voice_name + return random.choice(self.client.voices.get_all().voices).name diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index 6d498d2..1026a6d 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -5,9 +5,9 @@ from typing import Tuple import numpy as np import translators +from moviepy import AudioFileClip from moviepy.audio.AudioClip import AudioClip -from moviepy.audio.fx.volumex import volumex -from moviepy.editor import AudioFileClip +from moviepy.audio.fx import MultiplyVolume from rich.progress import track from utils import settings @@ -112,7 +112,6 @@ class TTSEngine: ] self.create_silence_mp3() - idy = None for idy, text_cut in enumerate(split_text): newtext = process_text(text_cut) # print(f"{idx}-{idy}: {newtext}\n") @@ -144,11 +143,18 @@ class TTSEngine: print("OSError") def call_tts(self, filename: str, text: str): - self.tts_module.run( - text, - filepath=f"{self.path}/{filename}.mp3", - random_voice=settings.config["settings"]["tts"]["random_voice"], - ) + if settings.config["settings"]["tts"]["voice_choice"] == "googletranslate": + # GTTS does not have the argument 'random_voice' + self.tts_module.run( + text, + filepath=f"{self.path}/{filename}.mp3", + ) + else: + self.tts_module.run( + text, + filepath=f"{self.path}/{filename}.mp3", + random_voice=settings.config["settings"]["tts"]["random_voice"], + ) # try: # self.length += MP3(f"{self.path}/{filename}.mp3").info.length # except (MutagenError, HeaderNotFoundError): @@ -164,12 +170,12 @@ class TTSEngine: def create_silence_mp3(self): silence_duration = settings.config["settings"]["tts"]["silence_duration"] silence = AudioClip( - make_frame=lambda t: np.sin(440 * 2 * np.pi * t), + frame_function=lambda t: np.sin(440 * 2 * np.pi * t), duration=silence_duration, fps=44100, ) - silence = volumex(silence, 0) - silence.write_audiofile(f"{self.path}/silence.mp3", fps=44100, verbose=False, logger=None) + silence = silence.with_effects([MultiplyVolume(0)]) + silence.write_audiofile(f"{self.path}/silence.mp3", fps=44100, logger=None) def process_text(text: str, clean: bool = True): diff --git a/TTS/openai_tts.py b/TTS/openai_tts.py new file mode 100644 index 0000000..a267163 --- /dev/null +++ b/TTS/openai_tts.py @@ -0,0 +1,91 @@ +import random + +import requests + +from utils import settings + + +class OpenAITTS: + """ + A Text-to-Speech engine that uses an OpenAI-like TTS API endpoint to generate audio from text. + + Attributes: + max_chars (int): Maximum number of characters allowed per API call. + api_key (str): API key loaded from settings. + api_url (str): The complete API endpoint URL, built from a base URL provided in the config. + available_voices (list): Static list of supported voices (according to current docs). + """ + + def __init__(self): + # Set maximum input size based on API limits (4096 characters per request) + self.max_chars = 4096 + self.api_key = settings.config["settings"]["tts"].get("openai_api_key") + if not self.api_key: + raise ValueError( + "No OpenAI API key provided in settings! Please set 'openai_api_key' in your config." + ) + + # Lese den Basis-URL aus der Konfiguration (z. B. "https://api.openai.com/v1" oder "https://api.openai.com/v1/") + base_url = settings.config["settings"]["tts"].get( + "openai_api_url", "https://api.openai.com/v1" + ) + # Entferne ggf. den abschließenden Slash + if base_url.endswith("/"): + base_url = base_url[:-1] + # Hänge den TTS-spezifischen Pfad an + self.api_url = base_url + "/audio/speech" + + # Set the available voices to a static list as per OpenAI TTS documentation. + self.available_voices = self.get_available_voices() + + def get_available_voices(self): + """ + Return a static list of supported voices for the OpenAI TTS API. + + According to the documentation, supported voices include: + "alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer" + """ + return ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"] + + def randomvoice(self): + """ + Select and return a random voice from the available voices. + """ + return random.choice(self.available_voices) + + def run(self, text, filepath, random_voice: bool = False): + """ + Convert the provided text to speech and save the resulting audio to the specified filepath. + + Args: + text (str): The input text to convert. + filepath (str): The file path where the generated audio will be saved. + random_voice (bool): If True, select a random voice from the available voices. + """ + # Choose voice based on configuration or randomly if requested. + if random_voice: + voice = self.randomvoice() + else: + voice = settings.config["settings"]["tts"].get("openai_voice_name", "alloy") + voice = str(voice).lower() # Ensure lower-case as expected by the API + + # Select the model from configuration; default to 'tts-1' + model = settings.config["settings"]["tts"].get("openai_model", "tts-1") + + # Create Payload for API-request + payload = { + "model": model, + "voice": voice, + "input": text, + "response_format": "mp3", # allowed formats: "mp3", "aac", "opus", "flac", "pcm" or "wav" + } + headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"} + try: + response = requests.post(self.api_url, headers=headers, json=payload) + if response.status_code != 200: + raise RuntimeError(f"Error from TTS API: {response.status_code} {response.text}") + # Write response as binary into file. + with open(filepath, "wb") as f: + f.write(response.content) + except Exception as e: + raise RuntimeError(f"Failed to generate audio with OpenAI TTS API: {str(e)}") diff --git a/main.py b/main.py index 849663d..742fedf 100755 --- a/main.py +++ b/main.py @@ -4,7 +4,7 @@ import sys from os import name from pathlib import Path from subprocess import Popen -from typing import NoReturn +from typing import Dict, NoReturn from prawcore import ResponseException @@ -13,7 +13,7 @@ from utils import settings from utils.cleanup import cleanup from utils.console import print_markdown, print_step, print_substep from utils.ffmpeg_install import ffmpeg_install -from utils.id import id +from utils.id import extract_id from utils.version import checkversion from video_creation.background import ( chop_background, @@ -25,7 +25,7 @@ from video_creation.final_video import make_final_video from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts from video_creation.voices import save_text_to_mp3 -__VERSION__ = "3.3.0" +__VERSION__ = "3.4.0" print( """ @@ -42,11 +42,15 @@ print_markdown( ) checkversion(__VERSION__) +reddit_id: str +reddit_object: Dict[str, str | list] + def main(POST_ID=None) -> None: - global redditid, reddit_object + global reddit_id, reddit_object reddit_object = get_subreddit_threads(POST_ID) - redditid = id(reddit_object) + reddit_id = extract_id(reddit_object) + print_substep(f"Thread ID is {reddit_id}", style="bold blue") length, number_of_comments = save_text_to_mp3(reddit_object) length = math.ceil(length) get_screenshots_of_reddit_posts(reddit_object, number_of_comments) @@ -64,22 +68,22 @@ def run_many(times) -> None: for x in range(1, times + 1): print_step( f'on the {x}{("th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th")[x % 10]} iteration of {times}' - ) # correct 1st 2nd 3rd 4th 5th.... + ) main() Popen("cls" if name == "nt" else "clear", shell=True).wait() def shutdown() -> NoReturn: - if "redditid" in globals(): + if "reddit_id" in globals(): print_markdown("## Clearing temp files") - cleanup(redditid) + cleanup(reddit_id) print("Exiting...") sys.exit() if __name__ == "__main__": - if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11]: + if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11, 12]: print( "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program only works on Python 3.10. Please install Python 3.10 and try again." ) @@ -122,6 +126,7 @@ if __name__ == "__main__": except Exception as err: config["settings"]["tts"]["tiktok_sessionid"] = "REDACTED" config["settings"]["tts"]["elevenlabs_api_key"] = "REDACTED" + config["settings"]["tts"]["openai_api_key"] = "REDACTED" print_step( f"Sorry, something went wrong with this version! Try again, and feel free to report this issue at GitHub or the Discord community.\n" f"Version: {__VERSION__} \n" diff --git a/requirements.txt b/requirements.txt index e6e2e7b..720aea5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,24 +1,21 @@ -boto3==1.34.127 -botocore==1.34.127 -gTTS==2.5.1 -moviepy==1.0.3 -playwright==1.44.0 -praw==7.7.1 -prawcore~=2.3.0 +boto3==1.36.8 +botocore==1.36.8 +gTTS==2.5.4 +moviepy==2.2.1 +playwright==1.49.1 +praw==7.8.1 requests==2.32.3 -rich==13.7.1 +rich==13.9.4 toml==0.10.2 -translators==5.9.2 -pyttsx3==2.90 -Pillow==10.3.0 -tomlkit==0.12.5 -Flask==3.0.3 +translators==5.9.9 +pyttsx3==2.98 +tomlkit==0.13.2 +Flask==3.1.1 clean-text==0.6.0 -unidecode==1.3.8 -spacy==3.7.5 -torch==2.3.1 -transformers==4.41.2 +unidecode==1.4.0 +spacy==3.8.7 +torch==2.7.0 +transformers==4.52.4 ffmpeg-python==0.2.0 -elevenlabs==1.3.0 -yt-dlp==2024.5.27 -numpy==1.26.4 +elevenlabs==1.57.0 +yt-dlp==2025.5.22 diff --git a/utils/.config.template.toml b/utils/.config.template.toml index f4a3af0..9185a29 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -44,7 +44,7 @@ background_thumbnail_font_size = { optional = true, type = "int", default = 96, background_thumbnail_font_color = { optional = true, default = "255,255,255", example = "255,255,255", explanation = "Font color in RGB format for the thumbnail text" } [settings.tts] -voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", ], example = "tiktok", explanation = "The voice platform used for TTS generation. " } +voice_choice = { optional = false, default = "tiktok", options = ["elevenlabs", "streamlabspolly", "tiktok", "googletranslate", "awspolly", "pyttsx", "OpenAI"], example = "tiktok", explanation = "The voice platform used for TTS generation. " } random_voice = { optional = false, type = "bool", default = true, example = true, options = [true, false,], explanation = "Randomizes the voice used for each comment" } elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella", explanation = "The voice used for elevenlabs", options = ["Adam", "Antoni", "Arnold", "Bella", "Domi", "Elli", "Josh", "Rachel", "Sam", ] } elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } @@ -56,3 +56,7 @@ python_voice = { optional = false, default = "1", example = "1", explanation = " py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed in Windows)" } silence_duration = { optional = true, example = "0.1", explanation = "Time in seconds between TTS comments", default = 0.3, type = "float" } no_emojis = { optional = false, type = "bool", default = false, example = false, options = [true, false,], explanation = "Whether to remove emojis from the comments" } +openai_api_url = { optional = true, default = "https://api.openai.com/v1/", example = "https://api.openai.com/v1/", explanation = "The API endpoint URL for OpenAI TTS generation" } +openai_api_key = { optional = true, example = "sk-abc123def456...", explanation = "Your OpenAI API key for TTS generation" } +openai_voice_name = { optional = false, default = "alloy", example = "alloy", explanation = "The voice used for OpenAI TTS generation", options = ["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "af_heart"] } +openai_model = { optional = false, default = "tts-1", example = "tts-1", explanation = "The model variant used for OpenAI TTS generation", options = ["tts-1", "tts-1-hd", "gpt-4o-mini-tts"] } diff --git a/utils/ai_methods.py b/utils/ai_methods.py index e628942..6926a71 100644 --- a/utils/ai_methods.py +++ b/utils/ai_methods.py @@ -12,7 +12,7 @@ def mean_pooling(model_output, attention_mask): ) -# This function sort the given threads based on their total similarity with the given keywords +# This function sorts the given threads based on their total similarity with the given keywords def sort_by_similarity(thread_objects, keywords): # Initialize tokenizer + model. tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2") @@ -34,7 +34,7 @@ def sort_by_similarity(thread_objects, keywords): threads_embeddings = model(**encoded_threads) threads_embeddings = mean_pooling(threads_embeddings, encoded_threads["attention_mask"]) - # Keywords inference + # Keyword inference encoded_keywords = tokenizer(keywords, padding=True, truncation=True, return_tensors="pt") with torch.no_grad(): keywords_embeddings = model(**encoded_keywords) @@ -53,7 +53,7 @@ def sort_by_similarity(thread_objects, keywords): similarity_scores, indices = torch.sort(total_scores, descending=True) - threads_sentences = np.array(threads_sentences)[indices.numpy()] + # threads_sentences = np.array(threads_sentences)[indices.numpy()] thread_objects = np.array(thread_objects)[indices.numpy()].tolist() diff --git a/utils/console.py b/utils/console.py index 18c3248..a9abf4b 100644 --- a/utils/console.py +++ b/utils/console.py @@ -102,7 +102,7 @@ def handle_input( user_input = input("").strip() if check_type is not False: try: - isinstance(eval(user_input), check_type) + isinstance(eval(user_input), check_type) # fixme: remove eval return check_type(user_input) except: console.print( diff --git a/utils/ffmpeg_install.py b/utils/ffmpeg_install.py index b2c673d..b41bad6 100644 --- a/utils/ffmpeg_install.py +++ b/utils/ffmpeg_install.py @@ -28,8 +28,8 @@ def ffmpeg_install_windows(): for root, dirs, files in os.walk(ffmpeg_extracted_folder, topdown=False): for file in files: os.remove(os.path.join(root, file)) - for dir in dirs: - os.rmdir(os.path.join(root, dir)) + for directory in dirs: + os.rmdir(os.path.join(root, directory)) os.rmdir(ffmpeg_extracted_folder) # Extract FFmpeg @@ -110,7 +110,7 @@ def ffmpeg_install(): stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) - except FileNotFoundError as e: + except FileNotFoundError: # Check if there's ffmpeg.exe in the current directory if os.path.exists("./ffmpeg.exe"): print( diff --git a/utils/gui_utils.py b/utils/gui_utils.py index f683adf..31c135f 100644 --- a/utils/gui_utils.py +++ b/utils/gui_utils.py @@ -25,7 +25,9 @@ def get_checks(): # Get current config (from config.toml) as dict -def get_config(obj: dict, done={}): +def get_config(obj: dict, done=None): + if done is None: + done = {} for key in obj.keys(): if not isinstance(obj[key], dict): done[key] = obj[key] @@ -44,13 +46,13 @@ def check(value, checks): if not incorrect and "type" in checks: try: - value = eval(checks["type"])(value) + value = eval(checks["type"])(value) # fixme remove eval except Exception: incorrect = True if ( not incorrect and "options" in checks and value not in checks["options"] - ): # FAILSTATE Value is not one of the options + ): # FAILSTATE Value isn't one of the options incorrect = True if ( not incorrect @@ -59,7 +61,7 @@ def check(value, checks): (isinstance(value, str) and re.match(checks["regex"], value) is None) or not isinstance(value, str) ) - ): # FAILSTATE Value doesn't match regex, or has regex but is not a string. + ): # FAILSTATE Value doesn't match regular expression, or has regular expression but isn't a string. incorrect = True if ( @@ -88,17 +90,17 @@ def check(value, checks): return value -# Modify settings (after form is submitted) +# Modify settings (after the form is submitted) def modify_settings(data: dict, config_load, checks: dict): # Modify config settings - def modify_config(obj: dict, name: str, value: any): + def modify_config(obj: dict, config_name: str, value: any): for key in obj.keys(): - if name == key: + if config_name == key: obj[key] = value elif not isinstance(obj[key], dict): continue else: - modify_config(obj[key], name, value) + modify_config(obj[key], config_name, value) # Remove empty/incorrect key-value pairs data = {key: value for key, value in data.items() if value and key in checks.keys()} @@ -158,7 +160,7 @@ def add_background(youtube_uri, filename, citation, position): youtube_uri = f"https://www.youtube.com/watch?v={regex.group(1)}" - # Check if position is valid + # Check if the position is valid if position == "" or position == "center": position = "center" @@ -178,7 +180,7 @@ def add_background(youtube_uri, filename, citation, position): filename = filename.replace(" ", "_") - # Check if background doesn't already exist + # Check if the background doesn't already exist with open("utils/backgrounds.json", "r", encoding="utf-8") as backgrounds: data = json.load(backgrounds) diff --git a/utils/id.py b/utils/id.py index 3d76593..792d9c9 100644 --- a/utils/id.py +++ b/utils/id.py @@ -1,12 +1,14 @@ import re +from typing import Optional from utils.console import print_substep -def id(reddit_obj: dict): +def extract_id(reddit_obj: dict, field: Optional[str] = "thread_id"): """ This function takes a reddit object and returns the post id """ - id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) - print_substep(f"Thread ID is {id}", style="bold blue") - return id + if field not in reddit_obj.keys(): + raise ValueError(f"Field '{field}' not found in reddit object") + reddit_id = re.sub(r"[^\w\s-]", "", reddit_obj[field]) + return reddit_id diff --git a/utils/imagenarator.py b/utils/imagenarator.py index 4520fb3..b56b1d8 100644 --- a/utils/imagenarator.py +++ b/utils/imagenarator.py @@ -7,6 +7,7 @@ from rich.progress import track from TTS.engine_wrapper import process_text from utils.fonts import getheight, getsize +from utils.id import extract_id def draw_multiple_line_text( @@ -62,19 +63,20 @@ def imagemaker( Render Images for video """ texts = reddit_obj["thread_post"] - id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) - + reddit_id = extract_id(reddit_obj) if transparent: font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 100) else: font = ImageFont.truetype(os.path.join("fonts", "Roboto-Regular.ttf"), 100) - image = Image.new("RGBA", size, theme) + size = (1920, 1080) for idx, text in track(enumerate(texts), "Rendering Image"): image = Image.new("RGBA", size, theme) text = process_text(text, False) + draw_multiple_line_text( image, text, font, txtclr, padding, wrap=30, transparent=transparent ) image.save(f"assets/temp/{id}/png/img{idx}.png") + diff --git a/utils/settings.py b/utils/settings.py index 2ebaef3..6b8242b 100755 --- a/utils/settings.py +++ b/utils/settings.py @@ -30,7 +30,7 @@ def check(value, checks, name): incorrect = True if not incorrect and "type" in checks: try: - value = eval(checks["type"])(value) + value = eval(checks["type"])(value) # fixme remove eval except: incorrect = True @@ -78,7 +78,7 @@ def check(value, checks, name): + str(name) + "[#F7768E bold]=", extra_info=get_check_value("explanation", ""), - check_type=eval(get_check_value("type", "False")), + check_type=eval(get_check_value("type", "False")), # fixme remove eval default=get_check_value("default", NotImplemented), match=get_check_value("regex", ""), err_message=get_check_value("input_error", "Incorrect input"), diff --git a/video_creation/background.py b/video_creation/background.py index 43be69a..aad552d 100644 --- a/video_creation/background.py +++ b/video_creation/background.py @@ -6,7 +6,7 @@ from random import randrange from typing import Any, Dict, Tuple import yt_dlp -from moviepy.editor import AudioFileClip, VideoFileClip +from moviepy import AudioFileClip, VideoFileClip from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip from utils import settings @@ -14,26 +14,26 @@ from utils.console import print_step, print_substep def load_background_options(): - background_options = {} + _background_options = {} # Load background videos with open("./utils/background_videos.json") as json_file: - background_options["video"] = json.load(json_file) + _background_options["video"] = json.load(json_file) # Load background audios with open("./utils/background_audios.json") as json_file: - background_options["audio"] = json.load(json_file) + _background_options["audio"] = json.load(json_file) # Remove "__comment" from backgrounds - del background_options["video"]["__comment"] - del background_options["audio"]["__comment"] + del _background_options["video"]["__comment"] + del _background_options["audio"]["__comment"] - for name in list(background_options["video"].keys()): - pos = background_options["video"][name][3] + for name in list(_background_options["video"].keys()): + pos = _background_options["video"][name][3] if pos != "center": - background_options["video"][name][3] = lambda t: ("center", pos + t) + _background_options["video"][name][3] = lambda t: ("center", pos + t) - return background_options + return _background_options def get_start_and_end_times(video_length: int, length_of_clip: int) -> Tuple[int, int]: @@ -124,10 +124,11 @@ def chop_background(background_config: Dict[str, Tuple], video_length: int, redd """Generates the background audio and footage to be used in the video and writes it to assets/temp/background.mp3 and assets/temp/background.mp4 Args: + reddit_object (Dict[str,str]) : Reddit object background_config (Dict[str,Tuple]]) : Current background configuration video_length (int): Length of the clip where the background footage is to be taken out of """ - id = re.sub(r"[^\w\s-]", "", reddit_object["thread_id"]) + thread_id = re.sub(r"[^\w\s-]", "", reddit_object["thread_id"]) if settings.config["settings"]["background"][f"background_audio_volume"] == 0: print_step("Volume was set to 0. Skipping background audio creation . . .") @@ -138,8 +139,8 @@ def chop_background(background_config: Dict[str, Tuple], video_length: int, redd start_time_audio, end_time_audio = get_start_and_end_times( video_length, background_audio.duration ) - background_audio = background_audio.subclip(start_time_audio, end_time_audio) - background_audio.write_audiofile(f"assets/temp/{id}/background.mp3") + background_audio = background_audio.subclipped(start_time_audio, end_time_audio) + background_audio.write_audiofile(f"assets/temp/{thread_id}/background.mp3") print_step("Finding a spot in the backgrounds video to chop...✂️") video_choice = f"{background_config['video'][2]}-{background_config['video'][1]}" @@ -149,17 +150,18 @@ def chop_background(background_config: Dict[str, Tuple], video_length: int, redd ) # Extract video subclip try: + with VideoFileClip(f"assets/backgrounds/video/{video_choice}") as video: + new = video.subclipped(start_time_video, end_time_video) + new.write_videofile(f"assets/temp/{thread_id}/background.mp4") + + except (OSError, IOError): # ffmpeg issue see #348 + print_substep("FFMPEG issue. Trying again...") ffmpeg_extract_subclip( f"assets/backgrounds/video/{video_choice}", start_time_video, end_time_video, - targetname=f"assets/temp/{id}/background.mp4", + outputfile=f"assets/temp/{thread_id}/background.mp4", ) - except (OSError, IOError): # ffmpeg issue see #348 - print_substep("FFMPEG issue. Trying again...") - with VideoFileClip(f"assets/backgrounds/video/{video_choice}") as video: - new = video.subclip(start_time_video, end_time_video) - new.write_videofile(f"assets/temp/{id}/background.mp4") print_substep("Background video chopped successfully!", style="bold green") return background_config["video"][2] diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 101d0f7..c8be6f5 100644 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -19,6 +19,7 @@ from utils import settings from utils.cleanup import cleanup from utils.console import print_step, print_substep from utils.fonts import getheight +from utils.id import extract_id from utils.thumbnail import create_thumbnail from utils.videos import save_data @@ -92,7 +93,7 @@ def prepare_background(reddit_id: str, W: int, H: int) -> str: output_path, an=None, **{ - "c:v": "h264", + "c:v": "h264_nvenc", "b:v": "20M", "b:a": "192k", "threads": multiprocessing.cpu_count(), @@ -108,19 +109,61 @@ def prepare_background(reddit_id: str, W: int, H: int) -> str: return output_path +def get_text_height(draw, text, font, max_width): + lines = textwrap.wrap(text, width=max_width) + total_height = 0 + for line in lines: + _, _, _, height = draw.textbbox((0, 0), line, font=font) + total_height += height + return total_height + + def create_fancy_thumbnail(image, text, text_color, padding, wrap=35): + """ + It will take the 1px from the middle of the template and will be resized (stretched) vertically to accommodate the extra height needed for the title. + """ print_step(f"Creating fancy thumbnail for: {text}") font_title_size = 47 font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size) image_width, image_height = image.size - lines = textwrap.wrap(text, width=wrap) - y = ( - (image_height / 2) - - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2) - + 30 - ) + + # Calculate text height to determine new image height draw = ImageDraw.Draw(image) + text_height = get_text_height(draw, text, font, wrap) + lines = textwrap.wrap(text, width=wrap) + # This are -50 to reduce the empty space at the bottom of the image, + # change it as per your requirement if needed otherwise leave it. + new_image_height = image_height + text_height + padding * (len(lines) - 1) - 50 + + # Separate the image into top, middle (1px), and bottom parts + top_part_height = image_height // 2 + middle_part_height = 1 # 1px height middle section + bottom_part_height = image_height - top_part_height - middle_part_height + + top_part = image.crop((0, 0, image_width, top_part_height)) + middle_part = image.crop((0, top_part_height, image_width, top_part_height + middle_part_height)) + bottom_part = image.crop((0, top_part_height + middle_part_height, image_width, image_height)) + + # Stretch the middle part + new_middle_height = new_image_height - top_part_height - bottom_part_height + middle_part = middle_part.resize((image_width, new_middle_height)) + + # Create new image with the calculated height + new_image = Image.new("RGBA", (image_width, new_image_height)) + + # Paste the top, stretched middle, and bottom parts into the new image + new_image.paste(top_part, (0, 0)) + new_image.paste(middle_part, (0, top_part_height)) + new_image.paste(bottom_part, (0, top_part_height + new_middle_height)) + + # Draw the title text on the new image + draw = ImageDraw.Draw(new_image) + y = top_part_height + padding + for line in lines: + draw.text((120, y), line, font=font, fill=text_color, align="left") + y += get_text_height(draw, line, font, wrap) + padding + # Draw the username "PlotPulse" at the specific position username_font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 30) draw.text( (205, 825), @@ -130,39 +173,7 @@ def create_fancy_thumbnail(image, text, text_color, padding, wrap=35): align="left", ) - if len(lines) == 3: - lines = textwrap.wrap(text, width=wrap + 10) - font_title_size = 40 - font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size) - y = ( - (image_height / 2) - - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2) - + 35 - ) - elif len(lines) == 4: - lines = textwrap.wrap(text, width=wrap + 10) - font_title_size = 35 - font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size) - y = ( - (image_height / 2) - - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2) - + 40 - ) - elif len(lines) > 4: - lines = textwrap.wrap(text, width=wrap + 10) - font_title_size = 30 - font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size) - y = ( - (image_height / 2) - - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2) - + 30 - ) - - for line in lines: - draw.text((120, y), line, font=font, fill=text_color, align="left") - y += getheight(font, line) + padding - - return image + return new_image def merge_background_audio(audio: ffmpeg, reddit_id: str): @@ -204,7 +215,7 @@ def make_final_video( opacity = settings.config["settings"]["opacity"] - reddit_id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) + reddit_id = extract_id(reddit_obj) allowOnlyTTSFolder: bool = ( settings.config["settings"]["background"]["enable_extra_audio"] @@ -343,8 +354,8 @@ def make_final_video( ) current_time += audio_clips_durations[i] - title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"]) - idx = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) + title = extract_id(reddit_obj, "thread_title") + idx = extract_id(reddit_obj) title_thumb = reddit_obj["thread_title"] filename = f"{name_normalize(title)[:251]}" @@ -427,7 +438,7 @@ def make_final_video( path, f="mp4", **{ - "c:v": "h264", + "c:v": "h264_nvenc", "b:v": "20M", "b:a": "192k", "threads": multiprocessing.cpu_count(), @@ -457,7 +468,7 @@ def make_final_video( path, f="mp4", **{ - "c:v": "h264", + "c:v": "h264_nvenc", "b:v": "20M", "b:a": "192k", "threads": multiprocessing.cpu_count(), diff --git a/video_creation/screenshot_downloader.py b/video_creation/screenshot_downloader.py index 57675b6..37f5fd5 100644 --- a/video_creation/screenshot_downloader.py +++ b/video_creation/screenshot_downloader.py @@ -34,7 +34,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): # ! Make sure the reddit screenshots folder exists Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True) - # set the theme and disable non-essential cookies + # set the theme and turn off non-essential cookies if settings.config["settings"]["theme"] == "dark": cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8") bgcolor = (33, 33, 36, 255) @@ -60,7 +60,6 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): transparent = False if storymode and settings.config["settings"]["storymodemethod"] == 1: - # for idx,item in enumerate(reddit_object["thread_post"]): print_substep("Generating images...") return imagemaker( theme=bgcolor, @@ -83,11 +82,15 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): dsf = (W // 600) + 1 context = browser.new_context( - locale=lang or "en-us", + locale=lang or "en-CA,en;q=0.9", color_scheme="dark", viewport=ViewportSize(width=W, height=H), device_scale_factor=dsf, - user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36", + user_agent=f"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{browser.version}.0.0.0 Safari/537.36", + extra_http_headers={ + "Dnt": "1", + "Sec-Ch-Ua": '"Not A(Brand";v="8", "Chromium";v="132", "Google Chrome";v="132"', + }, ) cookies = json.load(cookie_file) cookie_file.close() @@ -108,17 +111,12 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int): login_error_div = page.locator(".AnimatedForm__errorMessage").first if login_error_div.is_visible(): - login_error_message = login_error_div.inner_text() - if login_error_message.strip() == "": - # The div element is empty, no error - pass - else: - # The div contains an error message - print_substep( - "Your reddit credentials are incorrect! Please modify them accordingly in the config.toml file.", - style="red", - ) - exit() + + print_substep( + "Your reddit credentials are incorrect! Please modify them accordingly in the config.toml file.", + style="red", + ) + exit() else: pass diff --git a/video_creation/voices.py b/video_creation/voices.py index ad94a14..3d48e9e 100644 --- a/video_creation/voices.py +++ b/video_creation/voices.py @@ -6,6 +6,7 @@ from TTS.aws_polly import AWSPolly from TTS.elevenlabs import elevenlabs from TTS.engine_wrapper import TTSEngine from TTS.GTTS import GTTS +from TTS.openai_tts import OpenAITTS from TTS.pyttsx import pyttsx from TTS.streamlabs_polly import StreamlabsPolly from TTS.TikTok import TikTok @@ -21,6 +22,7 @@ TTSProviders = { "TikTok": TikTok, "pyttsx": pyttsx, "ElevenLabs": elevenlabs, + "OpenAI": OpenAITTS, }