Merge branch 'develop' into fix_tts_random_voice

8 months ago · 005cf37ad0
parent 20640fd562 6c55a73e71
commit 005cf37ad0
15 changed files with 173 additions and 114 deletions
--- a/.dockerignore
+++ b/.dockerignore
@ -1 +1,2 @@
-Dockerfile
+Dockerfile
+results
--- a/README.md
+++ b/README.md
@ -37,27 +37,61 @@ The only original thing being done is the editing and gathering of all materials

 ## Installation 👩‍💻

-1. Clone this repository
-2. Run `pip install -r requirements.txt`
-3. Run `python -m playwright install` and `python -m playwright install-deps`
+1. Clone this repository:
+    ```sh
+    git clone https://github.com/elebumm/RedditVideoMakerBot.git
+    cd RedditVideoMakerBot
+    ```
+
+2. Create and activate a virtual environment:
+    - On **Windows**:
+        ```sh
+        python -m venv ./venv
+        .\venv\Scripts\activate
+        ```
+    - On **macOS and Linux**:
+        ```sh
+        python3 -m venv ./venv
+        source ./venv/bin/activate
+        ```
+
+3. Install the required dependencies:
+    ```sh
+    pip install -r requirements.txt
+    ```
+
+4. Install Playwright and its dependencies:
+    ```sh
+    python -m playwright install
+    python -m playwright install-deps
+    ```
+
+---

 **EXPERIMENTAL!!!!**

-On macOS and Linux (debian, arch, fedora and centos, and based on those), you can run an install script that will automatically install steps 1 to 3. (requires bash)
+   - On macOS and Linux (Debian, Arch, Fedora, CentOS, and distributions based on those), you can run an installation script that automatically performs steps 1 to 3 (requires bash).
+   - `bash <(curl -sL https://raw.githubusercontent.com/elebumm/RedditVideoMakerBot/master/install.sh)`
+   - This can also be used to update the installation

-`bash <(curl -sL https://raw.githubusercontent.com/elebumm/RedditVideoMakerBot/master/install.sh)`
+---

-This can also be used to update the installation
+5. Run the bot:
+    ```sh
+    python main.py
+    ```

-4. Run `python main.py`
-5. Visit [the Reddit Apps page.](https://www.reddit.com/prefs/apps), and set up an app that is a "script". Paste any URL in redirect URL. Ex:`https://jasoncameron.dev`
-6. The bot will ask you to fill in your details to connect to the Reddit API, and configure the bot to your liking
-7. Enjoy 😎
-8. If you need to reconfigure the bot, simply open the `config.toml` file and delete the lines that need to be changed. On the next run of the bot, it will help you reconfigure those options.
+6. Visit [the Reddit Apps page](https://www.reddit.com/prefs/apps), and set up an app that is a "script". Paste any URL in the redirect URL field, for example: `https://jasoncameron.dev`.

-(Note if you got an error installing or running the bot try first rerunning the command with a three after the name e.g. python3 or pip3)
+7. The bot will prompt you to fill in your details to connect to the Reddit API and configure the bot to your liking.

-If you want to read more detailed guide about the bot, please refer to the [documentation](https://reddit-video-maker-bot.netlify.app/)
+8. Enjoy 😎
+
+9. If you need to reconfigure the bot, simply open the `config.toml` file and delete the lines that need to be changed. On the next run of the bot, it will help you reconfigure those options.
+
+(Note: If you encounter any errors installing or running the bot, try using `python3` or `pip3` instead of `python` or `pip`.)
+
+For a more detailed guide about the bot, please refer to the [documentation](https://reddit-video-maker-bot.netlify.app/).

 ## Video

--- a/TTS/elevenlabs.py
+++ b/TTS/elevenlabs.py
@ -35,4 +35,4 @@ class elevenlabs:
    def randomvoice(self):
        if self.client is None:
            self.initialize()
-        return random.choice(self.client.voices.get_all().voices).voice_name
+        return random.choice(self.client.voices.get_all().voices).name
--- a/TTS/engine_wrapper.py
+++ b/TTS/engine_wrapper.py
@ -144,11 +144,18 @@ class TTSEngine:
            print("OSError")

    def call_tts(self, filename: str, text: str):
-        self.tts_module.run(
-            text,
-            filepath=f"{self.path}/{filename}.mp3",
-            random_voice=settings.config["settings"]["tts"]["random_voice"],
-        )
+        if settings.config["settings"]["tts"]["voice_choice"] == "googletranslate":
+            # GTTS does not have the argument 'random_voice'
+            self.tts_module.run(
+                text,
+                filepath=f"{self.path}/{filename}.mp3",
+            )
+        else:
+            self.tts_module.run(
+                text,
+                filepath=f"{self.path}/{filename}.mp3",
+                random_voice=settings.config["settings"]["tts"]["random_voice"],
+            )
        # try:
        #     self.length += MP3(f"{self.path}/{filename}.mp3").info.length
        # except (MutagenError, HeaderNotFoundError):
--- a/main.py
+++ b/main.py
@ -4,7 +4,7 @@ import sys
 from os import name
 from pathlib import Path
 from subprocess import Popen
-from typing import NoReturn
+from typing import Dict, NoReturn

 from prawcore import ResponseException

@ -13,7 +13,7 @@ from utils import settings
 from utils.cleanup import cleanup
 from utils.console import print_markdown, print_step, print_substep
 from utils.ffmpeg_install import ffmpeg_install
-from utils.id import id
+from utils.id import extract_id
 from utils.version import checkversion
 from video_creation.background import (
    chop_background,
@ -42,11 +42,15 @@ print_markdown(
 )
 checkversion(__VERSION__)

+reddit_id: str
+reddit_object: Dict[str, str | list]
+

 def main(POST_ID=None) -> None:
-    global redditid, reddit_object
+    global reddit_id, reddit_object
    reddit_object = get_subreddit_threads(POST_ID)
-    redditid = id(reddit_object)
+    reddit_id = extract_id(reddit_object)
+    print_substep(f"Thread ID is {reddit_id}", style="bold blue")
    length, number_of_comments = save_text_to_mp3(reddit_object)
    length = math.ceil(length)
    get_screenshots_of_reddit_posts(reddit_object, number_of_comments)
@ -64,22 +68,22 @@ def run_many(times) -> None:
    for x in range(1, times + 1):
        print_step(
            f'on the {x}{("th", "st", "nd", "rd", "th", "th", "th", "th", "th", "th")[x % 10]} iteration of {times}'
-        )  # correct 1st 2nd 3rd 4th 5th....
+        )
        main()
        Popen("cls" if name == "nt" else "clear", shell=True).wait()


 def shutdown() -> NoReturn:
-    if "redditid" in globals():
+    if "reddit_id" in globals():
        print_markdown("## Clearing temp files")
-        cleanup(redditid)
+        cleanup(reddit_id)

    print("Exiting...")
    sys.exit()


 if __name__ == "__main__":
-    if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11]:
+    if sys.version_info.major != 3 or sys.version_info.minor not in [10, 11, 12]:
        print(
            "Hey! Congratulations, you've made it so far (which is pretty rare with no Python 3.10). Unfortunately, this program only works on Python 3.10. Please install Python 3.10 and try again."
        )
--- a/requirements.txt
+++ b/requirements.txt
@ -1,24 +1,24 @@
 boto3==1.34.127
 botocore==1.34.127
-gTTS==2.5.1
+gTTS==2.5.4
 moviepy==1.0.3
-playwright==1.44.0
-praw==7.7.1
+playwright==1.48.0
+praw==7.8.1
 prawcore~=2.3.0
 requests==2.32.3
-rich==13.7.1
+rich==13.9.4
 toml==0.10.2
-translators==5.9.2
-pyttsx3==2.90
-Pillow==10.3.0
-tomlkit==0.12.5
-Flask==3.0.3
+translators==5.9.3
+pyttsx3==2.98
+Pillow==10.4.0
+tomlkit==0.13.2
+Flask==3.1.0
 clean-text==0.6.0
 unidecode==1.3.8
-spacy==3.7.5
-torch==2.3.1
-transformers==4.41.2
+spacy==3.8.3
+torch==2.4.1
+transformers==4.48.1
 ffmpeg-python==0.2.0
-elevenlabs==1.3.0
-yt-dlp==2024.5.27
+elevenlabs==1.8.1
+yt-dlp==2025.1.15
 numpy==1.26.4
--- a/utils/ai_methods.py
+++ b/utils/ai_methods.py
@ -12,7 +12,7 @@ def mean_pooling(model_output, attention_mask):
    )


-# This function sort the given threads based on their total similarity with the given keywords
+# This function sorts the given threads based on their total similarity with the given keywords
 def sort_by_similarity(thread_objects, keywords):
    # Initialize tokenizer + model.
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
@ -34,7 +34,7 @@ def sort_by_similarity(thread_objects, keywords):
        threads_embeddings = model(**encoded_threads)
    threads_embeddings = mean_pooling(threads_embeddings, encoded_threads["attention_mask"])

-    # Keywords inference
+    # Keyword inference
    encoded_keywords = tokenizer(keywords, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        keywords_embeddings = model(**encoded_keywords)
@ -53,7 +53,7 @@ def sort_by_similarity(thread_objects, keywords):

    similarity_scores, indices = torch.sort(total_scores, descending=True)

-    threads_sentences = np.array(threads_sentences)[indices.numpy()]
+    # threads_sentences = np.array(threads_sentences)[indices.numpy()]

    thread_objects = np.array(thread_objects)[indices.numpy()].tolist()

--- a/utils/console.py
+++ b/utils/console.py
@ -102,7 +102,7 @@ def handle_input(
        user_input = input("").strip()
        if check_type is not False:
            try:
-                isinstance(eval(user_input), check_type)
+                isinstance(eval(user_input), check_type)  # fixme: remove eval
                return check_type(user_input)
            except:
                console.print(
--- a/utils/ffmpeg_install.py
+++ b/utils/ffmpeg_install.py
@ -28,8 +28,8 @@ def ffmpeg_install_windows():
            for root, dirs, files in os.walk(ffmpeg_extracted_folder, topdown=False):
                for file in files:
                    os.remove(os.path.join(root, file))
-                for dir in dirs:
-                    os.rmdir(os.path.join(root, dir))
+                for directory in dirs:
+                    os.rmdir(os.path.join(root, directory))
            os.rmdir(ffmpeg_extracted_folder)

        # Extract FFmpeg
@ -110,7 +110,7 @@ def ffmpeg_install():
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
-    except FileNotFoundError as e:
+    except FileNotFoundError:
        # Check if there's ffmpeg.exe in the current directory
        if os.path.exists("./ffmpeg.exe"):
            print(
--- a/utils/gui_utils.py
+++ b/utils/gui_utils.py
@ -25,7 +25,9 @@ def get_checks():


 # Get current config (from config.toml) as dict
-def get_config(obj: dict, done={}):
+def get_config(obj: dict, done=None):
+    if done is None:
+        done = {}
    for key in obj.keys():
        if not isinstance(obj[key], dict):
            done[key] = obj[key]
@ -44,13 +46,13 @@ def check(value, checks):

    if not incorrect and "type" in checks:
        try:
-            value = eval(checks["type"])(value)
+            value = eval(checks["type"])(value)  # fixme remove eval
        except Exception:
            incorrect = True

    if (
        not incorrect and "options" in checks and value not in checks["options"]
-    ):  # FAILSTATE Value is not one of the options
+    ):  # FAILSTATE Value isn't one of the options
        incorrect = True
    if (
        not incorrect
@ -59,7 +61,7 @@ def check(value, checks):
            (isinstance(value, str) and re.match(checks["regex"], value) is None)
            or not isinstance(value, str)
        )
-    ):  # FAILSTATE Value doesn't match regex, or has regex but is not a string.
+    ):  # FAILSTATE Value doesn't match regular expression, or has regular expression but isn't a string.
        incorrect = True

    if (
@ -88,17 +90,17 @@ def check(value, checks):
    return value


-# Modify settings (after form is submitted)
+# Modify settings (after the form is submitted)
 def modify_settings(data: dict, config_load, checks: dict):
    # Modify config settings
-    def modify_config(obj: dict, name: str, value: any):
+    def modify_config(obj: dict, config_name: str, value: any):
        for key in obj.keys():
-            if name == key:
+            if config_name == key:
                obj[key] = value
            elif not isinstance(obj[key], dict):
                continue
            else:
-                modify_config(obj[key], name, value)
+                modify_config(obj[key], config_name, value)

    # Remove empty/incorrect key-value pairs
    data = {key: value for key, value in data.items() if value and key in checks.keys()}
@ -158,7 +160,7 @@ def add_background(youtube_uri, filename, citation, position):

    youtube_uri = f"https://www.youtube.com/watch?v={regex.group(1)}"

-    # Check if position is valid
+    # Check if the position is valid
    if position == "" or position == "center":
        position = "center"

@ -178,7 +180,7 @@ def add_background(youtube_uri, filename, citation, position):

    filename = filename.replace(" ", "_")

-    # Check if background doesn't already exist
+    # Check if the background doesn't already exist
    with open("utils/backgrounds.json", "r", encoding="utf-8") as backgrounds:
        data = json.load(backgrounds)

--- a/utils/id.py
+++ b/utils/id.py
@ -1,12 +1,14 @@
 import re
+from typing import Optional

 from utils.console import print_substep


-def id(reddit_obj: dict):
+def extract_id(reddit_obj: dict, field: Optional[str] = "thread_id"):
    """
    This function takes a reddit object and returns the post id
    """
-    id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])
-    print_substep(f"Thread ID is {id}", style="bold blue")
-    return id
+    if field not in reddit_obj.keys():
+        raise ValueError(f"Field '{field}' not found in reddit object")
+    reddit_id = re.sub(r"[^\w\s-]", "", reddit_obj[field])
+    return reddit_id
--- a/utils/imagenarator.py
+++ b/utils/imagenarator.py
@ -7,6 +7,7 @@ from rich.progress import track

 from TTS.engine_wrapper import process_text
 from utils.fonts import getheight, getsize
+from utils.id import extract_id


 def draw_multiple_line_text(
@ -58,18 +59,16 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
    Render Images for video
    """
    texts = reddit_obj["thread_post"]
-    id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])
-
+    reddit_id = extract_id(reddit_obj)
    if transparent:
        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 100)
    else:
        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Regular.ttf"), 100)
-    size = (1920, 1080)

-    image = Image.new("RGBA", size, theme)
+    size = (1920, 1080)

    for idx, text in track(enumerate(texts), "Rendering Image"):
        image = Image.new("RGBA", size, theme)
        text = process_text(text, False)
        draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent)
-        image.save(f"assets/temp/{id}/png/img{idx}.png")
+        image.save(f"assets/temp/{reddit_id}/png/img{idx}.png")
--- a/utils/settings.py
+++ b/utils/settings.py
@ -30,7 +30,7 @@ def check(value, checks, name):
        incorrect = True
    if not incorrect and "type" in checks:
        try:
-            value = eval(checks["type"])(value)
+            value = eval(checks["type"])(value)  # fixme remove eval
        except:
            incorrect = True

@ -78,7 +78,7 @@ def check(value, checks, name):
            + str(name)
            + "[#F7768E bold]=",
            extra_info=get_check_value("explanation", ""),
-            check_type=eval(get_check_value("type", "False")),
+            check_type=eval(get_check_value("type", "False")),  # fixme remove eval
            default=get_check_value("default", NotImplemented),
            match=get_check_value("regex", ""),
            err_message=get_check_value("input_error", "Incorrect input"),
--- a/video_creation/final_video.py
+++ b/video_creation/final_video.py
@ -19,6 +19,7 @@ from utils import settings
 from utils.cleanup import cleanup
 from utils.console import print_step, print_substep
 from utils.fonts import getheight
+from utils.id import extract_id
 from utils.thumbnail import create_thumbnail
 from utils.videos import save_data

@ -108,19 +109,61 @@ def prepare_background(reddit_id: str, W: int, H: int) -> str:
    return output_path


+def get_text_height(draw, text, font, max_width):
+    lines = textwrap.wrap(text, width=max_width)
+    total_height = 0
+    for line in lines:
+        _, _, _, height = draw.textbbox((0, 0), line, font=font)
+        total_height += height
+    return total_height
+
+
 def create_fancy_thumbnail(image, text, text_color, padding, wrap=35):
+    """
+    Take the 1px-high strip from the middle of the template and stretch it vertically to accommodate the extra height needed for the title.
+    """
    print_step(f"Creating fancy thumbnail for: {text}")
    font_title_size = 47
    font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size)
    image_width, image_height = image.size
-    lines = textwrap.wrap(text, width=wrap)
-    y = (
-        (image_height / 2)
-        - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2)
-        + 30
-    )
+
+    # Calculate text height to determine new image height
    draw = ImageDraw.Draw(image)
+    text_height = get_text_height(draw, text, font, wrap)
+    lines = textwrap.wrap(text, width=wrap)
+    # The -50 reduces the empty space at the bottom of the image;
+    # adjust it to suit your template if needed, otherwise leave it as is.
+    new_image_height = image_height + text_height + padding * (len(lines) - 1) - 50
+
+    # Separate the image into top, middle (1px), and bottom parts
+    top_part_height = image_height // 2
+    middle_part_height = 1  # 1px height middle section
+    bottom_part_height = image_height - top_part_height - middle_part_height
+
+    top_part = image.crop((0, 0, image_width, top_part_height))
+    middle_part = image.crop((0, top_part_height, image_width, top_part_height + middle_part_height))
+    bottom_part = image.crop((0, top_part_height + middle_part_height, image_width, image_height))
+
+    # Stretch the middle part
+    new_middle_height = new_image_height - top_part_height - bottom_part_height
+    middle_part = middle_part.resize((image_width, new_middle_height))
+
+    # Create new image with the calculated height
+    new_image = Image.new("RGBA", (image_width, new_image_height))
+
+    # Paste the top, stretched middle, and bottom parts into the new image
+    new_image.paste(top_part, (0, 0))
+    new_image.paste(middle_part, (0, top_part_height))
+    new_image.paste(bottom_part, (0, top_part_height + new_middle_height))
+
+    # Draw the title text on the new image
+    draw = ImageDraw.Draw(new_image)
+    y = top_part_height + padding
+    for line in lines:
+        draw.text((120, y), line, font=font, fill=text_color, align="left")
+        y += get_text_height(draw, line, font, wrap) + padding

+    # Draw the username "PlotPulse" at the specific position
    username_font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), 30)
    draw.text(
        (205, 825),
@ -130,39 +173,7 @@ def create_fancy_thumbnail(image, text, text_color, padding, wrap=35):
        align="left",
    )

-    if len(lines) == 3:
-        lines = textwrap.wrap(text, width=wrap + 10)
-        font_title_size = 40
-        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size)
-        y = (
-            (image_height / 2)
-            - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2)
-            + 35
-        )
-    elif len(lines) == 4:
-        lines = textwrap.wrap(text, width=wrap + 10)
-        font_title_size = 35
-        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size)
-        y = (
-            (image_height / 2)
-            - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2)
-            + 40
-        )
-    elif len(lines) > 4:
-        lines = textwrap.wrap(text, width=wrap + 10)
-        font_title_size = 30
-        font = ImageFont.truetype(os.path.join("fonts", "Roboto-Bold.ttf"), font_title_size)
-        y = (
-            (image_height / 2)
-            - (((getheight(font, text) + (len(lines) * padding) / len(lines)) * len(lines)) / 2)
-            + 30
-        )
-
-    for line in lines:
-        draw.text((120, y), line, font=font, fill=text_color, align="left")
-        y += getheight(font, line) + padding
-
-    return image
+    return new_image


 def merge_background_audio(audio: ffmpeg, reddit_id: str):
@ -204,7 +215,7 @@ def make_final_video(

    opacity = settings.config["settings"]["opacity"]

-    reddit_id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])
+    reddit_id = extract_id(reddit_obj)

    allowOnlyTTSFolder: bool = (
        settings.config["settings"]["background"]["enable_extra_audio"]
@ -343,8 +354,8 @@ def make_final_video(
            )
            current_time += audio_clips_durations[i]

-    title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"])
-    idx = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])
+    title = extract_id(reddit_obj, "thread_title")
+    idx = extract_id(reddit_obj)
    title_thumb = reddit_obj["thread_title"]

    filename = f"{name_normalize(title)[:251]}"
--- a/video_creation/screenshot_downloader.py
+++ b/video_creation/screenshot_downloader.py
@ -34,7 +34,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
    # ! Make sure the reddit screenshots folder exists
    Path(f"assets/temp/{reddit_id}/png").mkdir(parents=True, exist_ok=True)

-    # set the theme and disable non-essential cookies
+    # set the theme and turn off non-essential cookies
    if settings.config["settings"]["theme"] == "dark":
        cookie_file = open("./video_creation/data/cookie-dark-mode.json", encoding="utf-8")
        bgcolor = (33, 33, 36, 255)
@ -60,7 +60,6 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
        transparent = False

    if storymode and settings.config["settings"]["storymodemethod"] == 1:
-        # for idx,item in enumerate(reddit_object["thread_post"]):
        print_substep("Generating images...")
        return imagemaker(
            theme=bgcolor,