feat: ✨ Changed how the video looks based on duration

- Added video compression for faster uploads to YouTube while keeping good quality.
- Added a debug mode that reuses previously created parts of the content instead of recreating them, which is useful when debugging.
1 year ago · 5755e0792b
parent 4fce079dba
commit 5755e0792b
10 changed files with 132 additions and 58 deletions
--- a/ffmpeg2pass-0.log.mbtree
+++ b/ffmpeg2pass-0.log.mbtree
--- a/main.py
+++ b/main.py
@ -26,6 +26,7 @@ from video_creation.final_video import make_final_video
 from video_creation.screenshot_downloader import get_screenshots_of_reddit_posts
 from video_creation.voices import save_text_to_mp3
 from utils.ffmpeg_install import ffmpeg_install
+from utils.compressor import compress_video

 __VERSION__ = "3.2.1"

@ -54,9 +55,9 @@ def main(POST_ID=None) -> None:

    length, number_of_comments = save_text_to_mp3(reddit_object)
    length = math.ceil(length)
-    # length, number_of_comments = 120, 18
+    reel = length <= 60

-    get_screenshots_of_reddit_posts(reddit_object, number_of_comments)
+    get_screenshots_of_reddit_posts(reddit_object, number_of_comments, reel)
    bg_config = {
        "video": get_background_config("video"),
        "audio": get_background_config("audio"),
@ -64,22 +65,23 @@ def main(POST_ID=None) -> None:
    download_background_video(bg_config["video"])
    download_background_audio(bg_config["audio"])
    chop_background(bg_config, length, reddit_object)
-    video_path = make_final_video(number_of_comments, length, reddit_object, bg_config)
+    video_path = make_final_video(number_of_comments, length, reddit_object, bg_config, reel)
+    video_path = compress_video(video_path)

-    video_data, thumbnail_text = get_video_data(post_text)
-    print("Video title:", video_data['title'])
-    print("Video description:", video_data['description'])
-    print("Video tags:", video_data['tags'])
+    # video_data, thumbnail_text = get_video_data(post_text)
+    # print("Video title:", video_data['title'])
+    # print("Video description:", video_data['description'])
+    # print("Video tags:", video_data['tags'])
    
-    thumbnail = generate_image(thumbnail_text, f"./assets/temp/{reddit_object['thread_id']}/thumbnail_image.png")
+    # thumbnail = generate_image(thumbnail_text, f"./assets/temp/{reddit_object['thread_id']}/thumbnail_image.png")
    # thumbnail = "thumbnail.png"
-    thumbnail = add_text(
-        thumbnail_path=thumbnail,
-        text=video_data["thumbnail_text"],
-        save_path=f"./assets/temp/{reddit_object['thread_id']}/thumbnail.png"
-    )
-    print("Thumbnail generated successfully at:", thumbnail)
-    upload_video_to_youtube(video_path, video_data, thumbnail)
+    # thumbnail = add_text(
+    #     thumbnail_path=thumbnail,
+    #     text=video_data["thumbnail_text"],
+    #     save_path=f"./assets/temp/{reddit_object['thread_id']}/thumbnail.png"
+    # )
+    # print("Thumbnail generated successfully at:", thumbnail)
+    # upload_video_to_youtube(video_path, video_data, thumbnail)


 def run_many(times) -> None:
@ -135,7 +137,7 @@ if __name__ == "__main__":

    from video_data_generation.gemini import get_video_data
    from video_data_generation.image_generation import generate_image, add_text
-    from utils.youtube_uploader import upload_video_to_youtube
+    # from utils.youtube_uploader import upload_video_to_youtube

    if (
        not settings.config["settings"]["tts"]["tiktok_sessionid"]
--- a/utils/.config.template.toml
+++ b/utils/.config.template.toml
@ -28,11 +28,15 @@ opacity = { optional = false, default = 0.9, example = 0.8, explanation = "Sets
 storymode = { optional = true, type = "bool", default = false, example = false, options = [true, false,], explanation = "Only read out title and post content, great for subreddits with stories" }
 storymodemethod= { optional = true, default = 1, example = 1, explanation = "Style that's used for the storymode. Set to 0 for single picture display in whole video, set to 1 for fancy looking video ", type = "int", nmin = 0, oob_error = "It's very hard to run something less than once.", options = [0, 1] }
 storymode_max_length = { optional = true, default = 1000, example = 1000, explanation = "Max length of the storymode video in characters. 200 characters are approximately 50 seconds.", type = "int", nmin = 1, oob_error = "It's very hard to make a video under a second." }
-resolution_w = { optional = false, default = 1080, example = 1440, explantation = "Sets the width in pixels of the final video" }
-resolution_h = { optional = false, default = 1920, example = 2560, explantation = "Sets the height in pixels of the final video" }
 zoom = { optional = true, default = 1, example = 1.1, explanation = "Sets the browser zoom level. Useful if you want the text larger.", type = "float", nmin = 0.1, nmax = 2, oob_error = "The text is really difficult to read at a zoom level higher than 2" }
 run_every = { optional = false, default = 24, example = 5, explanation = "How often should the bot create a video (in hours).", type = "int", nmin = 4, nmax = 48, oob_error = "Please choose a number between 4 and 48." }

+[settings.debug]
+debug = { optional = false, type = "bool", default = false, example = false, options = [true, false, ], explanation = "Debug mode (Whether to delete temp files after creating the video or not)" }
+reuse_mp3 = { optional = false, type = "bool", default = false, example = false, options = [true, false, ], explanation = "Use mp3 files from temp data" }
+reuse_images = { optional = false, type = "bool", default = false, example = false, options = [true, false, ], explanation = "Use images from temp data" }
+reuse_video = { optional = false, type = "bool", default = false, example = false, options = [true, false, ], explanation = "Use already generated video" }
+
 [settings.background]
 background_video = { optional = true, default = "mudrunner", example = "rocket-league", options = ["mudrunner", "granny-remake", ""], explanation = "Sets the background for the video based on game name" }
 background_audio = { optional = true, default = "eerie", example = "chill-summer", options = ["eerie", "mysterious", "hybrid",""], explanation = "Sets the background audio for the video" }
--- a/utils/compressor.py
+++ b/utils/compressor.py
@ -0,0 +1,40 @@
+import os
+import ffmpeg
+
+
+def compress_video(video_full_path):
+    """Re-encode a video with two-pass libx264 so the result is smaller.
+
+    The total bitrate budget is derived from the input's size on disk and its
+    probed duration, then split between the video and audio streams.
+
+    Args:
+        video_full_path: Path to the video file to compress.
+
+    Returns:
+        Path of the compressed file: the input path with its extension
+        replaced by ``_compressed.mp4``.
+    """
+    # Reference: https://en.wikipedia.org/wiki/Bit_rate#Encoding_bit_rate
+    min_audio_bitrate = 32000
+    max_audio_bitrate = 256000
+
+    # splitext handles extensions of any length; slicing off a fixed four
+    # characters would corrupt names such as "clip.webm" or "clip".
+    output_file_name = os.path.splitext(video_full_path)[0] + '_compressed.mp4'
+
+    probe = ffmpeg.probe(video_full_path)
+    # Video duration, in s.
+    duration = float(probe['format']['duration'])
+    # Video output size. Combined with the formula below this works out to
+    # roughly a fifth of the input's size in bytes.
+    target_size = os.path.getsize(video_full_path) / 5000
+    # Target total bitrate, in bps.
+    target_total_bitrate = (target_size * 1024 * 8) / (1.073741824 * duration)
+
+    # Audio bitrate, in bps. The input may have no audio stream at all, and a
+    # stream may lack a 'bit_rate' entry; in the latter case assume the
+    # maximum and let the clamping below bring it down.
+    audio_stream = next((s for s in probe['streams'] if s['codec_type'] == 'audio'), None)
+    has_audio = audio_stream is not None
+    if has_audio:
+        audio_bitrate = float(audio_stream.get('bit_rate', max_audio_bitrate))
+    else:
+        audio_bitrate = 0.0
+
+    # Target audio bitrate, in bps: cap audio at a tenth of the total budget,
+    # but keep it within [min_audio_bitrate, max_audio_bitrate] when possible.
+    if 10 * audio_bitrate > target_total_bitrate:
+        audio_bitrate = target_total_bitrate / 10
+        if audio_bitrate < min_audio_bitrate < target_total_bitrate:
+            audio_bitrate = min_audio_bitrate
+        elif audio_bitrate > max_audio_bitrate:
+            audio_bitrate = max_audio_bitrate
+    # Target video bitrate, in bps.
+    video_bitrate = target_total_bitrate - audio_bitrate
+
+    second_pass_options = {'c:v': 'libx264', 'b:v': video_bitrate, 'pass': 2}
+    if has_audio:
+        # Only request an audio encoder when there is audio to encode.
+        second_pass_options.update({'c:a': 'aac', 'b:a': audio_bitrate})
+
+    i = ffmpeg.input(video_full_path)
+    # Pass 1 only gathers statistics, so its encoded output is discarded.
+    ffmpeg.output(i, os.devnull,
+                  **{'c:v': 'libx264', 'b:v': video_bitrate, 'pass': 1, 'f': 'mp4'}
+                  ).overwrite_output().run()
+    # Pass 2 writes the real file using the statistics from pass 1.
+    ffmpeg.output(i, output_file_name,
+                  **second_pass_options
+                  ).overwrite_output().run()
+
+    return output_file_name
--- a/utils/imagenarator.py
+++ b/utils/imagenarator.py
@ -7,6 +7,7 @@ from PIL import Image, ImageDraw, ImageFont
 from rich.progress import track
 from TTS.engine_wrapper import process_text
 from utils.process_post import process_post
+from utils import settings


 def draw_multiple_line_text(
@ -59,13 +60,14 @@ def draw_multiple_line_text(
        y += line_height + padding


-def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) -> None:
+def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False, reel=False) -> None:
    """
    Render Images for video
    """
-    # return
+    if settings.config["settings"]["debug"]["reuse_images"]: return
+
    title = process_text(reddit_obj["thread_title"], False)
-    texts = process_post(reddit_obj["thread_post"])
+    texts = process_post(reddit_obj["thread_post"], reel)
    id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])

    if transparent:
@ -91,13 +93,13 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
                sub_text = text[i]
                image = Image.new("RGBA", size, theme)
                sub_text = process_text(sub_text, False)
-                draw_multiple_line_text(image, sub_text, font, txtclr, padding, wrap=30, transparent=transparent)
+                draw_multiple_line_text(image, sub_text, font, txtclr, padding, wrap=25, transparent=transparent)
                image.save(f"assets/temp/{id}/png/img{idx}-{i+1}.png")
                weights[f"{idx}-{i+1}"] = round(len(sub_text) / total_text_length, 3)
        else:
            image = Image.new("RGBA", size, theme)
            text = process_text(text, False)
-            draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent)
+            draw_multiple_line_text(image, text, font, txtclr, padding, wrap=25, transparent=transparent)
            image.save(f"assets/temp/{id}/png/img{idx}.png")
    
    with open(f"assets/temp/{id}/weights.json", 'w') as file:
--- a/utils/process_post.py
+++ b/utils/process_post.py
@ -1,6 +1,7 @@
-def process_post(reddit_thread_post):
+def process_post(reddit_thread_post, reel):
    texts = reddit_thread_post
-    threshold = 80
+    if reel: threshold = 80
+    else: threshold = 60
    for i in range(len(texts)):
        if len(texts[i]) > threshold:
            texts[i] = split_text(texts[i], threshold)
--- a/video_creation/final_video.py
+++ b/video_creation/final_video.py
@ -136,6 +136,7 @@ def make_final_video(
    length: int,
    reddit_obj: dict,
    background_config: Dict[str, Tuple],
+    reel = False
 ):
    """Gathers audio clips, gathers all screenshots, stitches them together and saves the final video to assets/temp
    Args:
@ -144,19 +145,23 @@ def make_final_video(
        reddit_obj (dict): The reddit object that contains the posts to read.
        background_config (Tuple[str, str, str, Any]): The background config to use.
    """
-    # title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"])
-    # filename = f"{name_normalize(title)[:251]}"
-    # p = f'results/{settings.config["reddit"]["thread"]["subreddit"]}' + f"/{filename}"
-    # print((
-    #         p[:251] + ".mp4"
-    #     ))
-    # return (
-    #         p[:251] + ".mp4"
-    #     )
-
-    # settings values
-    W: Final[int] = int(settings.config["settings"]["resolution_w"])
-    H: Final[int] = int(settings.config["settings"]["resolution_h"])
+    if settings.config["settings"]["debug"]["reuse_video"]:
+        title = re.sub(r"[^\w\s-]", "", reddit_obj["thread_title"])
+        filename = f"{name_normalize(title)[:251]}"
+        p = f'results/{settings.config["reddit"]["thread"]["subreddit"]}' + f"/{filename}"
+        print((
+                p[:251] + ".mp4"
+            ))
+        return (
+                p[:251] + ".mp4"
+            )
+
+    if reel:
+        W: Final[int] = 1080
+        H: Final[int] = 1920
+    else:
+        W: Final[int] = 1920
+        H: Final[int] = 1080

    opacity = settings.config["settings"]["opacity"]

@ -184,12 +189,12 @@ def make_final_video(
            audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")]
            audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3"))
        elif settings.config["settings"]["storymodemethod"] == 1:
-            audio_clips = [
-                ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
-                for i in track(range(number_of_clips + 1), "Collecting the audio files...")
-            ]
-            audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3"))
-            # pass
+            if not settings.config["settings"]["debug"]["reuse_mp3"]:
+                audio_clips = [
+                    ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
+                    for i in track(range(number_of_clips + 1), "Collecting the audio files...")
+                ]
+                audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3"))

    else:
        audio_clips = [
@ -205,15 +210,17 @@ def make_final_video(
            0,
            float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
        )
-    # Comment those as well when testing
-    audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0)
-    ffmpeg.output(
-        audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"}
-    ).overwrite_output().run(quiet=True)
+    
+    if not settings.config["settings"]["debug"]["reuse_mp3"]:
+        audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0)
+        ffmpeg.output(
+            audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"}
+        ).overwrite_output().run(quiet=True)

    console.log(f"[bold green] Video Will Be: {length} Seconds Long")

-    screenshot_width = int((W * 45) // 100)
+    if reel: screenshot_width = int((W * 45) // 100)
+    else: screenshot_width = W

    # audio = AudioSegment.from_mp3(f"assets/temp/{reddit_id}/audio.mp3")
    # louder_audio = audio + 10
--- a/video_creation/screenshot_downloader.py
+++ b/video_creation/screenshot_downloader.py
@ -18,7 +18,7 @@ from utils.videos import save_data
 __all__ = ["download_screenshots_of_reddit_posts"]


-def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
+def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int, reel=False):
    """Downloads screenshots of reddit posts as seen on the web. Downloads to assets/temp/png

    Args:
@ -26,11 +26,16 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
        screenshot_num (int): Number of screenshots to download
    """
    # settings values
-    W: Final[int] = int(settings.config["settings"]["resolution_w"])
-    H: Final[int] = int(settings.config["settings"]["resolution_h"])
    lang: Final[str] = settings.config["reddit"]["thread"]["post_lang"]
    storymode: Final[bool] = settings.config["settings"]["storymode"]

+    if reel:
+        W: Final[int] = 1080
+        H: Final[int] = 1920
+    else:
+        W: Final[int] = 1920
+        H: Final[int] = 1080
+
    print_step("Downloading screenshots of reddit posts...")
    reddit_id = re.sub(r"[^\w\s-]", "", reddit_object["thread_id"])
    # ! Make sure the reddit screenshots folder exists
@ -68,6 +73,7 @@ def get_screenshots_of_reddit_posts(reddit_object: dict, screenshot_num: int):
            reddit_obj=reddit_object,
            txtclr=txtcolor,
            transparent=transparent,
+            reel=reel
        )

    screenshot_num: int
--- a/video_creation/voices.py
+++ b/video_creation/voices.py
@ -1,5 +1,7 @@
+import glob
 from typing import Tuple

+from pydub import AudioSegment
 from rich.console import Console

 from TTS.GTTS import GTTS
@ -36,6 +38,11 @@ def save_text_to_mp3(reddit_obj) -> Tuple[int, int]:
        tuple[int,int]: (total length of the audio, the number of comments audio was generated for)
    """

+    if settings.config["settings"]["debug"]["reuse_mp3"]:
+        comments = len(glob.glob(f"./assets/temp/{reddit_obj['thread_id']}/mp3/*")) - 2
+        audio = AudioSegment.from_mp3(f"./assets/temp/{reddit_obj['thread_id']}/audio.mp3")
+        return audio.duration_seconds, comments
+
    voice = settings.config["settings"]["tts"]["voice_choice"]
    if str(voice).casefold() in map(lambda _: _.casefold(), TTSProviders):
        text_to_mp3 = TTSEngine(get_case_insensitive_key_value(TTSProviders, voice), reddit_obj)
--- a/video_data_generation/image_generation.py
+++ b/video_data_generation/image_generation.py
@ -1,7 +1,7 @@
 import os
 import requests
 from bs4 import BeautifulSoup
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image, ImageFont

 from utils.imagenarator import draw_multiple_line_text

@ -60,10 +60,15 @@ def generate_image(prompt, save_path):
    }

    response = s.request("POST", url, headers=headers, data=payload)
-    image_url = response.json()['images'][0]['src']
-    image = s.get(image_url).content
-    with open(save_path, 'wb') as file:
-        file.write(image)
+    try:
+        image_url = response.json()['images'][0]['src']
+        image = s.get(image_url).content
+        with open(save_path, 'wb') as file:
+            file.write(image)
+    except Exception as e:
+        if response.json().get('error') is not None:
+            return "./assets/thumbnail_bg.png"
+        raise e
    return save_path

 def add_text(thumbnail_path, text, save_path):