feat: Changed how text appears on the screen in story mode

pull/2058/head
Mohamed Moataz 2 years ago
parent d2394f9c7b
commit 6f17fbbe69

@ -33,8 +33,8 @@ class UnrealSpeech:
'Text': text, # Up to 1000 characters 'Text': text, # Up to 1000 characters
'VoiceId': voice, # Dan, Will, Scarlett, Liv, Amy 'VoiceId': voice, # Dan, Will, Scarlett, Liv, Amy
'Bitrate': '192k', # 320k, 256k, 192k, ... 'Bitrate': '192k', # 320k, 256k, 192k, ...
'Speed': '-0.15', # -1.0 to 1.0 'Speed': settings.config["settings"]["tts"]["unreal_speech_voice_speed"],
'Pitch': '1.2', # -0.5 to 1.5 'Pitch': settings.config["settings"]["tts"]["unreal_speech_voice_pitch"],
'Codec': 'libmp3lame', # libmp3lame or pcm_mulaw 'Codec': 'libmp3lame', # libmp3lame or pcm_mulaw
} }
headers = {'Authorization' : f'Bearer {api_key}'} headers = {'Authorization' : f'Bearer {api_key}'}

@ -49,6 +49,8 @@ elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella"
elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
unreal_speech_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Unreal Speech API key" } unreal_speech_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Unreal Speech API key" }
unreal_speech_voice_name = { optional = false, default = "Liv", example = "Liv", explanation = "The voice used for Unreal Speech", options = ["Scarlett", "Amy", "Liv", "Dan", "Will", ] } unreal_speech_voice_name = { optional = false, default = "Liv", example = "Liv", explanation = "The voice used for Unreal Speech", options = ["Scarlett", "Amy", "Liv", "Dan", "Will", ] }
unreal_speech_voice_pitch = { optional = false, default = "1", example = "1.2", explanation = "The pitch of the voice used for Unreal Speech (0.5 to 1.5)", type = "float" }
unreal_speech_voice_speed = { optional = false, default = "0", example = "-0.15", explanation = "The speed of the voice used for Unreal Speech (-1.0 to 1.0)", type = "float" }
aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" } aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" } streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" } tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" }

@ -1,3 +1,4 @@
import json
import re import re
import textwrap import textwrap
import os import os
@ -5,6 +6,7 @@ import os
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
from rich.progress import track from rich.progress import track
from TTS.engine_wrapper import process_text from TTS.engine_wrapper import process_text
from utils.process_post import process_post
def draw_multiple_line_text( def draw_multiple_line_text(
@ -56,7 +58,7 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
Render Images for video Render Images for video
""" """
title = process_text(reddit_obj["thread_title"], False) title = process_text(reddit_obj["thread_title"], False)
texts = reddit_obj["thread_post"] texts = process_post(reddit_obj["thread_post"])
id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])
if transparent: if transparent:
@ -74,8 +76,22 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
image.save(f"assets/temp/{id}/png/title.png") image.save(f"assets/temp/{id}/png/title.png")
weights = dict()
for idx, text in track(enumerate(texts), "Rendering Image"): for idx, text in track(enumerate(texts), "Rendering Image"):
image = Image.new("RGBA", size, theme) if isinstance(text, tuple):
text = process_text(text, False) total_text_length = sum(len(t) for t in text)
draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent) for i in range(len(text)):
image.save(f"assets/temp/{id}/png/img{idx}.png") sub_text = text[i]
image = Image.new("RGBA", size, theme)
sub_text = process_text(sub_text, False)
draw_multiple_line_text(image, sub_text, font, txtclr, padding, wrap=30, transparent=transparent)
image.save(f"assets/temp/{id}/png/img{idx}-{i+1}.png")
weights[f"{idx}-{i+1}"] = round(len(sub_text) / total_text_length, 3)
else:
image = Image.new("RGBA", size, theme)
text = process_text(text, False)
draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent)
image.save(f"assets/temp/{id}/png/img{idx}.png")
with open(f"assets/temp/{id}/weights.json", 'w') as file:
file.write(json.dumps(weights, indent=4))

@ -0,0 +1,33 @@
def process_post(reddit_thread_post, threshold=60):
    """Prepare a Reddit post's paragraphs for rendering as story-mode images.

    Paragraphs longer than *threshold* characters are broken into a tuple of
    shorter chunks via ``split_text``; shorter paragraphs pass through
    unchanged.

    Args:
        reddit_thread_post: List of paragraph strings from the thread post.
        threshold: Character count above which a paragraph is split
            (default 60, matching the original hard-coded value).

    Returns:
        A new list where each element is either the original paragraph
        string or a tuple of chunk strings. The input list is NOT modified
        (the previous implementation mutated it in place, which surprised
        any other code still holding a reference to it).
    """
    return [
        split_text(paragraph, threshold) if len(paragraph) > threshold else paragraph
        for paragraph in reddit_thread_post
    ]


def split_text(text, threshold):
    """Split *text* on spaces into chunks of at least *threshold* characters.

    Words are accumulated into a running chunk; once the chunk reaches
    *threshold* characters it is emitted and accumulation restarts. Words
    are never cut mid-word, so a chunk may exceed *threshold*.

    Args:
        text: The paragraph to split.
        threshold: Minimum chunk length (in characters) that triggers a cut.

    Returns:
        The chunk itself as a plain string when exactly one chunk results,
        otherwise a tuple of chunk strings. Callers distinguish the two
        cases with ``isinstance(..., tuple)``. An empty *text* yields an
        empty tuple.
    """
    chunks = []
    current = ''
    for word in text.split(' '):
        if current == '':
            # Seed a fresh chunk. The length check is deliberately skipped
            # here so a chunk contains at least two words when possible,
            # even if the first word alone already exceeds the threshold.
            current = word
            continue
        current += ' ' + word
        if len(current) >= threshold:
            chunks.append(current)
            current = ''
    if current != '':
        chunks.append(current)  # trailing partial chunk
    return chunks[0] if len(chunks) == 1 else tuple(chunks)

@ -1,15 +1,19 @@
import json
import multiprocessing import multiprocessing
import os import os
import re import re
from os.path import exists # Needs to be imported specifically from os.path import exists # Needs to be imported specifically
from typing import Final from typing import Final
from typing import Tuple, Any, Dict from typing import Tuple, Any, Dict
import glob
import ffmpeg import ffmpeg
import translators import translators
from PIL import Image from PIL import Image
from rich.console import Console from rich.console import Console
from rich.progress import track from rich.progress import track
from pydub import AudioSegment
from pydub.playback import play
from utils.cleanup import cleanup from utils.cleanup import cleanup
from utils.console import print_step, print_substep from utils.console import print_step, print_substep
@ -155,7 +159,8 @@ def make_final_video(
print_step("Creating the final video 🎥") print_step("Creating the final video 🎥")
background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H)) # background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H))
background_clip = ffmpeg.input(f"assets/temp/{reddit_id}/background_noaudio.mp4")
# Gather all audio clips # Gather all audio clips
audio_clips = list() audio_clips = list()
@ -169,11 +174,12 @@ def make_final_video(
audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")] audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")]
audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3")) audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3"))
elif settings.config["settings"]["storymodemethod"] == 1: elif settings.config["settings"]["storymodemethod"] == 1:
audio_clips = [ # audio_clips = [
ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") # ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
for i in track(range(number_of_clips + 1), "Collecting the audio files...") # for i in track(range(number_of_clips + 1), "Collecting the audio files...")
] # ]
audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")) # audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3"))
pass
else: else:
audio_clips = [ audio_clips = [
@ -189,14 +195,19 @@ def make_final_video(
0, 0,
float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
) )
audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0) # audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0)
ffmpeg.output( # ffmpeg.output(
audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"} # audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"}
).overwrite_output().run(quiet=True) # ).overwrite_output().run(quiet=True)
console.log(f"[bold green] Video Will Be: {length} Seconds Long") console.log(f"[bold green] Video Will Be: {length} Seconds Long")
screenshot_width = int((W * 45) // 100) screenshot_width = int((W * 45) // 100)
# audio = AudioSegment.from_mp3(f"assets/temp/{reddit_id}/audio.mp3")
# louder_audio = audio + 10
# louder_audio.export(f"assets/temp/{reddit_id}/audio.mp3", format='mp3')
audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3") audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3")
final_audio = merge_background_audio(audio, reddit_id) final_audio = merge_background_audio(audio, reddit_id)
@ -221,6 +232,15 @@ def make_final_video(
0, 0,
float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
) )
background_clip = background_clip.overlay(
image_clips[0],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
current_time += audio_clips_durations[0]
if settings.config["settings"]["storymodemethod"] == 0: if settings.config["settings"]["storymodemethod"] == 0:
image_clips.insert( image_clips.insert(
1, 1,
@ -228,26 +248,47 @@ def make_final_video(
"scale", screenshot_width, -1 "scale", screenshot_width, -1
), ),
) )
background_clip = background_clip.overlay(
image_clips[0],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
current_time += audio_clips_durations[0]
elif settings.config["settings"]["storymodemethod"] == 1: elif settings.config["settings"]["storymodemethod"] == 1:
for i in track(range(0, number_of_clips + 1), "Collecting the image files..."): with open(f"assets/temp/{reddit_id}/weights.json", 'r') as file:
image_clips.append( weights = json.loads(file.read())
ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i}.png")["v"].filter( for i in track(range(1, number_of_clips + 1), "Collecting the image files..."):
"scale", screenshot_width, -1 # Get all sub images
sub_images = glob.glob(f"assets/temp/{reddit_id}/png/img{i-1}-*.png")
if sub_images:
images = []
for image in sub_images:
weight_id = image.split("img")[-1][:-4]
images.append(
(
ffmpeg.input(image)["v"].filter(
"scale", screenshot_width, -1
),
weights[weight_id]
)
)
image_clips.append(images)
vid_time = current_time
for image in image_clips[i]:
background_clip = background_clip.overlay(
image[0],
enable=f"between(t,{vid_time},{vid_time + audio_clips_durations[i] * image[1]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
vid_time += audio_clips_durations[i] * image[1]
else:
image_clips.append(
ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i-1}.png")["v"].filter(
"scale", screenshot_width, -1
)
)
background_clip = background_clip.overlay(
image_clips[i],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
) )
)
background_clip = background_clip.overlay(
image_clips[i],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
current_time += audio_clips_durations[i] current_time += audio_clips_durations[i]
else: else:
for i in range(0, number_of_clips + 1): for i in range(0, number_of_clips + 1):

Loading…
Cancel
Save