diff --git a/TTS/unreal_speech.py b/TTS/unreal_speech.py index 62d7d9d..4504dc0 100644 --- a/TTS/unreal_speech.py +++ b/TTS/unreal_speech.py @@ -33,8 +33,8 @@ class UnrealSpeech: 'Text': text, # Up to 1000 characters 'VoiceId': voice, # Dan, Will, Scarlett, Liv, Amy 'Bitrate': '192k', # 320k, 256k, 192k, ... - 'Speed': '-0.15', # -1.0 to 1.0 - 'Pitch': '1.2', # -0.5 to 1.5 + 'Speed': settings.config["settings"]["tts"]["unreal_speech_voice_speed"], + 'Pitch': settings.config["settings"]["tts"]["unreal_speech_voice_pitch"], 'Codec': 'libmp3lame', # libmp3lame or pcm_mulaw } headers = {'Authorization' : f'Bearer {api_key}'} diff --git a/utils/.config.template.toml b/utils/.config.template.toml index aa422ee..32854ed 100644 --- a/utils/.config.template.toml +++ b/utils/.config.template.toml @@ -49,6 +49,8 @@ elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella" elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } unreal_speech_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Unreal Speech API key" } unreal_speech_voice_name = { optional = false, default = "Liv", example = "Liv", explanation = "The voice used for Unreal Speech", options = ["Scarlett", "Amy", "Liv", "Dan", "Will", ] } +unreal_speech_voice_pitch = { optional = false, default = "1", example = "1.2", explanation = "The pitch of the voice used for Unreal Speech (0.5 to 1.5)", type = "float" } +unreal_speech_voice_speed = { optional = false, default = "0", example = "-0.15", explanation = "The speed of the voice used for Unreal Speech (-1.0 to 1.0)", type = "float" } aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" } streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" } tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" } diff --git a/utils/imagenarator.py b/utils/imagenarator.py index 9356540..67d08ee 100644 --- a/utils/imagenarator.py +++ b/utils/imagenarator.py @@ -1,3 +1,4 @@ +import json import re import textwrap import os @@ -5,6 +6,7 @@ import os from PIL import Image, ImageDraw, ImageFont from rich.progress import track from TTS.engine_wrapper import process_text +from utils.process_post import process_post def draw_multiple_line_text( @@ -56,7 +58,7 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) -> Render Images for video """ title = process_text(reddit_obj["thread_title"], False) - texts = reddit_obj["thread_post"] + texts = process_post(reddit_obj["thread_post"]) id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) if transparent: @@ -74,8 +76,22 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) -> image.save(f"assets/temp/{id}/png/title.png") + weights = dict() for idx, text in track(enumerate(texts), "Rendering Image"): - image = Image.new("RGBA", size, theme) - text = process_text(text, False) - draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent) - image.save(f"assets/temp/{id}/png/img{idx}.png") + if isinstance(text, tuple): + total_text_length = sum(len(t) for t in text) + for i in range(len(text)): + sub_text = text[i] + image = Image.new("RGBA", size, theme) + sub_text = process_text(sub_text, False) + draw_multiple_line_text(image, sub_text, font, txtclr, padding, wrap=30, transparent=transparent) + image.save(f"assets/temp/{id}/png/img{idx}-{i+1}.png") + weights[f"{idx}-{i+1}"] = round(len(sub_text) / total_text_length, 3) + else: + image = Image.new("RGBA", size, theme) + text = process_text(text, False) + draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent) + image.save(f"assets/temp/{id}/png/img{idx}.png") + + with open(f"assets/temp/{id}/weights.json", 'w') as file: + file.write(json.dumps(weights, indent=4)) diff --git a/utils/process_post.py b/utils/process_post.py new file mode 100644 index 0000000..ef03370 --- /dev/null +++ b/utils/process_post.py @@ -0,0 +1,33 @@ +def process_post(reddit_thread_post): + texts = reddit_thread_post + threshold = 60 + for i in range(len(texts)): + if len(texts[i]) > threshold: + texts[i] = split_text(texts[i], threshold) + return texts + +def split_text(text, threshold): + text = text.split(' ') + new_text = '' + texts = [] + # for i in range(threshold+1,1,-1): + # if (len(text) / i) - (len(text) // i) >= 0.7: + # threshold = i + # # print("Found:", threshold) + # break + + for i in text: + if new_text == '': + new_text = i + continue + + new_text += ' ' + i + if len(new_text) >= threshold: + texts.append(new_text) + new_text = '' + + if new_text != '': + texts.append(new_text) + + if len(texts) == 1: return texts[0] + return tuple(texts) \ No newline at end of file diff --git a/video_creation/final_video.py b/video_creation/final_video.py index 84ca249..8d551e9 100644 --- a/video_creation/final_video.py +++ b/video_creation/final_video.py @@ -1,15 +1,19 @@ +import json import multiprocessing import os import re from os.path import exists # Needs to be imported specifically from typing import Final from typing import Tuple, Any, Dict +import glob import ffmpeg import translators from PIL import Image from rich.console import Console from rich.progress import track +from pydub import AudioSegment +from pydub.playback import play from utils.cleanup import cleanup from utils.console import print_step, print_substep @@ -155,7 +159,8 @@ def make_final_video( print_step("Creating the final video 🎥") - background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H)) + # background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H)) + background_clip = ffmpeg.input(f"assets/temp/{reddit_id}/background_noaudio.mp4") # Gather all audio clips audio_clips = list() @@ -169,11 +174,12 @@ def make_final_video( audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")] audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3")) elif settings.config["settings"]["storymodemethod"] == 1: - audio_clips = [ - ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") - for i in track(range(number_of_clips + 1), "Collecting the audio files...") - ] - audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")) + # audio_clips = [ + # ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") + # for i in track(range(number_of_clips + 1), "Collecting the audio files...") + # ] + # audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")) + pass else: audio_clips = [ @@ -189,14 +195,19 @@ def make_final_video( 0, float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), ) - audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0) - ffmpeg.output( - audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"} - ).overwrite_output().run(quiet=True) + # audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0) + # ffmpeg.output( + # audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"} + # ).overwrite_output().run(quiet=True) console.log(f"[bold green] Video Will Be: {length} Seconds Long") screenshot_width = int((W * 45) // 100) + + # audio = AudioSegment.from_mp3(f"assets/temp/{reddit_id}/audio.mp3") + # louder_audio = audio + 10 + # louder_audio.export(f"assets/temp/{reddit_id}/audio.mp3", format='mp3') + audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3") final_audio = merge_background_audio(audio, reddit_id) @@ -221,6 +232,15 @@ def make_final_video( 0, float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), ) + + background_clip = background_clip.overlay( + image_clips[0], + enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})", + x="(main_w-overlay_w)/2", + y="(main_h-overlay_h)/2", + ) + current_time += audio_clips_durations[0] + if settings.config["settings"]["storymodemethod"] == 0: image_clips.insert( 1, @@ -228,26 +248,47 @@ def make_final_video( "scale", screenshot_width, -1 ), ) - background_clip = background_clip.overlay( - image_clips[0], - enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})", - x="(main_w-overlay_w)/2", - y="(main_h-overlay_h)/2", - ) - current_time += audio_clips_durations[0] elif settings.config["settings"]["storymodemethod"] == 1: - for i in track(range(0, number_of_clips + 1), "Collecting the image files..."): - image_clips.append( - ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i}.png")["v"].filter( - "scale", screenshot_width, -1 + with open(f"assets/temp/{reddit_id}/weights.json", 'r') as file: + weights = json.loads(file.read()) + for i in track(range(1, number_of_clips + 1), "Collecting the image files..."): + # Get all sub images + sub_images = glob.glob(f"assets/temp/{reddit_id}/png/img{i-1}-*.png") + if sub_images: + images = [] + for image in sub_images: + weight_id = image.split("img")[-1][:-4] + images.append( + ( + ffmpeg.input(image)["v"].filter( + "scale", screenshot_width, -1 + ), + weights[weight_id] + ) + ) + image_clips.append(images) + + vid_time = current_time + for image in image_clips[i]: + background_clip = background_clip.overlay( + image[0], + enable=f"between(t,{vid_time},{vid_time + audio_clips_durations[i] * image[1]})", + x="(main_w-overlay_w)/2", + y="(main_h-overlay_h)/2", + ) + vid_time += audio_clips_durations[i] * image[1] + else: + image_clips.append( + ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i-1}.png")["v"].filter( + "scale", screenshot_width, -1 + ) + ) + background_clip = background_clip.overlay( + image_clips[i], + enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})", + x="(main_w-overlay_w)/2", + y="(main_h-overlay_h)/2", ) - ) - background_clip = background_clip.overlay( - image_clips[i], - enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})", - x="(main_w-overlay_w)/2", - y="(main_h-overlay_h)/2", - ) current_time += audio_clips_durations[i] else: for i in range(0, number_of_clips + 1):