feat: Changed how text appears on the screen in story mode

pull/2058/head
Mohamed Moataz 2 years ago
parent d2394f9c7b
commit 6f17fbbe69

@ -33,8 +33,8 @@ class UnrealSpeech:
'Text': text, # Up to 1000 characters 'Text': text, # Up to 1000 characters
'VoiceId': voice, # Dan, Will, Scarlett, Liv, Amy 'VoiceId': voice, # Dan, Will, Scarlett, Liv, Amy
'Bitrate': '192k', # 320k, 256k, 192k, ... 'Bitrate': '192k', # 320k, 256k, 192k, ...
'Speed': '-0.15', # -1.0 to 1.0 'Speed': settings.config["settings"]["tts"]["unreal_speech_voice_speed"],
'Pitch': '1.2', # -0.5 to 1.5 'Pitch': settings.config["settings"]["tts"]["unreal_speech_voice_pitch"],
'Codec': 'libmp3lame', # libmp3lame or pcm_mulaw 'Codec': 'libmp3lame', # libmp3lame or pcm_mulaw
} }
headers = {'Authorization' : f'Bearer {api_key}'} headers = {'Authorization' : f'Bearer {api_key}'}

@ -49,6 +49,8 @@ elevenlabs_voice_name = { optional = false, default = "Bella", example = "Bella"
elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" } elevenlabs_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Elevenlabs API key" }
unreal_speech_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Unreal Speech API key" } unreal_speech_api_key = { optional = true, example = "21f13f91f54d741e2ae27d2ab1b99d59", explanation = "Unreal Speech API key" }
unreal_speech_voice_name = { optional = false, default = "Liv", example = "Liv", explanation = "The voice used for Unreal Speech", options = ["Scarlett", "Amy", "Liv", "Dan", "Will", ] } unreal_speech_voice_name = { optional = false, default = "Liv", example = "Liv", explanation = "The voice used for Unreal Speech", options = ["Scarlett", "Amy", "Liv", "Dan", "Will", ] }
unreal_speech_voice_pitch = { optional = false, default = "1", example = "1.2", explanation = "The pitch of the voice used for Unreal Speech (0.5 to 1.5)", type = "float" }
unreal_speech_voice_speed = { optional = false, default = "0", example = "-0.15", explanation = "The speed of the voice used for Unreal Speech (-1.0 to 1.0)", type = "float" }
aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" } aws_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for AWS Polly" }
streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" } streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Matthew", explanation = "The voice used for Streamlabs Polly" }
tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" } tiktok_voice = { optional = true, default = "en_us_001", example = "en_us_006", explanation = "The voice used for TikTok TTS" }

@ -1,3 +1,4 @@
import json
import re import re
import textwrap import textwrap
import os import os
@ -5,6 +6,7 @@ import os
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
from rich.progress import track from rich.progress import track
from TTS.engine_wrapper import process_text from TTS.engine_wrapper import process_text
from utils.process_post import process_post
def draw_multiple_line_text( def draw_multiple_line_text(
@ -56,7 +58,7 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
Render Images for video Render Images for video
""" """
title = process_text(reddit_obj["thread_title"], False) title = process_text(reddit_obj["thread_title"], False)
texts = reddit_obj["thread_post"] texts = process_post(reddit_obj["thread_post"])
id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"]) id = re.sub(r"[^\w\s-]", "", reddit_obj["thread_id"])
if transparent: if transparent:
@ -74,8 +76,22 @@ def imagemaker(theme, reddit_obj: dict, txtclr, padding=5, transparent=False) ->
image.save(f"assets/temp/{id}/png/title.png") image.save(f"assets/temp/{id}/png/title.png")
weights = dict()
for idx, text in track(enumerate(texts), "Rendering Image"): for idx, text in track(enumerate(texts), "Rendering Image"):
image = Image.new("RGBA", size, theme) if isinstance(text, tuple):
text = process_text(text, False) total_text_length = sum(len(t) for t in text)
draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent) for i in range(len(text)):
image.save(f"assets/temp/{id}/png/img{idx}.png") sub_text = text[i]
image = Image.new("RGBA", size, theme)
sub_text = process_text(sub_text, False)
draw_multiple_line_text(image, sub_text, font, txtclr, padding, wrap=30, transparent=transparent)
image.save(f"assets/temp/{id}/png/img{idx}-{i+1}.png")
weights[f"{idx}-{i+1}"] = round(len(sub_text) / total_text_length, 3)
else:
image = Image.new("RGBA", size, theme)
text = process_text(text, False)
draw_multiple_line_text(image, text, font, txtclr, padding, wrap=30, transparent=transparent)
image.save(f"assets/temp/{id}/png/img{idx}.png")
with open(f"assets/temp/{id}/weights.json", 'w') as file:
file.write(json.dumps(weights, indent=4))

@ -0,0 +1,33 @@
def process_post(reddit_thread_post, threshold=60):
    """Prepare a Reddit post's paragraphs for rendering as story-mode images.

    Paragraphs longer than *threshold* characters are broken into a tuple of
    shorter chunks via ``split_text``; shorter paragraphs pass through
    unchanged.

    Args:
        reddit_thread_post: List of paragraph strings from the thread post.
        threshold: Character count above which a paragraph is split
            (default 60, matching the original hard-coded value).

    Returns:
        A new list where each element is either the original paragraph
        string or a tuple of chunk strings. The input list is NOT modified
        (the previous implementation mutated it in place, which surprised
        any other code still holding a reference to it).
    """
    return [
        split_text(paragraph, threshold) if len(paragraph) > threshold else paragraph
        for paragraph in reddit_thread_post
    ]


def split_text(text, threshold):
    """Split *text* on spaces into chunks of at least *threshold* characters.

    Words are accumulated into a running chunk; once the chunk reaches
    *threshold* characters it is emitted and accumulation restarts. Words
    are never cut mid-word, so a chunk may exceed *threshold*.

    Args:
        text: The paragraph to split.
        threshold: Minimum chunk length (in characters) that triggers a cut.

    Returns:
        The chunk itself as a plain string when exactly one chunk results,
        otherwise a tuple of chunk strings. Callers distinguish the two
        cases with ``isinstance(..., tuple)``. An empty *text* yields an
        empty tuple.
    """
    chunks = []
    current = ''
    for word in text.split(' '):
        if current == '':
            # Seed a fresh chunk. The length check is deliberately skipped
            # here so a chunk contains at least two words when possible,
            # even if the first word alone already exceeds the threshold.
            current = word
            continue
        current += ' ' + word
        if len(current) >= threshold:
            chunks.append(current)
            current = ''
    if current != '':
        chunks.append(current)  # trailing partial chunk
    return chunks[0] if len(chunks) == 1 else tuple(chunks)

@ -1,15 +1,19 @@
import json
import multiprocessing import multiprocessing
import os import os
import re import re
from os.path import exists # Needs to be imported specifically from os.path import exists # Needs to be imported specifically
from typing import Final from typing import Final
from typing import Tuple, Any, Dict from typing import Tuple, Any, Dict
import glob
import ffmpeg import ffmpeg
import translators import translators
from PIL import Image from PIL import Image
from rich.console import Console from rich.console import Console
from rich.progress import track from rich.progress import track
from pydub import AudioSegment
from pydub.playback import play
from utils.cleanup import cleanup from utils.cleanup import cleanup
from utils.console import print_step, print_substep from utils.console import print_step, print_substep
@ -155,7 +159,8 @@ def make_final_video(
print_step("Creating the final video 🎥") print_step("Creating the final video 🎥")
background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H)) # background_clip = ffmpeg.input(prepare_background(reddit_id, W=W, H=H))
background_clip = ffmpeg.input(f"assets/temp/{reddit_id}/background_noaudio.mp4")
# Gather all audio clips # Gather all audio clips
audio_clips = list() audio_clips = list()
@ -169,11 +174,12 @@ def make_final_video(
audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")] audio_clips = [ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")]
audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3")) audio_clips.insert(1, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio.mp3"))
elif settings.config["settings"]["storymodemethod"] == 1: elif settings.config["settings"]["storymodemethod"] == 1:
audio_clips = [ # audio_clips = [
ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3") # ffmpeg.input(f"assets/temp/{reddit_id}/mp3/postaudio-{i}.mp3")
for i in track(range(number_of_clips + 1), "Collecting the audio files...") # for i in track(range(number_of_clips + 1), "Collecting the audio files...")
] # ]
audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3")) # audio_clips.insert(0, ffmpeg.input(f"assets/temp/{reddit_id}/mp3/title.mp3"))
pass
else: else:
audio_clips = [ audio_clips = [
@ -189,14 +195,19 @@ def make_final_video(
0, 0,
float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
) )
audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0) # audio_concat = ffmpeg.concat(*audio_clips, a=1, v=0)
ffmpeg.output( # ffmpeg.output(
audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"} # audio_concat, f"assets/temp/{reddit_id}/audio.mp3", **{"b:a": "192k"}
).overwrite_output().run(quiet=True) # ).overwrite_output().run(quiet=True)
console.log(f"[bold green] Video Will Be: {length} Seconds Long") console.log(f"[bold green] Video Will Be: {length} Seconds Long")
screenshot_width = int((W * 45) // 100) screenshot_width = int((W * 45) // 100)
# audio = AudioSegment.from_mp3(f"assets/temp/{reddit_id}/audio.mp3")
# louder_audio = audio + 10
# louder_audio.export(f"assets/temp/{reddit_id}/audio.mp3", format='mp3')
audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3") audio = ffmpeg.input(f"assets/temp/{reddit_id}/audio.mp3")
final_audio = merge_background_audio(audio, reddit_id) final_audio = merge_background_audio(audio, reddit_id)
@ -221,6 +232,15 @@ def make_final_video(
0, 0,
float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]), float(ffmpeg.probe(f"assets/temp/{reddit_id}/mp3/title.mp3")["format"]["duration"]),
) )
background_clip = background_clip.overlay(
image_clips[0],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
current_time += audio_clips_durations[0]
if settings.config["settings"]["storymodemethod"] == 0: if settings.config["settings"]["storymodemethod"] == 0:
image_clips.insert( image_clips.insert(
1, 1,
@ -228,26 +248,47 @@ def make_final_video(
"scale", screenshot_width, -1 "scale", screenshot_width, -1
), ),
) )
background_clip = background_clip.overlay(
image_clips[0],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[0]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
current_time += audio_clips_durations[0]
elif settings.config["settings"]["storymodemethod"] == 1: elif settings.config["settings"]["storymodemethod"] == 1:
for i in track(range(0, number_of_clips + 1), "Collecting the image files..."): with open(f"assets/temp/{reddit_id}/weights.json", 'r') as file:
image_clips.append( weights = json.loads(file.read())
ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i}.png")["v"].filter( for i in track(range(1, number_of_clips + 1), "Collecting the image files..."):
"scale", screenshot_width, -1 # Get all sub images
sub_images = glob.glob(f"assets/temp/{reddit_id}/png/img{i-1}-*.png")
if sub_images:
images = []
for image in sub_images:
weight_id = image.split("img")[-1][:-4]
images.append(
(
ffmpeg.input(image)["v"].filter(
"scale", screenshot_width, -1
),
weights[weight_id]
)
)
image_clips.append(images)
vid_time = current_time
for image in image_clips[i]:
background_clip = background_clip.overlay(
image[0],
enable=f"between(t,{vid_time},{vid_time + audio_clips_durations[i] * image[1]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
vid_time += audio_clips_durations[i] * image[1]
else:
image_clips.append(
ffmpeg.input(f"assets/temp/{reddit_id}/png/img{i-1}.png")["v"].filter(
"scale", screenshot_width, -1
)
)
background_clip = background_clip.overlay(
image_clips[i],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
) )
)
background_clip = background_clip.overlay(
image_clips[i],
enable=f"between(t,{current_time},{current_time + audio_clips_durations[i]})",
x="(main_w-overlay_w)/2",
y="(main_h-overlay_h)/2",
)
current_time += audio_clips_durations[i] current_time += audio_clips_durations[i]
else: else:
for i in range(0, number_of_clips + 1): for i in range(0, number_of_clips + 1):

Loading…
Cancel
Save