Merge pull request #1177 from Trichtern/feat/add-silence-between-clips

Minor changes to #990 (fixed audio glitches caused by previous audio concatenation method)
pull/1211/head
Jason 3 years ago committed by GitHub
commit c995811a23
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -1,4 +1,5 @@
#!/usr/bin/env python3
import os
import re
from pathlib import Path
from typing import Tuple
@ -6,8 +7,11 @@ from typing import Tuple
# import sox
# from mutagen import MutagenError
# from mutagen.mp3 import MP3, HeaderNotFoundError
import numpy as np
import translators as ts
from moviepy.editor import AudioFileClip, CompositeAudioClip, concatenate_audioclips
from moviepy.audio.AudioClip import AudioClip
from moviepy.audio.fx.volumex import volumex
from moviepy.editor import AudioFileClip
from rich.progress import track
from utils import settings
@ -84,45 +88,68 @@ class TTSEngine:
split_text = [
x.group().strip() for x in re.finditer(r" *(((.|\n){0," + str(self.tts_module.max_chars) + "})(\.|.$))", text)
]
offset = 0
for idy, text_cut in enumerate(split_text):
# print(f"{idx}-{idy}: {text_cut}\n")
new_text = process_text(text_cut)
if not new_text or new_text.isspace():
offset += 1
continue
self.call_tts(f"{idx}-{idy - offset}.part", new_text)
split_files.append(AudioFileClip(f"{self.path}/{idx}-{idy - offset}.part.mp3"))
CompositeAudioClip([concatenate_audioclips(split_files)]).write_audiofile(
f"{self.path}/{idx}.mp3", fps=44100, verbose=False, logger=None
)
self.create_silence_mp3()
for i in split_files:
name = i.filename
i.close()
Path(name).unlink()
# for i in range(0, idy + 1):
# print(f"Cleaning up {self.path}/{idx}-{i}.part.mp3")
idy = None
for idy, text_cut in enumerate(split_text):
newtext = process_text(text_cut)
# print(f"{idx}-{idy}: {newtext}\n")
# Path(f"{self.path}/{idx}-{i}.part.mp3").unlink()
if not newtext or newtext.isspace():
print("newtext was blank because sanitized split text resulted in none")
continue
else:
self.call_tts(f"{idx}-{idy}.part", newtext)
with open(f"{self.path}/list.txt", 'w') as f:
for idz in range(0, len(split_text)):
f.write("file " + f"'{idx}-{idz}.part.mp3'" + "\n")
split_files.append(str(f"{self.path}/{idx}-{idy}.part.mp3"))
f.write("file " + f"'silence.mp3'" + "\n")
os.system("ffmpeg -f concat -y -hide_banner -loglevel panic -safe 0 " +
"-i " + f"{self.path}/list.txt " +
"-c copy " + f"{self.path}/{idx}.mp3")
try:
for i in range(0, len(split_files)):
os.unlink(split_files[i])
except FileNotFoundError as e:
print("File not found: " + e.filename)
except OSError:
print("OSError")
def call_tts(self, filename: str, text: str):
    """Synthesize ``text`` into ``{self.path}/{filename}.mp3`` followed by a pause.

    The speech is first rendered to ``{filename}_no_silence.mp3``, a
    ``silence.mp3`` clip is generated, and both are joined with ffmpeg's
    concat demuxer into ``{filename}.mp3``. The duration of the joined clip
    is stored in ``self.last_clip_length`` and added to ``self.length``.
    The intermediate files are removed afterwards.

    Args:
        filename: Base name (without extension) of the clip to produce.
        text: Text handed to ``self.tts_module.run``.

    Any error while synthesizing, joining or measuring the clip resets
    ``self.length`` to 0 instead of propagating.
    """
    # Local import so this block does not depend on the file's import header.
    import subprocess

    raw_name = f"{filename}_no_silence.mp3"
    list_name = f"{filename}.txt"
    list_path = f"{self.path}/{list_name}"
    output_path = f"{self.path}/{filename}.mp3"

    try:
        self.tts_module.run(text, filepath=f"{self.path}/{raw_name}")
        self.create_silence_mp3()

        # Input list for ffmpeg's concat demuxer: speech first, then the pause.
        with open(list_path, "w") as f:
            f.write("file " + f"'{raw_name}'" + "\n")
            f.write("file " + "'silence.mp3'" + "\n")

        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters are passed to ffmpeg unchanged.
        subprocess.run(
            [
                "ffmpeg",
                "-f", "concat",
                "-y",
                "-hide_banner",
                "-loglevel", "panic",
                "-safe", "0",
                "-i", list_path,
                "-c", "copy",
                output_path,
            ],
            check=False,  # a failed join surfaces below when the clip is opened
        )

        clip = AudioFileClip(output_path)
        try:
            self.last_clip_length = clip.duration
            self.length += clip.duration
        finally:
            clip.close()

        # Remove intermediates one by one so a single missing file does not
        # leave the remaining ones behind.
        for leftover in (raw_name, "silence.mp3", list_name):
            try:
                os.unlink(f"{self.path}/{leftover}")
            except FileNotFoundError as e:
                print("File not found: " + e.filename)
            except OSError:
                print("OSError")
    except Exception:
        self.length = 0
def create_silence_mp3(self):
    """Write the pause clip ``silence.mp3`` into ``self.path``.

    The clip length in seconds is read from
    ``settings.config["settings"]["tts"]["silence_duration"]``. A 440 Hz sine
    is generated at 44100 fps and then scaled by a volume factor of 0 before
    it is written out.
    """

    def tone(t):
        # 440 Hz sine; it only serves as the signal that is muted below.
        return np.sin(440 * 2 * np.pi * t)

    pause_seconds = settings.config["settings"]["tts"]["silence_duration"]
    muted = volumex(
        AudioClip(make_frame=tone, duration=pause_seconds, fps=44100),
        0,
    )
    muted.write_audiofile(
        f"{self.path}/silence.mp3", fps=44100, verbose=False, logger=None
    )
def process_text(text: str):
lang = settings.config["reddit"]["thread"]["post_lang"]

@ -37,3 +37,4 @@ streamlabs_polly_voice = { optional = false, default = "Matthew", example = "Mat
tiktok_voice = { optional = false, default = "en_us_006", example = "en_us_006", explanation = "The voice used for TikTok TTS" }
python_voice = { optional = false, default = "1", example = "1", explanation = "The index of the system tts voices (can be downloaded externally, run ptt.py to find value, start from zero)" }
py_voice_num = { optional = false, default = "2", example = "2", explanation = "The number of system voices (2 are pre-installed on Windows)" }
silence_duration = { optional = true, example = "0.1", explanation = "time in seconds between TTS comments", default = 0.3, type = "float" }
Loading…
Cancel
Save