def split_post(self, text: str, idx: int):
    """Synthesize ``text`` in engine-sized chunks and merge them into one mp3.

    The text is cut into pieces of at most ``self.tts_module().max_chars``
    characters. Each cut prefers, in order: the last period inside the
    window (or the end of the text), the last whitespace inside the window,
    and finally a hard cut, so no part of the text is ever dropped. Every
    piece is rendered through ``self.call_tts`` into
    ``{self.path}/{idx}-{n}.part.mp3``; the parts are then concatenated into
    ``{self.path}/{idx}.mp3`` and the temporary part files are removed.

    Args:
        text: The full text to speak.
        idx: Identifier used to build the output and part file names.

    Returns:
        None. When ``text`` holds nothing speakable (empty or whitespace
        only), no TTS call is made and no output file is written.
    """
    # Clamp to 1 so the ``{1,n}`` quantifiers below are always valid.
    max_chars = max(1, self.tts_module().max_chars)
    # One raw f-string: no invalid "\." escape, and the sentence branch allows
    # max_chars - 1 characters plus the terminator so a chunk never exceeds
    # the engine limit.
    chunk_pattern = re.compile(
        rf"\s*(.{{0,{max_chars - 1}}}(?:\.|.\Z)"
        rf"|.{{1,{max_chars}}}(?=\s)"
        rf"|.{{1,{max_chars}}})",
        re.DOTALL,
    )
    stripped = (match.group(1).strip() for match in chunk_pattern.finditer(text))
    # Whitespace-only matches strip to "" and must not be sent to the engine.
    chunks = [chunk for chunk in stripped if chunk]
    if not chunks:
        # Concatenating zero clips fails inside moviepy; there is nothing to say.
        return

    part_names = []
    clips = []
    try:
        for part_no, chunk in enumerate(chunks):
            # Register the name first so a part left behind by a failed TTS
            # call is still cleaned up.
            part_names.append(f"{self.path}/{idx}-{part_no}.part.mp3")
            self.call_tts(f"{idx}-{part_no}.part", chunk)
        for part_name in part_names:
            clips.append(AudioFileClip(part_name))
        CompositeAudioClip([concatenate_audioclips(clips)]).write_audiofile(
            f"{self.path}/{idx}.mp3", fps=44100, verbose=False, logger=None
        )
    finally:
        # Close readers before deleting: open handles block removal on some
        # platforms, and this must also run when synthesis or export fails.
        for clip in clips:
            clip.close()
        for part_name in part_names:
            part_file = Path(part_name)
            if part_file.exists():
                part_file.unlink()
settings.config["reddit"]["thread"]["max_comment_length"] ): diff --git a/utils/voice.py b/utils/voice.py index 0272b09..4a77833 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -81,7 +81,7 @@ def sanitize_text(text: str) -> str: result = re.sub(regex_urls, " ", text) # note: not removing apostrophes - regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" + regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-–—%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" result = re.sub(regex_expr, " ", result) result = result.replace("+", "plus").replace("&", "and") # remove extra whitespace