From 29de549ef3c1ca4b181d905e8a9a70803a3426ac Mon Sep 17 00:00:00 2001 From: Callum Leslie Date: Mon, 11 Jul 2022 17:52:12 +0100 Subject: [PATCH 1/5] fix: skip comments that are blank after sanitisation --- reddit/subreddit.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/reddit/subreddit.py b/reddit/subreddit.py index b64a52a..c316ccf 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -7,6 +7,7 @@ from praw.models import MoreComments from utils.console import print_step, print_substep from utils.subreddit import get_subreddit_undone from utils.videos import check_done +from utils.voice import sanitize_text def get_subreddit_threads(POST_ID: str): @@ -95,6 +96,9 @@ def get_subreddit_threads(POST_ID: str): if top_level_comment.body in ["[removed]", "[deleted]"]: continue # # see https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/78 if not top_level_comment.stickied: + sanitised = sanitize_text(top_level_comment.body) + if not sanitised or sanitised != " ": + continue if len(top_level_comment.body) <= int( settings.config["reddit"]["thread"]["max_comment_length"] ): From c1c72ca5af65fbb9b7529ad6c7df5cb96b233561 Mon Sep 17 00:00:00 2001 From: Callum Leslie Date: Mon, 11 Jul 2022 17:56:24 +0100 Subject: [PATCH 2/5] fix: add more characters to illegal chars --- utils/voice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/voice.py b/utils/voice.py index 0272b09..4a77833 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -81,7 +81,7 @@ def sanitize_text(text: str) -> str: result = re.sub(regex_urls, " ", text) # note: not removing apostrophes - regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" + regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-–—%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" result = re.sub(regex_expr, " ", result) result = result.replace("+", "plus").replace("&", "and") # remove extra whitespace From 302cd355872a8d298f84787f53879c00bef26d47 Mon Sep 17 00:00:00 2001 From: Callum Leslie Date: Mon, 11 Jul 2022 17:58:49 +0100 Subject: [PATCH 3/5] fix: logic error --- reddit/subreddit.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/reddit/subreddit.py b/reddit/subreddit.py index c316ccf..4cfd669 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -97,7 +97,8 @@ def get_subreddit_threads(POST_ID: str): continue # # see https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/78 if not top_level_comment.stickied: sanitised = sanitize_text(top_level_comment.body) - if not sanitised or sanitised != " ": + print(sanitised) + if not sanitised or sanitised == " ": continue if len(top_level_comment.body) <= int( settings.config["reddit"]["thread"]["max_comment_length"] From 721eca4d3e7e745531d68bbf4865624c2592be06 Mon Sep 17 00:00:00 2001 From: Callum Leslie Date: Mon, 11 Jul 2022 18:00:50 +0100 Subject: [PATCH 4/5] chore: remove debug statement --- reddit/subreddit.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reddit/subreddit.py b/reddit/subreddit.py index 4cfd669..829a3a8 100644 --- a/reddit/subreddit.py +++ b/reddit/subreddit.py @@ -97,7 +97,6 @@ def get_subreddit_threads(POST_ID: str): continue # # see https://github.com/JasonLovesDoggo/RedditVideoMakerBot/issues/78 if not top_level_comment.stickied: sanitised = sanitize_text(top_level_comment.body) - print(sanitised) if not sanitised or sanitised == " ": continue if len(top_level_comment.body) <= int( From a3e7b979f3e2833e6c46a651142360691b737024 Mon Sep 17 00:00:00 2001 From: Callum Leslie Date: Mon, 11 Jul 2022 19:36:46 +0100 Subject: [PATCH 5/5] fix: regex splitter no longer skips newlines --- TTS/engine_wrapper.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/TTS/engine_wrapper.py b/TTS/engine_wrapper.py index a171db7..df90569 100644 --- a/TTS/engine_wrapper.py +++ b/TTS/engine_wrapper.py @@ -79,7 +79,9 @@ class TTSEngine: split_files = [] split_text = [ x.group().strip() - for x in re.finditer(rf" *((.{{0,{self.tts_module.max_chars}}})(\.|.$))", text) + for x in re.finditer( + r" *(((.|\n){0," + str(self.tts_module.max_chars) + "})(\.|.$))", text + ) ] idy = None