import re

# Apostrophes that sit on a word edge (start of text, end of text, or next to
# whitespace) act as quotation marks and are dropped; apostrophes inside a
# word ("don't", "it’s") are kept so contractions are still pronounced.
# Lookarounds are used so the neighbouring whitespace is not consumed.
_EDGE_APOSTROPHE = r"(?<!\S)['’]|['’](?!\S)"

# Symbols replaced by a space before the text is handed to the TTS engine.
_TTS_SYMBOLS = r"[\^_~@!&;#:\-%“”‘\"*/{}\[\]()\\|<>=+]"

# Compiled once at import time instead of on every call.
_TTS_NOISE = re.compile(_EDGE_APOSTROPHE + "|" + _TTS_SYMBOLS)


def sanitize_text(reddit_obj: str) -> str:
    r"""Sanitize a piece of text before it is passed to text-to-speech.

    Each of the following characters is replaced by a single space:
    ``^ _ ~ @ ! & ; # : - % “ ” ‘ " * / { } [ ] ( ) \ | < > = +``

    Apostrophes (``'`` and ``’``) are only removed when they sit on a word
    edge, i.e. at the start or end of the text or next to whitespace, where
    they act as quotation marks. Apostrophes inside a word are preserved.
    Question marks, commas and full stops are left untouched.

    Args:
        reddit_obj: The raw text to clean. Despite the name, this is a plain
            string (a thread title or a comment body), not a dict.

    Returns:
        The cleaned text with runs of whitespace collapsed to single spaces
        and no leading or trailing whitespace.
    """
    cleaned = _TTS_NOISE.sub(" ", reddit_obj)

    # str.split() with no arguments splits on any whitespace run and drops
    # empty pieces, so joining with one space normalises all spacing.
    return " ".join(cleaned.split())