Updated voice.py so that URL removal is applied too

3 years ago · cb3f825014
parent 45531df823
commit cb3f825014
1 changed files with 7 additions and 2 deletions
--- a/utils/voice.py
+++ b/utils/voice.py
@@ -4,13 +4,18 @@ import re
 def sanitize_text(text):
    """
    Sanitizes the text for tts.
-    What gets removed:
+       What gets removed:
    - following characters`^_~@!&;#:-%“”‘"%*/{}[]()\|<>?=+`
    - any http or https links
    """
    # remove any urls from the text
    regex_urls = r"((http|https)://[^\s]+)"
    result = re.sub(regex_urls, " ", text)
    # note: not removing apostrophes
    regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
-    result = re.sub(regex_expr, " ", text)
+    result = re.sub(regex_expr, " ", result)
    # remove extra whitespace
    return " ".join(result.split())