updated voice.py with uri removal too

pull/418/head
Jason 3 years ago
parent 45531df823
commit cb3f825014

@ -4,13 +4,18 @@ import re
def sanitize_text(text: str) -> str:
    r"""Sanitize text before it is handed to the text-to-speech engine.

    What gets removed (each match is replaced by a space):
    - any ``http://`` or ``https://`` link, up to the next whitespace
    - an apostrophe or pipe that is directly next to whitespace
      (apostrophes inside words, e.g. ``don't``, are kept)
    - the following characters: `^_~@!&;#:-%“”‘"*/{}[]()\|<>=+`

    Question marks, periods and commas are NOT removed.

    Runs of whitespace are then collapsed to a single space and
    leading/trailing whitespace is stripped.

    Args:
        text: The raw text to clean.

    Returns:
        The cleaned, single-spaced text.
    """
    # Strip URLs first, while their "://", "/" and "?" are still intact;
    # running the character filter first would break links into fragments.
    url_pattern = r"https?://\S+"
    result = re.sub(url_pattern, " ", text)

    # note: not removing apostrophes that sit inside a word
    char_pattern = r"\s['|]|['|]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]"
    result = re.sub(char_pattern, " ", result)

    # remove extra whitespace
    return " ".join(result.split())

Loading…
Cancel
Save