diff --git a/utils/voice.py b/utils/voice.py index e6f3989..13633da 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -10,7 +10,8 @@ def sanitize_text(text): """ # remove any urls from the text - regex_urls = r"((http|https)://[^\s]+)" + regex_urls = r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*' + result = re.sub(regex_urls, " ", text) # note: not removing apostrophes