|
|
@ -10,7 +10,8 @@ def sanitize_text(text):
|
|
|
|
"""
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
# remove any urls from the text
|
|
|
|
# remove any urls from the text
|
|
|
|
regex_urls = r"((http|https)://[^\s]+)"
|
|
|
|
regex_urls = r'((http|https)\:\/\/)?[a-zA-Z0-9\.\/\?\:@\-_=#]+\.([a-zA-Z]){2,6}([a-zA-Z0-9\.\&\/\?\:@\-_=#])*'
|
|
|
|
|
|
|
|
|
|
|
|
result = re.sub(regex_urls, " ", text)
|
|
|
|
result = re.sub(regex_urls, " ", text)
|
|
|
|
|
|
|
|
|
|
|
|
# note: not removing apostrophes
|
|
|
|
# note: not removing apostrophes
|
|
|
|