From c1c72ca5af65fbb9b7529ad6c7df5cb96b233561 Mon Sep 17 00:00:00 2001 From: Callum Leslie Date: Mon, 11 Jul 2022 17:56:24 +0100 Subject: [PATCH] fix: add more characters to illegal chars --- utils/voice.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/voice.py b/utils/voice.py index 0272b09..4a77833 100644 --- a/utils/voice.py +++ b/utils/voice.py @@ -81,7 +81,7 @@ def sanitize_text(text: str) -> str: result = re.sub(regex_urls, " ", text) # note: not removing apostrophes - regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" + regex_expr = r"\s['|’]|['|’]\s|[\^_~@!&;#:\-–—%“”‘\"%\*/{}\[\]\(\)\\|<>=+]" result = re.sub(regex_expr, " ", result) result = result.replace("+", "plus").replace("&", "and") # remove extra whitespace