From 413148d8e9c33e14b6065922a027c50279b2e8c1 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Sat, 26 Nov 2022 03:56:56 +0000
Subject: [PATCH] Strip dashes with other special characters in zh text normalization

---
 .../t2s/frontend/zh_normalization/text_normlization.py      | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
index 527c167e1..1250e96ca 100644
--- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
+++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py
@@ -65,7 +65,7 @@ class TextNormalizer():
         if lang == "zh":
             text = text.replace(" ", "")
             # 过滤掉特殊字符
-            text = re.sub(r'[《》【】<=>{}()（）#&@“”^_|…\\]', '', text)
+            text = re.sub(r'[——《》【】<=>{}()（）#&@“”^_|…\\]', '', text)
         text = self.SENTENCE_SPLITOR.sub(r'\1\n', text)
         text = text.strip()
         sentences = [sentence.strip() for sentence in re.split(r'\n+', text)]
@@ -110,7 +110,8 @@ class TextNormalizer():
         sentence = sentence.replace('χ', '器')
         sentence = sentence.replace('ψ', '普赛').replace('Ψ', '普赛')
         sentence = sentence.replace('ω', '欧米伽').replace('Ω', '欧米伽')
-        sentence = sentence.replace("——", "--")
+        # Filter special characters: same set as the regex on line 68, plus "-"
+        sentence = re.sub(r'[-——《》【】<=>{}()（）#&@“”^_|…\\]', '', sentence)
         return sentence
 
     def normalize_sentence(self, sentence: str) -> str:
@@ -149,6 +150,5 @@ class TextNormalizer():
 
     def normalize(self, text: str) -> List[str]:
         sentences = self._split(text)
-
         sentences = [self.normalize_sentence(sent) for sent in sentences]
         return sentences