diff --git a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py index 527c167e1..1250e96ca 100644 --- a/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py +++ b/paddlespeech/t2s/frontend/zh_normalization/text_normlization.py @@ -65,7 +65,7 @@ class TextNormalizer(): if lang == "zh": text = text.replace(" ", "") # 过滤掉特殊字符 - text = re.sub(r'[《》【】<=>{}()()#&@“”^_|…\\]', '', text) + text = re.sub(r'[——《》【】<=>{}()()#&@“”^_|…\\]', '', text) text = self.SENTENCE_SPLITOR.sub(r'\1\n', text) text = text.strip() sentences = [sentence.strip() for sentence in re.split(r'\n+', text)] @@ -110,7 +110,8 @@ class TextNormalizer(): sentence = sentence.replace('χ', '器') sentence = sentence.replace('ψ', '普赛').replace('Ψ', '普赛') sentence = sentence.replace('ω', '欧米伽').replace('Ω', '欧米伽') - sentence = sentence.replace("——", "--") + # re filter special characters, have one more character "-" than line 68 + sentence = re.sub(r'[-——《》【】<=>{}()()#&@“”^_|…\\]', '', sentence) return sentence def normalize_sentence(self, sentence: str) -> str: @@ -149,6 +150,5 @@ class TextNormalizer(): def normalize(self, text: str) -> List[str]: sentences = self._split(text) - sentences = [self.normalize_sentence(sent) for sent in sentences] return sentences