From 13a7fa9808d0faaa1589e0ef0659c537bd4d5dbb Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Fri, 14 Oct 2022 15:37:33 +0800
Subject: [PATCH 01/20] enable chinese words' pinyin specified  in text of ssml
 formats, test=tts

---
 paddlespeech/t2s/exps/syn_utils.py       |   6 +-
 paddlespeech/t2s/frontend/zh_frontend.py | 156 ++++++++++++++++++++++
 paddlespeech/t2s/ssml/xml_processor.py   | 163 +++++++++++++++++++++++
 3 files changed, 323 insertions(+), 2 deletions(-)
 create mode 100644 paddlespeech/t2s/ssml/xml_processor.py

diff --git a/paddlespeech/t2s/exps/syn_utils.py b/paddlespeech/t2s/exps/syn_utils.py
index 15d8dfb78..f9d1cd1b5 100644
--- a/paddlespeech/t2s/exps/syn_utils.py
+++ b/paddlespeech/t2s/exps/syn_utils.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import math
 import os
+import re
 from pathlib import Path
 from typing import Any
 from typing import Dict
@@ -33,6 +34,7 @@ from paddlespeech.t2s.frontend.mix_frontend import MixFrontend
 from paddlespeech.t2s.frontend.zh_frontend import Frontend
 from paddlespeech.t2s.modules.normalizer import ZScore
 from paddlespeech.utils.dynamic_import import dynamic_import
+
 # remove [W:onnxruntime: xxx] from ort
 ort.set_default_logger_severity(3)
 
@@ -103,7 +105,7 @@ def get_sentences(text_file: Optional[os.PathLike], lang: str='zh'):
     sentences = []
     with open(text_file, 'rt') as f:
         for line in f:
-            items = line.strip().split()
+            items = re.split(r"\s+", line.strip(), 1)
             utt_id = items[0]
             if lang == 'zh':
                 sentence = "".join(items[1:])
@@ -180,7 +182,7 @@ def run_frontend(frontend: object,
                  to_tensor: bool=True):
     outs = dict()
     if lang == 'zh':
-        input_ids = frontend.get_input_ids(
+        input_ids = frontend.get_input_ids_ssml(
             text,
             merge_sentences=merge_sentences,
             get_tone_ids=get_tone_ids,
diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
index 722eed601..25558780b 100644
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import re
+from operator import itemgetter
 from typing import Dict
 from typing import List
 
@@ -31,6 +32,7 @@ from paddlespeech.t2s.frontend.g2pw import G2PWOnnxConverter
 from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon
 from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi
 from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer
+from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor
 
 INITIALS = [
     'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
@@ -81,6 +83,7 @@ class Frontend():
                  g2p_model="g2pW",
                  phone_vocab_path=None,
                  tone_vocab_path=None):
+        self.mix_ssml_processor = MixTextProcessor()
         self.tone_modifier = ToneSandhi()
         self.text_normalizer = TextNormalizer()
         self.punc = "：，；。？！“”‘’':,;.?!"
@@ -143,6 +146,7 @@ class Frontend():
                 tone_id = [line.strip().split() for line in f.readlines()]
             for tone, id in tone_id:
                 self.vocab_tones[tone] = int(id)
+        self.mix_ssml_processor.__repr__()
 
     def _init_pypinyin(self):
         large_pinyin.load()
@@ -281,6 +285,65 @@ class Frontend():
             phones_list.append(merge_list)
         return phones_list
 
+    def _split_word_to_char(self, words):
+        """Return the items of ``words`` (the characters of a str) as a list."""
+        # list() walks the input in order, exactly like an explicit
+        # append loop would.
+        return list(words)
+
+    # With SSML input the pinyin is specified by the caller, not predicted.
+    def _g2p_assign(self,
+                    words: List[str],
+                    pinyin_spec: List[str],
+                    merge_sentences: bool=True) -> List[List[str]]:
+        """Convert caller-specified pinyin into a phone sequence.
+
+        Args:
+            words: normalized sentences; only the characters of ``words[0]``
+                are paired with ``pinyin_spec``.
+            pinyin_spec: pinyin syllables, paired in order with those
+                characters; unpaired syllables are ignored.
+            merge_sentences: if True, a trailing 'sp' is removed.
+
+        Returns:
+            A one-element list holding the phones (empty for empty input).
+        """
+        # An empty normalization result yields no phones instead of raising.
+        chars = self._split_word_to_char(words[0]) if words else []
+        initials = []
+        finals = []
+        for pinyin, _ in zip(pinyin_spec, chars):
+            pinyin = pinyin.replace("u:", "v")
+            # self.pinyin2phone maps pinyin to "initial final", or to a
+            # single token, which is treated as a final without initial.
+            if pinyin in self.pinyin2phone:
+                parts = self.pinyin2phone[pinyin].split(" ")
+                if len(parts) == 2:
+                    initials.append(parts[0])
+                    finals.append(parts[1])
+                elif len(parts) == 1:
+                    # parts[1] does not exist here; the token is parts[0].
+                    initials.append('')
+                    finals.append(parts[0])
+            else:
+                # Not a known pinyin (possibly punctuation): pass it through.
+                initials.append(pinyin)
+                finals.append(pinyin)
+
+        phones = []
+        for c, v in zip(initials, finals):
+            if c and c not in self.punc:
+                phones.append(c)
+            if c and c in self.punc:
+                phones.append('sp')
+            if v and v not in self.punc:
+                phones.append(v)
+        # Remove the last 'sp': the training data has none at the end, so
+        # keeping it would add noise at the end.
+        if merge_sentences and phones and phones[-1] == 'sp':
+            phones = phones[:-1]
+        return [phones]
+
     def _merge_erhua(self,
                      initials: List[str],
                      finals: List[str],
@@ -396,6 +459,52 @@ class Frontend():
             print("----------------------------")
         return phonemes
 
+    # SSML counterpart of get_phonemes: segments may carry explicit pinyin.
+    def get_phonemes_ssml(self,
+                          ssml_inputs: list,
+                          merge_sentences: bool=True,
+                          with_erhua: bool=True,
+                          robot: bool=False,
+                          print_info: bool=False) -> List[List[str]]:
+        """Turn ``[text, pinyin_list]`` segments into one phoneme sequence.
+
+        Segments with an empty pinyin list go through ``self._g2p``, the
+        others through ``self._g2p_assign``. ``robot`` turns every tone
+        digit into ``1`` except for ``er2``; ``print_info`` prints the
+        normalized text and the result, a one-element list of phonemes.
+        """
+        normalized = []
+        all_phonemes = []
+        for word_pinyin_item in ssml_inputs:
+            sentence, pinyin_spec = itemgetter(0, 1)(word_pinyin_item)
+            sentences = self.text_normalizer.normalize(sentence)
+            normalized.extend(sentences)
+            if len(pinyin_spec) == 0:
+                phonemes = self._g2p(
+                    sentences,
+                    merge_sentences=merge_sentences,
+                    with_erhua=with_erhua)
+            else:
+                phonemes = self._g2p_assign(
+                    sentences, pinyin_spec, merge_sentences=merge_sentences)
+            all_phonemes.extend(phonemes)
+        merged = [item for sentence in all_phonemes for item in sentence]
+        if robot:
+            # `er` only has tone `2`, so "er2" keeps its tone.
+            merged = [
+                x[:-1] + "1" if x[-1] in "12345" and x != "er2" else x
+                for x in merged
+            ]
+        if print_info:
+            print("----------------------------")
+            print("text norm results:")
+            print(normalized)
+            print("----------------------------")
+            print("g2p results:")
+            print(merged)
+            print("----------------------------")
+        return [merged]
+
     def get_input_ids(self,
                       sentence: str,
                       merge_sentences: bool=True,
@@ -405,6 +514,7 @@ class Frontend():
                       add_blank: bool=False,
                       blank_token: str="<pad>",
                       to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
+
         phonemes = self.get_phonemes(
             sentence,
             merge_sentences=merge_sentences,
@@ -437,3 +547,49 @@ class Frontend():
         if temp_phone_ids:
             result["phone_ids"] = temp_phone_ids
         return result
+
+    # SSML-aware variant of get_input_ids.
+    def get_input_ids_ssml(
+            self,
+            sentence: str,
+            merge_sentences: bool=True,
+            get_tone_ids: bool=False,
+            robot: bool=False,
+            print_info: bool=False,
+            add_blank: bool=False,
+            blank_token: str="<pad>",
+            to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
+        """Convert text that may embed a ``<speak>`` block into id lists.
+
+        The text is split by ``MixTextProcessor.get_pinyin_split`` and
+        phonemized by ``get_phonemes_ssml``; "tone_ids" / "phone_ids" are
+        only present in the result when at least one id list was built.
+        """
+        segments = MixTextProcessor.get_pinyin_split(sentence)
+        phoneme_groups = self.get_phonemes_ssml(
+            segments,
+            merge_sentences=merge_sentences,
+            print_info=print_info,
+            robot=robot)
+        all_phone_ids = []
+        all_tone_ids = []
+        for group in phoneme_groups:
+            phones, tones = self._get_phone_tone(
+                group, get_tone_ids=get_tone_ids)
+            if add_blank:
+                phones = insert_after_character(phones, blank_token)
+            if tones:
+                ids = self._t2id(tones)
+                # with onnxruntime the first paddle.to_tensor() call is too slow
+                all_tone_ids.append(
+                    paddle.to_tensor(ids) if to_tensor else ids)
+            if phones:
+                ids = self._p2id(phones)
+                all_phone_ids.append(
+                    paddle.to_tensor(ids) if to_tensor else ids)
+        result = {}
+        if all_tone_ids:
+            result["tone_ids"] = all_tone_ids
+        if all_phone_ids:
+            result["phone_ids"] = all_phone_ids
+        return result
diff --git a/paddlespeech/t2s/ssml/xml_processor.py b/paddlespeech/t2s/ssml/xml_processor.py
new file mode 100644
index 000000000..54f24f59f
--- /dev/null
+++ b/paddlespeech/t2s/ssml/xml_processor.py
@@ -0,0 +1,163 @@
+# -*- coding: utf-8 -*-
+import re
+import xml.dom.minidom
+import xml.parsers.expat
+from xml.dom.minidom import Node
+from xml.dom.minidom import parseString
+'''
+Note:  xml 有5种特殊字符， &<>"'
+其一，采用<![CDATA[ ]]>特殊标签，将包含特殊字符的字符串封装起来。
+例如：
+<TitleName><![CDATA["姓名"]]></TitleName>
+其二，使用XML转义序列表示这些特殊的字符，这5个特殊字符所对应XML转义序列为：
+&  &amp;
+<  &lt;
+>  &gt;
+"  &quot;
+'  &apos;
+例如：
+<TitleName>&quot;姓名&quot;</TitleName>
+
+'''
+
+
+class MixTextProcessor():
+    """Helpers for text that mixes plain content with a <speak> xml block.
+
+    The block is located by regex; its content is parsed by DomXml.
+    """
+
+    # Leading text, a <speak>...</speak> block, trailing text.
+    _SPLIT_PATTERN = re.compile(
+        r'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$', re.M | re.S)
+
+    def __repr__(self):
+        # repr() requires a str; printing and returning None raises TypeError.
+        return "@an MixTextProcessor class"
+
+    def get_xml_content(self, mixstr):
+        '''Return the first <speak>...</speak> span of mixstr, or None.'''
+        xmlptn = re.compile(r"<speak>.*?</speak>", re.M | re.S)
+        ctn = re.search(xmlptn, mixstr)
+        if ctn:
+            return ctn.group(0)
+        else:
+            return None
+
+    def get_content_split(self, mixstr):
+        ''' 文本分解，顺序加了列表中，按非xml 和 xml 分开，对应的字符串,带标点符号
+        不能去除空格，因为xml 中tag 属性带空格
+        '''
+        ctlist = []
+        # print("Testing:",mixstr[:20])
+        # Yields [text before, <speak> block, text after], or [mixstr] when
+        # there is no block. Whitespace is kept: xml tag attributes need it.
+        mat = self._SPLIT_PATTERN.match(mixstr)
+        if mat:
+            ctlist.extend(mat.group(1, 2, 3))
+        else:
+            ctlist.append(mixstr)
+        return ctlist
+
+    @classmethod
+    def get_pinyin_split(cls, mixstr):
+        '''Split mixstr into [text, pinyin_list] pairs.
+
+        Text around the <speak> block gets an empty pinyin list; the block
+        is expanded by DomXml.get_pinyins_for_xml. Without a block the
+        result is [[mixstr, []]].
+        '''
+        mat = cls._SPLIT_PATTERN.match(mixstr)
+        if not mat:
+            return [[mixstr, []]]
+        pre_xml, in_xml, after_xml = mat.group(1, 2, 3)
+        pinyinlist = DomXml(in_xml).get_pinyins_for_xml()
+        return [[pre_xml, []]] + pinyinlist + [[after_xml, []]]
+
+
+class DomXml():
+    def __init__(self, xmlstr):
+        print("Parse xml str:", xmlstr)
+        self.tdom = parseString(xmlstr)  #Document
+        # print("tdom:",type(self.tdom)) 
+        self.root = self.tdom.documentElement  #Element
+        # print("root:",type(self.root)) 
+        self.rnode = self.tdom.childNodes  #NodeList
+        # print("rnode:",type(self.rnode))
+        pass
+
+    def get_text(self):
+        '''返回xml 内容的所有文本内容的 列表'''
+        res = []
+
+        for x1 in self.rnode:
+            if x1.nodeType == Node.TEXT_NODE:
+                res.append(x1.value)
+            else:
+                for x2 in x1.childNodes:
+                    if isinstance(x2, xml.dom.minidom.Text):
+                        res.append(x2.data)
+                    else:
+                        for x3 in x2.childNodes:
+                            if isinstance(x3, xml.dom.minidom.Text):
+                                res.append(x3.data)
+                            else:
+                                print("len(nodes of x3):", len(x3.childNodes))
+
+        return res
+
+    def get_xmlchild_list(self):
+        '''返回xml 内容的列表， 包括所有文本内容(不带tag)'''
+        res = []
+
+        for x1 in self.rnode:
+            if x1.nodeType == Node.TEXT_NODE:
+                res.append(x1.value)
+            else:
+                for x2 in x1.childNodes:
+                    if isinstance(x2, xml.dom.minidom.Text):
+                        res.append(x2.data)
+                    else:
+                        for x3 in x2.childNodes:
+                            if isinstance(x3, xml.dom.minidom.Text):
+                                res.append(x3.data)
+                            else:
+                                print("len(nodes of x3):", len(x3.childNodes))
+        print(res)
+        return res
+
+    def get_pinyins_for_xml(self):
+        '''返回xml 内容，如果字符串 和 拼音的 list , 如 ['''
+        res = []
+
+        for x1 in self.rnode:
+            # Builds [text, pinyins] pairs: bare text gets an empty pinyin
+            # list, text inside a tag gets that tag's `pinyin` attribute.
+            if x1.nodeType == Node.TEXT_NODE:
+                res.append([re.sub(r"\s+", "", x1.data), []])
+                continue
+            for x2 in x1.childNodes:
+                if isinstance(x2, xml.dom.minidom.Text):
+                    res.append([re.sub(r"\s+", "", x2.data), []])
+                    continue
+                if x2.nodeType != Node.ELEMENT_NODE:
+                    continue  # e.g. comments: no attributes to read
+                # Rebuilt per tag: a tag without `pinyin` used to hit an
+                # unbound name or silently reuse the previous tag's value.
+                pinyins = x2.getAttribute("pinyin").split()
+                for x3 in x2.childNodes:
+                    if isinstance(x3, xml.dom.minidom.Text):
+                        res.append([re.sub(r"\s+", "", x3.data), pinyins])
+                    else:
+                        # nested tags are only reported, not expanded
+                        print("len(nodes of x3):", len(x3.childNodes))
+
+        return res
+
+    def get_all_tags(self, tag_name):
+        '''获取所有的tag 及属性值'''
+        alltags = self.root.getElementsByTagName(tag_name)
+        for x in alltags:
+            if x.hasAttribute('pinyin'):  # pinyin
+                print(x.tagName, 'pinyin',
+                      x.getAttribute('pinyin'), x.firstChild.data)

From 278c7a41a83412f02bc4b0b98832c5076f0940cf Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Mon, 17 Oct 2022 14:59:23 +0800
Subject: [PATCH 02/20] add module define to fix ci, test=tts

---
 paddlespeech/t2s/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/paddlespeech/t2s/__init__.py b/paddlespeech/t2s/__init__.py
index 7d93c026e..57fe82a9c 100644
--- a/paddlespeech/t2s/__init__.py
+++ b/paddlespeech/t2s/__init__.py
@@ -18,5 +18,6 @@ from . import exps
 from . import frontend
 from . import models
 from . import modules
+from . import ssml
 from . import training
 from . import utils

From 29508f400b23211c9e7380800e2d02c9a16a426f Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Mon, 17 Oct 2022 16:44:29 +0800
Subject: [PATCH 03/20] to fix CI issue, test=tts

---
 paddlespeech/t2s/ssml/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 paddlespeech/t2s/ssml/__init__.py

diff --git a/paddlespeech/t2s/ssml/__init__.py b/paddlespeech/t2s/ssml/__init__.py
new file mode 100644
index 000000000..e69de29bb

From f56cc08b18f5fb6fc3254db4dd40ec3597d34f36 Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Mon, 17 Oct 2022 16:55:07 +0800
Subject: [PATCH 04/20] add license content, test=tts

---
 paddlespeech/t2s/ssml/__init__.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/paddlespeech/t2s/ssml/__init__.py b/paddlespeech/t2s/ssml/__init__.py
index e69de29bb..abf198b97 100644
--- a/paddlespeech/t2s/ssml/__init__.py
+++ b/paddlespeech/t2s/ssml/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

From 1067088debd49ba308fc55a8c55d1d04f211ff51 Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Mon, 17 Oct 2022 17:18:27 +0800
Subject: [PATCH 05/20] modify __init__

---
 paddlespeech/t2s/ssml/__init__.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/paddlespeech/t2s/ssml/__init__.py b/paddlespeech/t2s/ssml/__init__.py
index abf198b97..f344250d2 100644
--- a/paddlespeech/t2s/ssml/__init__.py
+++ b/paddlespeech/t2s/ssml/__init__.py
@@ -11,3 +11,5 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
+from .xml_processor import *

From 89e9ea69ebb884d5ba13d02c66c29475a153f2ea Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Mon, 17 Oct 2022 17:29:46 +0800
Subject: [PATCH 06/20] modify __init__

---
 paddlespeech/t2s/ssml/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/paddlespeech/t2s/ssml/__init__.py b/paddlespeech/t2s/ssml/__init__.py
index f344250d2..9b4db053b 100644
--- a/paddlespeech/t2s/ssml/__init__.py
+++ b/paddlespeech/t2s/ssml/__init__.py
@@ -11,5 +11,4 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 from .xml_processor import *

From f295d2d4450099f2cf8b7e2d417a9c9599230563 Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Mon, 17 Oct 2022 18:00:13 +0800
Subject: [PATCH 07/20] remove useless code

---
 paddlespeech/t2s/frontend/zh_frontend.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py
index 25558780b..e30286986 100644
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@@ -146,7 +146,6 @@ class Frontend():
                 tone_id = [line.strip().split() for line in f.readlines()]
             for tone, id in tone_id:
                 self.vocab_tones[tone] = int(id)
-        self.mix_ssml_processor.__repr__()
 
     def _init_pypinyin(self):
         large_pinyin.load()

From 050d766915c01a59fd4880dfb263dbc30605944f Mon Sep 17 00:00:00 2001
From: tianhao zhang <15600919271@163.com>
Date: Wed, 19 Oct 2022 05:31:18 +0000
Subject: [PATCH 08/20] fix u2pp model

---
 docs/source/released_model.md              |  2 +-
 paddlespeech/cli/asr/infer.py              |  4 ++--
 paddlespeech/resource/model_alias.py       |  1 -
 paddlespeech/resource/pretrained_models.py | 26 +++-------------------
 4 files changed, 6 insertions(+), 27 deletions(-)

diff --git a/docs/source/released_model.md b/docs/source/released_model.md
index a2456f1fe..586f17c34 100644
--- a/docs/source/released_model.md
+++ b/docs/source/released_model.md
@@ -9,7 +9,7 @@ Acoustic Model | Training Data | Token-based | Size | Descriptions | CER | WER |
 [Ds2 Online Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_0.2.1.model.tar.gz) | Aishell Dataset | Char-based | 491 MB  | 2 Conv + 5 LSTM layers | 0.0666 |-| 151 h | [D2 Online Aishell ASR0](../../examples/aishell/asr0) | onnx/inference/python |
 [Ds2 Offline Aishell ASR0 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_offline_aishell_ckpt_1.0.1.model.tar.gz)| Aishell Dataset | Char-based | 1.4 GB | 2 Conv + 5 bidirectional LSTM layers| 0.0554 |-| 151 h | [Ds2 Offline Aishell ASR0](../../examples/aishell/asr0) | inference/python |
 [Conformer Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_wenetspeech_ckpt_1.0.0a.model.tar.gz) | WenetSpeech Dataset | Char-based | 457 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.11 (test\_net) 0.1879 (test\_meeting) |-| 10000 h |- | python |
-[Conformer U2PP Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.4.model.tar.gz) | WenetSpeech Dataset | Char-based | 476 MB  | Encoder:Conformer, Decoder:BiTransformer, Decoding method: Attention rescoring| 0.047198 (aishell test\_-1) 0.059212 (aishell test\_16) |-| 10000 h |- | python |
+[Conformer U2PP Online Wenetspeech ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz) | WenetSpeech Dataset | Char-based | 476 MB  | Encoder:Conformer, Decoder:BiTransformer, Decoding method: Attention rescoring| 0.047198 (aishell test\_-1) 0.059212 (aishell test\_16) |-| 10000 h |- | python |
 [Conformer Online Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_chunk_conformer_aishell_ckpt_0.2.0.model.tar.gz) | Aishell Dataset | Char-based | 189 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring| 0.0544 |-| 151 h | [Conformer Online Aishell ASR1](../../examples/aishell/asr1) | python |
 [Conformer Offline Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_conformer_aishell_ckpt_1.0.1.model.tar.gz) | Aishell Dataset | Char-based | 189 MB  | Encoder:Conformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0460 |-| 151 h | [Conformer Offline Aishell ASR1](../../examples/aishell/asr1) | python |
 [Transformer Aishell ASR1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr1/asr1_transformer_aishell_ckpt_0.1.1.model.tar.gz) | Aishell Dataset | Char-based | 128 MB | Encoder:Transformer, Decoder:Transformer, Decoding method: Attention rescoring | 0.0523 || 151 h | [Transformer  Aishell ASR1](../../examples/aishell/asr1) | python |
diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index 437f64631..004143361 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -52,7 +52,7 @@ class ASRExecutor(BaseExecutor):
         self.parser.add_argument(
             '--model',
             type=str,
-            default='conformer_u2pp_wenetspeech',
+            default='conformer_u2pp_online_wenetspeech',
             choices=[
                 tag[:tag.index('-')]
                 for tag in self.task_resource.pretrained_models.keys()
@@ -470,7 +470,7 @@ class ASRExecutor(BaseExecutor):
     @stats_wrapper
     def __call__(self,
                  audio_file: os.PathLike,
-                 model: str='conformer_u2pp_wenetspeech',
+                 model: str='conformer_u2pp_online_wenetspeech',
                  lang: str='zh',
                  sample_rate: int=16000,
                  config: os.PathLike=None,
diff --git a/paddlespeech/resource/model_alias.py b/paddlespeech/resource/model_alias.py
index f5ec655b7..8e9ecc4ba 100644
--- a/paddlespeech/resource/model_alias.py
+++ b/paddlespeech/resource/model_alias.py
@@ -25,7 +25,6 @@ model_alias = {
     "deepspeech2online": ["paddlespeech.s2t.models.ds2:DeepSpeech2Model"],
     "conformer": ["paddlespeech.s2t.models.u2:U2Model"],
     "conformer_online": ["paddlespeech.s2t.models.u2:U2Model"],
-    "conformer_u2pp": ["paddlespeech.s2t.models.u2:U2Model"],
     "conformer_u2pp_online": ["paddlespeech.s2t.models.u2:U2Model"],
     "transformer": ["paddlespeech.s2t.models.u2:U2Model"],
     "wenetspeech": ["paddlespeech.s2t.models.u2:U2Model"],
diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py
index efd6bb3f2..df50a6a9d 100644
--- a/paddlespeech/resource/pretrained_models.py
+++ b/paddlespeech/resource/pretrained_models.py
@@ -68,32 +68,12 @@ asr_dynamic_pretrained_models = {
             '',
         },
     },
-    "conformer_u2pp_wenetspeech-zh-16k": {
-        '1.1': {
-            'url':
-            'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.3.model.tar.gz',
-            'md5':
-            '662b347e1d2131b7a4dc5398365e2134',
-            'cfg_path':
-            'model.yaml',
-            'ckpt_path':
-            'exp/chunk_conformer_u2pp/checkpoints/avg_10',
-            'model':
-            'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams',
-            'params':
-            'exp/chunk_conformer_u2pp/checkpoints/avg_10.pdparams',
-            'lm_url':
-            '',
-            'lm_md5':
-            '',
-        },
-    },
     "conformer_u2pp_online_wenetspeech-zh-16k": {
-        '1.1': {
+        '1.3': {
             'url':
-            'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.1.4.model.tar.gz',
+            'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz',
             'md5':
-            '3100fc1eac5779486cab859366992d0b',
+            '62d230c1bf27731192aa9d3b8deca300',
             'cfg_path':
             'model.yaml',
             'ckpt_path':

From cb76e664017f15b7963eca0e126e5429f0a58ba9 Mon Sep 17 00:00:00 2001
From: dahu1 <707133607@qq.com>
Date: Wed, 19 Oct 2022 15:54:08 +0800
Subject: [PATCH 09/20] =?UTF-8?q?1.token=E9=85=8D=E7=BD=AE=E4=B8=8D?=
 =?UTF-8?q?=E5=86=99=E6=AD=BB=EF=BC=8C2.text=E6=98=BE=E7=A4=BA=E4=B8=8D?=
 =?UTF-8?q?=E4=B9=B1=E7=A0=81,=20test=3Dasr?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../text/exps/ernie_linear/punc_restore.py        | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/paddlespeech/text/exps/ernie_linear/punc_restore.py b/paddlespeech/text/exps/ernie_linear/punc_restore.py
index 2cb4d0719..98804606c 100644
--- a/paddlespeech/text/exps/ernie_linear/punc_restore.py
+++ b/paddlespeech/text/exps/ernie_linear/punc_restore.py
@@ -25,8 +25,6 @@ DefinedClassifier = {
     'ErnieLinear': ErnieLinear,
 }
 
-tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0')
-
 
 def _clean_text(text, punc_list):
     text = text.lower()
@@ -35,7 +33,7 @@ def _clean_text(text, punc_list):
     return text
 
 
-def preprocess(text, punc_list):
+def preprocess(text, punc_list, tokenizer):
     clean_text = _clean_text(text, punc_list)
     assert len(clean_text) > 0, f'Invalid input string: {text}'
     tokenized_input = tokenizer(
@@ -51,7 +49,8 @@ def test(args):
     with open(args.config) as f:
         config = CfgNode(yaml.safe_load(f))
     print("========Args========")
-    print(yaml.safe_dump(vars(args)))
+    print(yaml.safe_dump(vars(args), allow_unicode=True))
+    # print(args)
     print("========Config========")
     print(config)
 
@@ -61,10 +60,16 @@ def test(args):
             punc_list.append(line.strip())
 
     model = DefinedClassifier[config["model_type"]](**config["model"])
+    # print(model)
+
+    pretrained_token = config['data_params']['pretrained_token']
+    tokenizer = ErnieTokenizer.from_pretrained(pretrained_token)
+    # tokenizer = ErnieTokenizer.from_pretrained('ernie-1.0')
+
     state_dict = paddle.load(args.checkpoint)
     model.set_state_dict(state_dict["main_params"])
     model.eval()
-    _inputs = preprocess(args.text, punc_list)
+    _inputs = preprocess(args.text, punc_list, tokenizer)
     seq_len = _inputs['seq_len']
     input_ids = paddle.to_tensor(_inputs['input_ids']).unsqueeze(0)
     seg_ids = paddle.to_tensor(_inputs['seg_ids']).unsqueeze(0)

From 3ac7ac253f66c46f01aa11be3de95d6177f47107 Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Thu, 20 Oct 2022 09:29:11 +0800
Subject: [PATCH 10/20] fix review issue,test=tts

---
 paddlespeech/t2s/ssml/xml_processor.py | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/paddlespeech/t2s/ssml/xml_processor.py b/paddlespeech/t2s/ssml/xml_processor.py
index 54f24f59f..b39121347 100644
--- a/paddlespeech/t2s/ssml/xml_processor.py
+++ b/paddlespeech/t2s/ssml/xml_processor.py
@@ -35,8 +35,8 @@ class MixTextProcessor():
             return None
 
     def get_content_split(self, mixstr):
-        ''' 文本分解，顺序加了列表中，按非xml 和 xml 分开，对应的字符串,带标点符号
-        不能去除空格，因为xml 中tag 属性带空格
+        ''' 文本分解，顺序加了列表中，按非 xml 和 xml 分开，对应的字符串,带标点符号
+        不能去除空格，因为 xml 中tag 属性带空格
         '''
         ctlist = []
         # print("Testing:",mixstr[:20])
@@ -77,17 +77,12 @@ class MixTextProcessor():
 
 class DomXml():
     def __init__(self, xmlstr):
-        print("Parse xml str:", xmlstr)
         self.tdom = parseString(xmlstr)  #Document
-        # print("tdom:",type(self.tdom)) 
         self.root = self.tdom.documentElement  #Element
-        # print("root:",type(self.root)) 
         self.rnode = self.tdom.childNodes  #NodeList
-        # print("rnode:",type(self.rnode))
-        pass
 
     def get_text(self):
-        '''返回xml 内容的所有文本内容的 列表'''
+        '''返回 xml 内容的所有文本内容的列表'''
         res = []
 
         for x1 in self.rnode:
@@ -107,7 +102,7 @@ class DomXml():
         return res
 
     def get_xmlchild_list(self):
-        '''返回xml 内容的列表， 包括所有文本内容(不带tag)'''
+        '''返回 xml 内容的列表，包括所有文本内容(不带 tag)'''
         res = []
 
         for x1 in self.rnode:
@@ -127,7 +122,7 @@ class DomXml():
         return res
 
     def get_pinyins_for_xml(self):
-        '''返回xml 内容，如果字符串 和 拼音的 list , 如 ['''
+        '''返回 xml 内容，字符串和拼音的 list '''
         res = []
 
         for x1 in self.rnode:
@@ -155,7 +150,7 @@ class DomXml():
         return res
 
     def get_all_tags(self, tag_name):
-        '''获取所有的tag 及属性值'''
+        '''获取所有的 tag 及属性值'''
         alltags = self.root.getElementsByTagName(tag_name)
         for x in alltags:
             if x.hasAttribute('pinyin'):  # pinyin

From 7d5ae651ce92d0bd953f0de54b81d00cf951b01d Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Thu, 20 Oct 2022 10:07:21 +0800
Subject: [PATCH 11/20] add readme thanks

---
 README.md    | 2 +-
 README_cn.md | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 49e40624d..0abb3fd69 100644
--- a/README.md
+++ b/README.md
@@ -923,7 +923,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
 
 ## Acknowledgement
 - Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
-- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data. 
+- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data.Added SSML for Chinese Text Frontend. 
 - Many thanks to [BarryKCL](https://github.com/BarryKCL) improved TTS Chinses frontend based on [G2PW](https://github.com/GitYCC/g2pW).
 - Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
 - Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.
diff --git a/README_cn.md b/README_cn.md
index bf3ff4dfd..0c3af5dd4 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -928,7 +928,7 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块：文本前端、声
 
 ## 致谢
 - 非常感谢 [HighCWu](https://github.com/HighCWu) 新增 [VITS-aishell3](./examples/aishell3/vits) 和 [VITS-VC](./examples/aishell3/vits-vc) 代码示例。
-- 非常感谢 [david-95](https://github.com/david-95) 修复句尾多标点符号出错的问题，贡献补充多条程序和数据。
+- 非常感谢 [david-95](https://github.com/david-95) 修复句尾多标点符号出错的问题，贡献补充多条程序和数据。新增 SSML 中文文本前端处理。
 - 非常感谢 [BarryKCL](https://github.com/BarryKCL) 基于 [G2PW](https://github.com/GitYCC/g2pW) 对 TTS 中文文本前端的优化。
 - 非常感谢 [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) 多年来的关注和建议，以及在诸多问题上的帮助。
 - 非常感谢 [mymagicpower](https://github.com/mymagicpower) 采用PaddleSpeech 对 ASR 的[短语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk)及[长语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk)进行 Java 实现。

From ec1f9edd562275e2d2799c16e36a304bae172e1c Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Thu, 20 Oct 2022 10:11:26 +0800
Subject: [PATCH 12/20] add space after punctions

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 0abb3fd69..d02ac4c6b 100644
--- a/README.md
+++ b/README.md
@@ -923,7 +923,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
 
 ## Acknowledgement
 - Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
-- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data.Added SSML for Chinese Text Frontend. 
+- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data. Added SSML for Chinese Text Frontend. 
 - Many thanks to [BarryKCL](https://github.com/BarryKCL) improved TTS Chinses frontend based on [G2PW](https://github.com/GitYCC/g2pW).
 - Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
 - Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.

From da525d346f0a78fc1b6f11db408a5ce1a76c5610 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Thu, 20 Oct 2022 06:17:17 +0000
Subject: [PATCH 13/20] fix uvicorn's version

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index e551d9fa6..3353cdada 100644
--- a/setup.py
+++ b/setup.py
@@ -77,7 +77,7 @@ base = [
     "pybind11",
 ]
 
-server = ["fastapi", "uvicorn", "pattern_singleton", "websockets"]
+server = ["fastapi", "uvicorn<=0.18.3", "pattern_singleton", "websockets"]
 
 requirements = {
     "install":

From 63c80121e2c5691145a2bc8c49cf1a2b277c7067 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Thu, 20 Oct 2022 06:33:07 +0000
Subject: [PATCH 14/20] fix uvicorn's bug

---
 paddlespeech/server/bin/paddlespeech_server.py | 2 +-
 setup.py                                       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py
index 10a91d9be..1b1792bd1 100644
--- a/paddlespeech/server/bin/paddlespeech_server.py
+++ b/paddlespeech/server/bin/paddlespeech_server.py
@@ -113,7 +113,7 @@ class ServerExecutor(BaseExecutor):
         """
         config = get_config(config_file)
         if self.init(config):
-            uvicorn.run(app, host=config.host, port=config.port, debug=True)
+            uvicorn.run(app, host=config.host, port=config.port)
 
 
 @cli_server_register(
diff --git a/setup.py b/setup.py
index 3353cdada..e551d9fa6 100644
--- a/setup.py
+++ b/setup.py
@@ -77,7 +77,7 @@ base = [
     "pybind11",
 ]
 
-server = ["fastapi", "uvicorn<=0.18.3", "pattern_singleton", "websockets"]
+server = ["fastapi", "uvicorn", "pattern_singleton", "websockets"]
 
 requirements = {
     "install":

From ce153d915e512c5ab38e7791fb733540189ebfb1 Mon Sep 17 00:00:00 2001
From: tianhao zhang <15600919271@163.com>
Date: Thu, 20 Oct 2022 07:54:00 +0000
Subject: [PATCH 15/20] update u2pp result.md

---
 examples/wenetspeech/asr1/RESULTS.md | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md
index f22c652e6..cd480163e 100644
--- a/examples/wenetspeech/asr1/RESULTS.md
+++ b/examples/wenetspeech/asr1/RESULTS.md
@@ -53,3 +53,22 @@ Pretrain model from https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1
 | conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | -1 | 0.061884 |  
 | conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | -1 | 0.062056 |  
 | conformer | 32.52 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | -1 |  0.052110 |
+
+
+## U2PP Streaming Pretrained Model
+
+Pretrain model from https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr1/asr1_chunk_conformer_u2pp_wenetspeech_ckpt_1.3.0.model.tar.gz
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | 16 | 0.057031 |  
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | 16 | 0.068826 |  
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | 16 | 0.069111 |  
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | 16 | 0.059213 |
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Chunk Size | CER |  
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention | -1 | 0.049256 |  
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | -1 | 0.052086 |  
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | -1 | 0.052267 |  
+| conformer | 122.88 M | conf/chunk_conformer.yaml | spec_aug  | aishell1 | attention_rescoring | -1 |  0.047198 |

From ed0138c6e324a87e31a23138bafe6f878ed8f4e9 Mon Sep 17 00:00:00 2001
From: "david.95" <david.95@live.cn>
Date: Thu, 20 Oct 2022 18:09:41 +0800
Subject: [PATCH 16/20] add condition check if a ssml input and filter space
 line, test=tts

---
 paddlespeech/t2s/exps/syn_utils.py | 36 +++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 13 deletions(-)

diff --git a/paddlespeech/t2s/exps/syn_utils.py b/paddlespeech/t2s/exps/syn_utils.py
index f9d1cd1b5..41663891e 100644
--- a/paddlespeech/t2s/exps/syn_utils.py
+++ b/paddlespeech/t2s/exps/syn_utils.py
@@ -105,14 +105,15 @@ def get_sentences(text_file: Optional[os.PathLike], lang: str='zh'):
     sentences = []
     with open(text_file, 'rt') as f:
         for line in f:
-            items = re.split(r"\s+", line.strip(), 1)
-            utt_id = items[0]
-            if lang == 'zh':
-                sentence = "".join(items[1:])
-            elif lang == 'en':
-                sentence = " ".join(items[1:])
-            elif lang == 'mix':
-                sentence = " ".join(items[1:])
+            if line.strip() != "":
+                items = re.split(r"\s+", line.strip(), 1)
+                utt_id = items[0]
+                if lang == 'zh':
+                    sentence = "".join(items[1:])
+                elif lang == 'en':
+                    sentence = " ".join(items[1:])
+                elif lang == 'mix':
+                    sentence = " ".join(items[1:])
             sentences.append((utt_id, sentence))
     return sentences
 
@@ -182,11 +183,20 @@ def run_frontend(frontend: object,
                  to_tensor: bool=True):
     outs = dict()
     if lang == 'zh':
-        input_ids = frontend.get_input_ids_ssml(
-            text,
-            merge_sentences=merge_sentences,
-            get_tone_ids=get_tone_ids,
-            to_tensor=to_tensor)
+        input_ids = {}
+        if text.strip() != "" and re.match(r".*?<speak>.*?</speak>.*", text,
+                                           re.DOTALL):
+            input_ids = frontend.get_input_ids_ssml(
+                text,
+                merge_sentences=merge_sentences,
+                get_tone_ids=get_tone_ids,
+                to_tensor=to_tensor)
+        else:
+            input_ids = frontend.get_input_ids(
+                text,
+                merge_sentences=merge_sentences,
+                get_tone_ids=get_tone_ids,
+                to_tensor=to_tensor)
         phone_ids = input_ids["phone_ids"]
         if get_tone_ids:
             tone_ids = input_ids["tone_ids"]

From 4dfb3365f637b28b30f0359dd641f571800eb2a8 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Fri, 21 Oct 2022 17:23:17 +0800
Subject: [PATCH 17/20] Update README.md

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index d02ac4c6b..4ed1a022c 100644
--- a/README.md
+++ b/README.md
@@ -157,6 +157,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
   - 🧩  *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV).
 
 ### Recent Update
+- 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for Chinese Text Frontend.
 - 👑 2022.10.11: Add [Wav2vec2ASR](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
 - 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT in [PaddleSpeech Web Demo](./demos/speech_web).
 - ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with ECAPA-TDNN speaker encoder.

From 7693bd1812086d2b5d5a19646e704a6155cb1103 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Fri, 21 Oct 2022 17:24:40 +0800
Subject: [PATCH 18/20] Update README.md

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4ed1a022c..3b26ff9b5 100644
--- a/README.md
+++ b/README.md
@@ -924,7 +924,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
 
 ## Acknowledgement
 - Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
-- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data. Added SSML for Chinese Text Frontend. 
+- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data. Added [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for Chinese Text Frontend. 
 - Many thanks to [BarryKCL](https://github.com/BarryKCL) improved TTS Chinses frontend based on [G2PW](https://github.com/GitYCC/g2pW).
 - Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
 - Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.

From 9c68c2061e1b595deac62229a2f29f9f0659ff17 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Fri, 21 Oct 2022 17:29:13 +0800
Subject: [PATCH 19/20] Update README_cn.md

---
 README_cn.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/README_cn.md b/README_cn.md
index 0c3af5dd4..9a4549898 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -164,7 +164,8 @@
 
   
 ### 近期更新
-- 👑 2022.10.11: 新增 [Wav2vec2ASR](./examples/librispeech/asr3), 在 LibriSpeech 上针对ASR任务对wav2vec2.0 的fine-tuning.
+- 🎉 2022.10.21: TTS 中文文本前端新增 [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) 功能。
+- 👑 2022.10.11: 新增 [Wav2vec2ASR](./examples/librispeech/asr3), 在 LibriSpeech 上针对 ASR 任务对 wav2vec2.0 的 finetuning。
 - 🔥 2022.09.26: 新增 Voice Cloning, TTS finetune 和 ERNIE-SAT 到 [PaddleSpeech 网页应用](./demos/speech_web)。
 - ⚡ 2022.09.09: 新增基于 ECAPA-TDNN 声纹模型的 AISHELL-3 Voice Cloning [示例](./examples/aishell3/vc2)。
 - ⚡ 2022.08.25: 发布 TTS [finetune](./examples/other/tts_finetune/tts3) 示例。
@@ -928,7 +929,7 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块：文本前端、声
 
 ## 致谢
 - 非常感谢 [HighCWu](https://github.com/HighCWu) 新增 [VITS-aishell3](./examples/aishell3/vits) 和 [VITS-VC](./examples/aishell3/vits-vc) 代码示例。
-- 非常感谢 [david-95](https://github.com/david-95) 修复句尾多标点符号出错的问题，贡献补充多条程序和数据。新增 SSML 中文文本前端处理。
+- 非常感谢 [david-95](https://github.com/david-95) 修复 TTS 句尾多标点符号出错的问题，贡献补充多条程序和数据。为 TTS 中文文本前端新增 [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) 功能。
 - 非常感谢 [BarryKCL](https://github.com/BarryKCL) 基于 [G2PW](https://github.com/GitYCC/g2pW) 对 TTS 中文文本前端的优化。
 - 非常感谢 [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) 多年来的关注和建议，以及在诸多问题上的帮助。
 - 非常感谢 [mymagicpower](https://github.com/mymagicpower) 采用PaddleSpeech 对 ASR 的[短语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk)及[长语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk)进行 Java 实现。

From 09a735af2449a2205a6006287e6bd1e98b355c37 Mon Sep 17 00:00:00 2001
From: TianYuan <white-sky@qq.com>
Date: Fri, 21 Oct 2022 17:32:47 +0800
Subject: [PATCH 20/20] Update README.md

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 3b26ff9b5..26f13d00e 100644
--- a/README.md
+++ b/README.md
@@ -157,7 +157,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision
   - 🧩  *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV).
 
 ### Recent Update
-- 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for Chinese Text Frontend.
+- 🎉 2022.10.21: Add [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for TTS Chinese Text Frontend.
 - 👑 2022.10.11: Add [Wav2vec2ASR](./examples/librispeech/asr3), wav2vec2.0 fine-tuning for ASR on LibriSpeech.
 - 🔥 2022.09.26: Add Voice Cloning, TTS finetune, and ERNIE-SAT in [PaddleSpeech Web Demo](./demos/speech_web).
 - ⚡ 2022.09.09: Add AISHELL-3 Voice Cloning [example](./examples/aishell3/vc2) with ECAPA-TDNN speaker encoder.
@@ -924,8 +924,8 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P
 
 ## Acknowledgement
 - Many thanks to [HighCWu](https://github.com/HighCWu) for adding [VITS-aishell3](./examples/aishell3/vits) and [VITS-VC](./examples/aishell3/vits-vc) examples.
-- Many thanks to [david-95](https://github.com/david-95) improved TTS, fixed multi-punctuation bug, and contributed to multiple program and data. Added [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for Chinese Text Frontend. 
-- Many thanks to [BarryKCL](https://github.com/BarryKCL) improved TTS Chinses frontend based on [G2PW](https://github.com/GitYCC/g2pW).
+- Many thanks to [david-95](https://github.com/david-95) for fixing the multi-punctuation bug, contributing multiple programs and data, and adding [SSML](https://github.com/PaddlePaddle/PaddleSpeech/discussions/2538) for the TTS Chinese Text Frontend.
+- Many thanks to [BarryKCL](https://github.com/BarryKCL) for improving TTS Chinese Frontend based on [G2PW](https://github.com/GitYCC/g2pW).
 - Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
 - Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.
 - Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing Virtual Uploader(VUP)/Virtual YouTuber(VTuber) with PaddleSpeech TTS function.