From 742523fb38f521aaa93431658a7eb2042b2bad81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=99=8B=E4=B8=9C=E6=AF=85?= Date: Fri, 13 Jan 2023 15:45:49 +0800 Subject: [PATCH] [tts]For mixed Chinese and English speech synthesis, add SSML support for Chinese (#2830) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 添加.history * [tts]添加中英混合语音合成时对中文SSML的支持 --- .gitignore | 1 + paddlespeech/t2s/frontend/mix_frontend.py | 57 ++++++++++++++++++++--- paddlespeech/t2s/ssml/xml_processor.py | 34 ++++++++++++++ 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 75f56b60..4a0c4331 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ *.egg-info build *output/ +.history audio/dist/ audio/fc_patch/ diff --git a/paddlespeech/t2s/frontend/mix_frontend.py b/paddlespeech/t2s/frontend/mix_frontend.py index 19c98d53..c13a5ab6 100644 --- a/paddlespeech/t2s/frontend/mix_frontend.py +++ b/paddlespeech/t2s/frontend/mix_frontend.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import re from typing import Dict from typing import List @@ -18,6 +19,7 @@ import paddle from paddlespeech.t2s.frontend import English from paddlespeech.t2s.frontend.zh_frontend import Frontend +from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor class MixFrontend(): @@ -107,7 +109,40 @@ class MixFrontend(): add_sp: bool=True, to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]: - segments = self.get_segment(sentence) + ''' 1. 添加SSML支持,先列出 文字 和 标签内容, + 然后添加到tmpSegments数组里 + ''' + d_inputs = MixTextProcessor.get_dom_split(sentence) + tmpSegments = [] + for instr in d_inputs: + ''' 暂时只支持 say-as ''' + if instr.lower().startswith("" + segments.append(tuple(currentSeg)) + segments.append(seg) + currentSeg = ["", ""] + else: + if currentSeg[0] == '': + currentSeg[0] = seg[0] + currentSeg[1] = seg[1] + else: + currentSeg[0] = currentSeg[0] + seg[0] + if currentSeg[0] != '': + currentSeg[0] = "" + currentSeg[0] + "" + segments.append(tuple(currentSeg)) phones_list = [] result = {} @@ -120,11 +155,21 @@ class MixFrontend(): input_ids = self.en_frontend.get_input_ids( content, merge_sentences=False, to_tensor=to_tensor) else: - input_ids = self.zh_frontend.get_input_ids( - content, - merge_sentences=False, - get_tone_ids=get_tone_ids, - to_tensor=to_tensor) + ''' 3. 把带speak tag的中文和普通文字分开处理 + ''' + if content.strip() != "" and \ + re.match(r".*?.*?.*", content, re.DOTALL): + input_ids = self.zh_frontend.get_input_ids_ssml( + content, + merge_sentences=False, + get_tone_ids=get_tone_ids, + to_tensor=to_tensor) + else: + input_ids = self.zh_frontend.get_input_ids( + content, + merge_sentences=False, + get_tone_ids=get_tone_ids, + to_tensor=to_tensor) if add_sp: input_ids["phone_ids"][-1] = paddle.concat( [input_ids["phone_ids"][-1], self.sp_id_tensor]) diff --git a/paddlespeech/t2s/ssml/xml_processor.py b/paddlespeech/t2s/ssml/xml_processor.py index b3912134..892ca371 100644 --- a/paddlespeech/t2s/ssml/xml_processor.py +++ b/paddlespeech/t2s/ssml/xml_processor.py @@ -74,6 +74,28 @@ class MixTextProcessor(): ctlist.append([mixstr, []]) return ctlist + @classmethod + def get_dom_split(self, mixstr): + ''' 文本分解,顺序加了列表中,返回文本和say-as标签 + ''' + ctlist = [] + patn = re.compile(r'(.*\s*?)(.*?)(.*\s*)$', re.M | re.S) + mat = re.match(patn, mixstr) + if mat: + pre_xml = mat.group(1) + in_xml = mat.group(2) + after_xml = mat.group(3) + + ctlist.append(pre_xml) + dom = DomXml(in_xml) + tags = dom.get_text_and_sayas_tags() + ctlist.extend(tags) + + ctlist.append(after_xml) + return ctlist + else: + ctlist.append(mixstr) + return ctlist class DomXml(): def __init__(self, xmlstr): @@ -156,3 +178,15 @@ class DomXml(): if x.hasAttribute('pinyin'): # pinyin print(x.tagName, 'pinyin', x.getAttribute('pinyin'), x.firstChild.data) + + def get_text_and_sayas_tags(self): + '''返回 xml 内容的列表,包括所有文本内容和 tag''' + res = [] + + for x1 in self.rnode: + if x1.nodeType == Node.TEXT_NODE: + res.append(x1.value) + else: + for x2 in x1.childNodes: + res.append(x2.toxml()) + return res