[tts]中英文语音合成时添加SSML支持

pull/2829/head
晋东毅-新华财经 3 years ago
parent c54c9506aa
commit 2660ff425a

1
.gitignore vendored

@ -13,6 +13,7 @@
*.done
*.whl
*.egg-info
.history
build
*output/

@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import re
from typing import Dict
from typing import List
@ -18,6 +19,7 @@ import paddle
from paddlespeech.t2s.frontend import English
from paddlespeech.t2s.frontend.zh_frontend import Frontend
from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor
class MixFrontend():
@ -107,7 +109,38 @@ class MixFrontend():
add_sp: bool=True,
to_tensor: bool=True) -> Dict[str, List[paddle.Tensor]]:
segments = self.get_segment(sentence)
''' 1. 添加SSML支持先列出 文字 和 <say-as>标签内容然后添加到tmpSegments数组里
'''
d_inputs = MixTextProcessor.get_dom_split(sentence)
tmpSegments = []
for instr in d_inputs:
if instr.lower().startswith("<say-as"):
tmpSegments.append((instr, "zh"))
else:
tmpSegments.extend(self.get_segment(instr))
''' 2. 把zh的merge到一起避免合成结果中间停顿
'''
segments = []
currentSeg = ["", ""]
for seg in tmpSegments:
if seg[1] == "en" or seg[1] == "other":
if currentSeg[0] == '':
segments.append(seg)
else:
currentSeg[0] = "<speak>" + currentSeg[0] + "</speak>"
segments.append(tuple(currentSeg))
segments.append(seg)
currentSeg = ["", ""]
else:
if currentSeg[0] == '':
currentSeg[0] = seg[0]
currentSeg[1] = seg[1]
else:
currentSeg[0] = currentSeg[0] + seg[0]
if currentSeg[0] != '':
currentSeg[0] = "<speak>" + currentSeg[0] + "</speak>"
segments.append(tuple(currentSeg))
phones_list = []
result = {}
@ -120,11 +153,19 @@ class MixFrontend():
input_ids = self.en_frontend.get_input_ids(
content, merge_sentences=False, to_tensor=to_tensor)
else:
input_ids = self.zh_frontend.get_input_ids(
content,
merge_sentences=False,
get_tone_ids=get_tone_ids,
to_tensor=to_tensor)
if content.strip() != "" and re.match(r".*?<speak>.*?</speak>.*", content,
re.DOTALL):
input_ids = self.zh_frontend.get_input_ids_ssml(
content,
merge_sentences=False,
get_tone_ids=get_tone_ids,
to_tensor=to_tensor)
else:
input_ids = self.zh_frontend.get_input_ids(
content,
merge_sentences=False,
get_tone_ids=get_tone_ids,
to_tensor=to_tensor)
if add_sp:
input_ids["phone_ids"][-1] = paddle.concat(
[input_ids["phone_ids"][-1], self.sp_id_tensor])

@ -73,7 +73,29 @@ class MixTextProcessor():
else:
ctlist.append([mixstr, []])
return ctlist
@classmethod
def get_dom_split(self, mixstr):
    """Split mixed text into an ordered list of pieces around a ``<speak>`` block.

    The result is meant for further processing by the mixed-language
    frontend: plain text pieces and the items produced from the SSML block
    are returned in reading order.

    Args:
        mixstr: Input text that may contain a ``<speak>...</speak>`` section.

    Returns:
        list: ``[mixstr]`` unchanged when no ``<speak>...</speak>`` section
        is matched. Otherwise the text before the block, followed by every
        item returned by ``DomXml(block).get_text_and_sayas_tags()``,
        followed by the text after the block. The leading and trailing
        pieces are included even when they are empty strings.
    """
    # NOTE: the first group is greedy and DOTALL is on, so when several
    # <speak> sections are present the last opening tag is the one parsed.
    speak_re = re.compile(r'(.*\s*?)(<speak>.*?</speak>)(.*\s*)$', re.M | re.S)
    found = speak_re.match(mixstr)
    if not found:
        # No SSML section: hand the text back as a single piece.
        return [mixstr]

    leading, speak_xml, trailing = found.groups()
    inner_parts = DomXml(speak_xml).get_text_and_sayas_tags()
    return [leading, *inner_parts, trailing]
class DomXml():
def __init__(self, xmlstr):
@ -156,3 +178,16 @@ class DomXml():
if x.hasAttribute('pinyin'): # pinyin
print(x.tagName, 'pinyin',
x.getAttribute('pinyin'), x.firstChild.data)
def get_text_and_sayas_tags(self):
    """Return the XML content as a flat list of strings.

    Walks the nodes in ``self.rnode``. A node that is itself a text node
    contributes its character data. For any other node, every direct child
    is serialized with ``toxml()``, so child text comes back as text and
    child elements (e.g. ``<say-as>`` tags) come back as complete markup.

    Returns:
        list: The collected strings, in iteration order.
    """
    pieces = []
    for node in self.rnode:
        if node.nodeType == Node.TEXT_NODE:
            # DOM text nodes carry their content in ``data``; ``value`` only
            # exists on attribute nodes and would raise AttributeError here.
            pieces.append(node.data)
        else:
            pieces.extend(child.toxml() for child in node.childNodes)
    # No debug print here: this is library code and must not write to stdout.
    return pieces

Loading…
Cancel
Save