# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from paddlespeech.t2s.frontend.ssml.xml_processor import MixTextProcessor if __name__ == '__main__': text = "你好吗,我们的声学模型使用了 Fast Speech Two。前浪在沙滩上,沙滩上倒了一堆。 想象干干的树干了, 里面有个干尸,不知是被谁死的。thank you." # SSML: 13 # 0 ['你好吗,', []] # 1 ['我们的声学模型使用了FastSpeechTwo。前浪', []] # 2 ['倒', ['dao3']] # 3 ['在沙滩上,沙滩上倒了一堆', []] # 4 ['土', ['tu3']] # 5 ['。想象', []] # 6 ['干干', ['gan1', 'gan1']] # 7 ['的树干', []] # 8 ['倒', ['dao3']] # 9 ['了,里面有个干尸,不知是被谁', []] # 10 ['干', ['gan4']] # 11 ['死的。', []] # 12 ['thank you.', []] inputs = MixTextProcessor.get_pinyin_split(text) print(f"SSML get_pinyin_split: {len(inputs)}") for i, sub in enumerate(inputs): print(i, sub) print() # SSML get_dom_split: 13 # 0 你好吗, # 1 我们的声学模型使用了 Fast Speech Two。前浪 # 2 # 3 在沙滩上,沙滩上倒了一堆 # 4 # 5 。 想象 # 6 干干 # 7 的树干 # 8 # 9 了, 里面有个干尸,不知是被谁 # 10 # 11 死的。 # 12 thank you. inputs = MixTextProcessor.get_dom_split(text) print(f"SSML get_dom_split: {len(inputs)}") for i, sub in enumerate(inputs): print(i, sub) print() # SSML object.get_pinyin_split: 246 # 我们的声学模型使用了 Fast Speech Two。前浪在沙滩上,沙滩上倒了一堆。 想象干干的树干了, 里面有个干尸,不知是被谁死的。 outs = MixTextProcessor().get_xml_content(text) print(f"SSML object.get_pinyin_split: {len(outs)}") print(outs) print() # SSML object.get_content_split: 30 你好吗, # 1 我们的声学模型使用了 Fast Speech Two。前浪在沙滩上,沙滩上倒了一堆。 想象干干的树干 # 倒了, 里面有个干尸,不知是被谁死的。 # 2 thank you. outs = MixTextProcessor().get_content_split(text) print(f"SSML object.get_content_split: {len(outs)}") for i, sub in enumerate(outs): print(i, sub) print() import json import xmltodict text = "我们的声学模型使用了 Fast Speech Two。前浪在沙滩上,沙滩上倒了一堆。 想象干干的树干了, 里面有个干尸,不知是被谁死的。" ssml = xmltodict.parse(text) print(json.dumps(ssml)) print(ssml['speak'].keys()) print(ssml['speak']['#text']) print(ssml['speak']['say-as'])