diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index e844cda3..e39f7721 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -1345,7 +1345,7 @@ g2pw_onnx_models = { 'url': 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel.tar', 'md5': - '86a3dd8db0291c575c46e134111dce23', + '63bc0894af15a5a591e58b2130a2bcac', }, }, } diff --git a/paddlespeech/t2s/frontend/g2pw/onnx_api.py b/paddlespeech/t2s/frontend/g2pw/onnx_api.py index 8d485628..ace943f2 100644 --- a/paddlespeech/t2s/frontend/g2pw/onnx_api.py +++ b/paddlespeech/t2s/frontend/g2pw/onnx_api.py @@ -8,7 +8,7 @@ import onnxruntime import numpy as np from opencc import OpenCC - +from pypinyin import pinyin, lazy_pinyin, Style from paddlenlp.transformers import BertTokenizer from paddlespeech.utils.env import MODEL_HOME from paddlespeech.t2s.frontend.g2pw.dataset import prepare_data,\ @@ -127,6 +127,7 @@ class G2PWOnnxConverter: } texts, query_ids, sent_ids, partial_results = [], [], [], [] for sent_id, sent in enumerate(sentences): + pypinyin_result = pinyin(sent,style=Style.TONE3) partial_result = [None] * len(sent) for i, char in enumerate(sent): if char in polyphonic_chars: @@ -136,6 +137,7 @@ class G2PWOnnxConverter: elif char in monophonic_chars_dict: partial_result[i] = self.style_convert_func(monophonic_chars_dict[char]) elif char in self.char_bopomofo_dict: - partial_result[i] = self.style_convert_func(self.char_bopomofo_dict[char][0]) + partial_result[i] = pypinyin_result[i][0] + # partial_result[i] = self.style_convert_func(self.char_bopomofo_dict[char][0]) partial_results.append(partial_result) return texts, query_ids, sent_ids, partial_results diff --git a/paddlespeech/t2s/frontend/polyphonic.yaml b/paddlespeech/t2s/frontend/polyphonic.yaml index 6b453d87..629bcd26 100644 --- a/paddlespeech/t2s/frontend/polyphonic.yaml +++ b/paddlespeech/t2s/frontend/polyphonic.yaml @@ -1,5 +1,26 @@ polyphonic: 湖泊: ['hu2','po1'] + 地壳: ['di4','qiao4'] + 柏树: ['bai3','shu4'] + 曝光: ['bao4','guang1'] 弹力: ['tan2','li4'] + 字帖: ['zi4','tie4'] + 口吃: ['kou3','chi1'] + 包扎: ['bao1','za1'] + 哪吒: ['ne2','zha1'] + 说服: ['shuo1','fu2'] + 识字: ['shi2','zi4'] + 骨头: ['gu3','tou5'] + 对称: ['dui4','chen4'] + 口供: ['kou3','gong4'] + 抹布: ['ma1','bu4'] + 露背: ['lu4','bei4'] + 圈养: ['juan4', 'yang3'] + 眼眶: ['yan3', 'kuang4'] + 品行: ['pin3','xing2'] 颤抖: ['chan4','dou3'] - 鸭绿江: ['ya1','lu4','jiang1'] \ No newline at end of file + 差不多: ['cha4','bu5','duo1'] + 鸭绿江: ['ya1','lu4','jiang1'] + 撒切尔: ['sa4','qie4','er3'] + 比比皆是: ['bi3','bi3','jie1','shi4'] + 身无长物: ['shen1','wu2','chang2','wu4'] \ No newline at end of file