From 2498b9ce66b3ac515bbc6243bd10d7d6fb8e61b0 Mon Sep 17 00:00:00 2001 From: wangcanlong Date: Wed, 3 Aug 2022 15:58:24 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E4=B8=AD=E6=96=87=E6=96=87?= =?UTF-8?q?=E6=9C=AC=E5=89=8D=E7=AB=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- paddlespeech/t2s/frontend/polyphonic.yaml | 5 + paddlespeech/t2s/frontend/zh_frontend.py | 110 ++++++++++++++++++---- 2 files changed, 99 insertions(+), 16 deletions(-) create mode 100644 paddlespeech/t2s/frontend/polyphonic.yaml diff --git a/paddlespeech/t2s/frontend/polyphonic.yaml b/paddlespeech/t2s/frontend/polyphonic.yaml new file mode 100644 index 000000000..6b453d87b --- /dev/null +++ b/paddlespeech/t2s/frontend/polyphonic.yaml @@ -0,0 +1,5 @@ +polyphonic: + 湖泊: ['hu2','po1'] + 弹力: ['tan2','li4'] + 颤抖: ['chan4','dou3'] + 鸭绿江: ['ya1','lu4','jiang1'] \ No newline at end of file diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py index 143ccbc15..e6d00c1d2 100644 --- a/paddlespeech/t2s/frontend/zh_frontend.py +++ b/paddlespeech/t2s/frontend/zh_frontend.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
class Polyphonic():
    """Pronunciation corrector for polyphonic (多音) Chinese words.

    Loads a word -> pinyin-list mapping from a YAML file (stored under the
    top-level ``polyphonic`` key) and overrides g2p output for any word the
    dictionary covers, e.g. ``湖泊 -> ['hu2', 'po1']``.
    """

    def __init__(self, dict_file="./paddlespeech/t2s/frontend/polyphonic.yaml"):
        """Load the polyphonic dictionary.

        Args:
            dict_file (str): Path to the YAML dictionary.
                NOTE(review): the default is relative to the current working
                directory, so it only resolves when running from the repo
                root — consider resolving it against ``__file__`` instead.
        """
        with open(dict_file, encoding='utf8') as polyphonic_file:
            # Parse the YAML dictionary. safe_load is sufficient here (the
            # file is a plain mapping of strings to lists) and avoids
            # executing arbitrary YAML tags, unlike load(FullLoader).
            polyphonic_dict = yaml.safe_load(polyphonic_file)
        # word -> list of corrected pinyin syllables
        self.polyphonic_words = polyphonic_dict["polyphonic"]

    def correct_pronunciation(self, word, pinyin):
        """Return the corrected pinyin for ``word`` if the dictionary lists
        it; otherwise return the original ``pinyin`` unchanged.

        Args:
            word (str): The Chinese word being converted.
            pinyin (List[str]): Pinyin syllables proposed by the g2p model.

        Returns:
            List[str]: Corrected (or original) pinyin syllables.
        """
        # dict.get already expresses "use dictionary entry if present,
        # else keep the model's output"; no need for an explicit
        # `in ... .keys()` membership test.
        return self.polyphonic_words.get(word, pinyin)
finals.append(initial_final_list[1]) + elif len(initial_final_list) == 1: + initials.append('') + finals.append(initial_final_list[1]) + else: + # If it's not pinyin (possibly punctuation) or no conversion is required + initials.append(pinyin) + finals.append(pinyin) return initials, finals # if merge_sentences, merge all sentences into one phone sequence @@ -150,27 +190,65 @@ class Frontend(): phones_list = [] for seg in segments: phones = [] + initials = [] + finals = [] # Replace all English words in the sentence seg = re.sub('[a-zA-Z]+', '', seg) seg_cut = psg.lcut(seg) - initials = [] - finals = [] seg_cut = self.tone_modifier.pre_merge_for_modify(seg_cut) - for word, pos in seg_cut: - if pos == 'eng': - continue - sub_initials, sub_finals = self._get_initials_finals(word) - sub_finals = self.tone_modifier.modified_tone(word, pos, - sub_finals) - if with_erhua: - sub_initials, sub_finals = self._merge_erhua( - sub_initials, sub_finals, word, pos) - initials.append(sub_initials) - finals.append(sub_finals) - # assert len(sub_initials) == len(sub_finals) == len(word) + if self.g2p_model == "g2pW": + pinyins = self.g2pW_model(seg)[0] + pre_word_length = 0 + for word, pos in seg_cut: + sub_initials = [] + sub_finals = [] + now_word_length = pre_word_length + len(word) + if pos == 'eng': + pre_word_length = now_word_length + continue + word_pinyins = pinyins[pre_word_length:now_word_length] + # 矫正发音 + word_pinyins = self.corrector.correct_pronunciation(word,word_pinyins) + for pinyin,char in zip(word_pinyins,word): + if pinyin == None: + pinyin = char + pinyin = pinyin.replace("u:", "v") + if pinyin in self.pinyin2phone: + initial_final_list = self.pinyin2phone[pinyin].split(" ") + if len(initial_final_list) == 2: + sub_initials.append(initial_final_list[0]) + sub_finals.append(initial_final_list[1]) + elif len(initial_final_list) == 1: + sub_initials.append('') + sub_finals.append(initial_final_list[1]) + else: + # If it's not pinyin (possibly punctuation) or no 
conversion is required + sub_initials.append(pinyin) + sub_finals.append(pinyin) + pre_word_length = now_word_length + sub_finals = self.tone_modifier.modified_tone(word, pos, + sub_finals) + if with_erhua: + sub_initials, sub_finals = self._merge_erhua( + sub_initials, sub_finals, word, pos) + initials.append(sub_initials) + finals.append(sub_finals) + # assert len(sub_initials) == len(sub_finals) == len(word) + else: + for word, pos in seg_cut: + if pos == 'eng': + continue + sub_initials, sub_finals = self._get_initials_finals(word) + sub_finals = self.tone_modifier.modified_tone(word, pos, + sub_finals) + if with_erhua: + sub_initials, sub_finals = self._merge_erhua( + sub_initials, sub_finals, word, pos) + initials.append(sub_initials) + finals.append(sub_finals) + # assert len(sub_initials) == len(sub_finals) == len(word) initials = sum(initials, []) finals = sum(finals, []) - for c, v in zip(initials, finals): # NOTE: post process for pypinyin outputs # we discriminate i, ii and iii