优化中文文本前端

3 years ago · 2498b9ce66
parent 070a08f2be
commit 2498b9ce66
2 changed files with 99 additions and 16 deletions
--- a/paddlespeech/t2s/frontend/polyphonic.yaml
+++ b/paddlespeech/t2s/frontend/polyphonic.yaml
@ -0,0 +1,5 @@
 polyphonic:
    湖泊: ['hu2','po1']
    弹力: ['tan2','li4']
    颤抖: ['chan4','dou3']
    鸭绿江: ['ya1','lu4','jiang1']
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import re
 import yaml
 from typing import Dict
 from typing import List
@ -19,6 +20,7 @@ import jieba.posseg as psg
 import numpy as np
 import paddle
 from g2pM import G2pM
 from g2pw import G2PWConverter
 from pypinyin import lazy_pinyin
 from pypinyin import load_phrases_dict
 from pypinyin import load_single_dict
@ -53,9 +55,23 @@ def insert_after_character(lst, item):
    return result
 class Polyphonic():
    def __init__(self,dict_file="./paddlespeech/t2s/frontend/polyphonic.yaml"):
        with open(dict_file, encoding='utf8') as polyphonic_file:
            # 解析yaml
            polyphonic_dict = yaml.load(polyphonic_file, Loader=yaml.FullLoader)
        self.polyphonic_words = polyphonic_dict["polyphonic"]
    def correct_pronunciation(self,word,pinyin):
        # 词汇被词典收录则返回纠正后的读音
        if word in self.polyphonic_words.keys():
            pinyin = self.polyphonic_words[word]
        # 否则返回原读音
        return pinyin
 class Frontend():
    def __init__(self,
-                 g2p_model="pypinyin",
+                 g2p_model="g2pW",
                 phone_vocab_path=None,
                 tone_vocab_path=None):
        self.tone_modifier = ToneSandhi()
@ -67,6 +83,12 @@ class Frontend():
            self.g2pM_model = G2pM()
            self.pinyin2phone = generate_lexicon(
                with_tone=True, with_erhua=False)
        elif self.g2p_model == "g2pW":
            self.corrector = Polyphonic()
            self.g2pW_model = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)
            self.pinyin2phone = generate_lexicon(
                with_tone=True, with_erhua=False)
        else:
            self.__init__pypinyin()
        self.must_erhua = {"小院儿", "胡同儿", "范儿", "老汉儿", "撒欢儿", "寻老礼儿", "妥妥儿"}
@ -139,6 +161,24 @@ class Frontend():
                    # If it's not pinyin (possibly punctuation) or no conversion is required
                    initials.append(pinyin)
                    finals.append(pinyin)
        elif self.g2p_model == "g2pW":
            pinyins = self.g2pW_model(word)[0]
            if pinyins == [None]:
                pinyins = [word]
            for pinyin in pinyins:
                pinyin = pinyin.replace("u:", "v")
                if pinyin in self.pinyin2phone:
                    initial_final_list = self.pinyin2phone[pinyin].split(" ")
                    if len(initial_final_list) == 2:
                        initials.append(initial_final_list[0])
                        finals.append(initial_final_list[1])
                    elif len(initial_final_list) == 1:
                        initials.append('')
                        finals.append(initial_final_list[1])
                else:
                    # If it's not pinyin (possibly punctuation) or no conversion is required
                    initials.append(pinyin)
                    finals.append(pinyin)
        return initials, finals
    # if merge_sentences, merge all sentences into one phone sequence
@ -150,12 +190,51 @@ class Frontend():
        phones_list = []
        for seg in segments:
            phones = []
            initials = []
            finals = []
            # Replace all English words in the sentence
            seg = re.sub('[a-zA-Z]+', '', seg)
            seg_cut = psg.lcut(seg)
            initials = []
            finals = []
            seg_cut = self.tone_modifier.pre_merge_for_modify(seg_cut)
            if self.g2p_model == "g2pW":
                pinyins = self.g2pW_model(seg)[0]
                pre_word_length = 0
                for word, pos in seg_cut:
                    sub_initials = []
                    sub_finals = []
                    now_word_length = pre_word_length + len(word)
                    if pos == 'eng':
                        pre_word_length = now_word_length
                        continue
                    word_pinyins = pinyins[pre_word_length:now_word_length]
                    # 矫正发音
                    word_pinyins = self.corrector.correct_pronunciation(word,word_pinyins)
                    for pinyin,char in zip(word_pinyins,word):
                        if pinyin == None:
                            pinyin = char
                        pinyin = pinyin.replace("u:", "v")
                        if pinyin in self.pinyin2phone:
                            initial_final_list = self.pinyin2phone[pinyin].split(" ")
                            if len(initial_final_list) == 2:
                                sub_initials.append(initial_final_list[0])
                                sub_finals.append(initial_final_list[1])
                            elif len(initial_final_list) == 1:
                                sub_initials.append('')
                                sub_finals.append(initial_final_list[1])
                        else:
                            # If it's not pinyin (possibly punctuation) or no conversion is required
                            sub_initials.append(pinyin)
                            sub_finals.append(pinyin)
                    pre_word_length = now_word_length
                    sub_finals = self.tone_modifier.modified_tone(word, pos,
                                                                sub_finals)
                    if with_erhua:
                        sub_initials, sub_finals = self._merge_erhua(
                            sub_initials, sub_finals, word, pos)
                    initials.append(sub_initials)
                    finals.append(sub_finals)
                    # assert len(sub_initials) == len(sub_finals) == len(word)
            else:
                for word, pos in seg_cut:
                    if pos == 'eng':
                        continue
@ -170,7 +249,6 @@ class Frontend():
                    # assert len(sub_initials) == len(sub_finals) == len(word)
            initials = sum(initials, [])
            finals = sum(finals, [])
            for c, v in zip(initials, finals):
                # NOTE: post process for pypinyin outputs
                # we discriminate i, ii and iii