Add end-to-end version of MFA FastSpeech2, test=tts

pull/2693/head
WongLaw 3 years ago
parent 3d2a40b176
commit 3866bdbd33

@ -30,10 +30,10 @@ from pypinyin_dict.phrase_pinyin_data import large_pinyin
from paddlespeech.t2s.frontend.g2pw import G2PWOnnxConverter from paddlespeech.t2s.frontend.g2pw import G2PWOnnxConverter
from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon from paddlespeech.t2s.frontend.generate_lexicon import generate_lexicon
from paddlespeech.t2s.frontend.rhy_prediction.rhy_predictor import Rhy_predictor
from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi from paddlespeech.t2s.frontend.tone_sandhi import ToneSandhi
from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer from paddlespeech.t2s.frontend.zh_normalization.text_normlization import TextNormalizer
from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor from paddlespeech.t2s.ssml.xml_processor import MixTextProcessor
from paddlespeech.t2s.frontend.rhy_prediction.rhy_predictor import Rhy_predictor
INITIALS = [ INITIALS = [
'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh', 'b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h', 'zh', 'ch', 'sh',
@ -108,7 +108,8 @@ class Frontend():
'掺和': [['chan1'], ['huo5']] '掺和': [['chan1'], ['huo5']]
} }
if rhy_tuple is not None: if rhy_tuple is not None:
self.rhy_predictor = Rhy_predictor(rhy_tuple[0], rhy_tuple[1], rhy_tuple[2]) self.rhy_predictor = Rhy_predictor(rhy_tuple[0], rhy_tuple[1],
rhy_tuple[2])
print("Rhythm predictor loaded.") print("Rhythm predictor loaded.")
# g2p_model can be pypinyin and g2pM and g2pW # g2p_model can be pypinyin and g2pM and g2pW
self.g2p_model = g2p_model self.g2p_model = g2p_model
@ -223,7 +224,8 @@ class Frontend():
pinyins = self.g2pM_model(seg, tone=True, char_split=False) pinyins = self.g2pM_model(seg, tone=True, char_split=False)
if self.rhy_predictor is not None: if self.rhy_predictor is not None:
rhy_text = self.rhy_predictor.get_prediction(seg) rhy_text = self.rhy_predictor.get_prediction(seg)
final_py = self.rhy_predictor.pinyin_align(pinyins, rhy_text) final_py = self.rhy_predictor.pinyin_align(pinyins,
rhy_text)
pinyins = final_py pinyins = final_py
pre_word_length = 0 pre_word_length = 0
for word, pos in seg_cut: for word, pos in seg_cut:
@ -518,7 +520,7 @@ class Frontend():
print(all_phonemes[0]) print(all_phonemes[0])
print("----------------------------") print("----------------------------")
return [sum(all_phonemes, [])] return [sum(all_phonemes, [])]
def del_same_sp(self, phonemes): def del_same_sp(self, phonemes):
new_phonemes = [] new_phonemes = []
for ph_seq in phonemes: for ph_seq in phonemes:

Loading…
Cancel
Save