add comment

2 years ago · 744ea44279
parent d17b2ee1af
commit 744ea44279
8 changed files with 35 additions and 136057 deletions
--- a/README.md
+++ b/README.md
@ -699,6 +699,7 @@ You are warmly welcome to submit questions in [discussions](https://github.com/P

 ## Acknowledgement

+- Many thanks to [BarryKCL](https://github.com/BarryKCL) for improving the TTS Chinese frontend based on [G2PW](https://github.com/GitYCC/g2pW).
 - Many thanks to [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) for years of attention, constructive advice and great help.
 - Many thanks to [mymagicpower](https://github.com/mymagicpower) for the Java implementation of ASR upon [short](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk) and [long](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk) audio files.
 - Many thanks to [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) for developing Virtual Uploader(VUP)/Virtual YouTuber(VTuber) with PaddleSpeech TTS function.
--- a/README_cn.md
+++ b/README_cn.md
@ -833,6 +833,7 @@ PaddleSpeech 的 **语音合成** 主要包含三个模块：文本前端、声

 ## 致谢

+- 非常感谢 [BarryKCL](https://github.com/BarryKCL) 基于 [G2PW](https://github.com/GitYCC/g2pW) 对 TTS 中文文本前端的优化。
 - 非常感谢 [yeyupiaoling](https://github.com/yeyupiaoling)/[PPASR](https://github.com/yeyupiaoling/PPASR)/[PaddlePaddle-DeepSpeech](https://github.com/yeyupiaoling/PaddlePaddle-DeepSpeech)/[VoiceprintRecognition-PaddlePaddle](https://github.com/yeyupiaoling/VoiceprintRecognition-PaddlePaddle)/[AudioClassification-PaddlePaddle](https://github.com/yeyupiaoling/AudioClassification-PaddlePaddle) 多年来的关注和建议，以及在诸多问题上的帮助。
 - 非常感谢 [mymagicpower](https://github.com/mymagicpower) 采用PaddleSpeech 对 ASR 的[短语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_sdk)及[长语音](https://github.com/mymagicpower/AIAS/tree/main/3_audio_sdks/asr_long_audio_sdk)进行 Java 实现。
 - 非常感谢 [JiehangXie](https://github.com/JiehangXie)/[PaddleBoBo](https://github.com/JiehangXie/PaddleBoBo) 采用 PaddleSpeech 语音合成功能实现 Virtual Uploader(VUP)/Virtual YouTuber(VTuber) 虚拟主播。
--- a/paddlespeech/t2s/frontend/g2pw/bopomofo_to_pinyin_wo_tune_dict.json
+++ b/paddlespeech/t2s/frontend/g2pw/bopomofo_to_pinyin_wo_tune_dict.json
--- a/paddlespeech/t2s/frontend/g2pw/char_bopomofo_dict.json
+++ b/paddlespeech/t2s/frontend/g2pw/char_bopomofo_dict.json
--- a/paddlespeech/t2s/frontend/g2pw/dataset.py
+++ b/paddlespeech/t2s/frontend/g2pw/dataset.py
@ -1,3 +1,7 @@
+"""
+Credits
+ This code is modified from https://github.com/GitYCC/g2pW
+"""
 import numpy as np
 from paddlespeech.t2s.frontend.g2pw.utils import tokenize_and_map

@ -128,8 +132,3 @@ def get_char_phoneme_labels(polyphonic_chars):
            char2phonemes[char] = []
        char2phonemes[char].append(labels.index(f'{char} {phoneme}'))
    return labels, char2phonemes
-
-
-def prepare_pos(pos_path):
-     return open(pos_path).read().rstrip().split('\n')
-
--- a/paddlespeech/t2s/frontend/g2pw/onnx_api.py
+++ b/paddlespeech/t2s/frontend/g2pw/onnx_api.py
@ -1,3 +1,7 @@
+"""
+Credits
+ This code is modified from https://github.com/GitYCC/g2pW
+"""
 import os
 import json
 import onnxruntime
@ -7,7 +11,10 @@ from opencc import OpenCC

 from paddlenlp.transformers import BertTokenizer

-from paddlespeech.t2s.frontend.g2pw.dataset import prepare_data, prepare_onnx_input, get_phoneme_labels, get_char_phoneme_labels
+from paddlespeech.t2s.frontend.g2pw.dataset import prepare_data,\
+                                                   prepare_onnx_input,\
+                                                   get_phoneme_labels,\
+                                                   get_char_phoneme_labels
 from paddlespeech.t2s.frontend.g2pw.utils import load_config

 MODEL_URL = 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel.tar'
@ -34,6 +41,7 @@ def predict(session, onnx_input, labels):


 def download_model(model_dir):
+    os.makedirs(model_dir, exist_ok=True)
    wget_shell = "cd %s  && wget %s"%(model_dir,MODEL_URL)
    os.system(wget_shell)
    shell = "cd %s ;tar -xvf %s;cd %s/G2PWModel;rm -rf .*" % (model_dir,MODEL_URL.split("/")[-1], model_dir)
@ -44,10 +52,12 @@ def download_model(model_dir):

 class G2PWOnnxConverter:
    def __init__(self, style='bopomofo', model_source=None, enable_non_tradional_chinese=False):
-        model_dir = os.path.dirname(os.path.abspath(__file__))
+        model_dir = os.path.join(os.path.expandvars('$HOME'), 'paddlespeech/models')
        if not os.path.exists(os.path.join(model_dir, 'G2PWModel/g2pW.onnx')):
            download_model(model_dir)

+        sess_options = onnxruntime.SessionOptions()
+        sess_options.intra_op_num_threads = 2
        self.session_g2pW =  onnxruntime.InferenceSession(os.path.join(model_dir, 'G2PWModel/g2pW.onnx'))
        self.config = load_config(os.path.join(model_dir, 'G2PWModel/config.py'), use_default=True)

@ -65,16 +75,14 @@ class G2PWOnnxConverter:
        self.chars = sorted(list(self.char2phonemes.keys()))
        self.pos_tags = ['UNK', 'A', 'C', 'D', 'I', 'N', 'P', 'T', 'V', 'DE', 'SHI']

-        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                               'bopomofo_to_pinyin_wo_tune_dict.json'), 'r',encoding='utf-8') as fr:
+        with open(os.path.join(model_dir,'G2PWModel/bopomofo_to_pinyin_wo_tune_dict.json'), 'r',encoding='utf-8') as fr:
            self.bopomofo_convert_dict = json.load(fr)
        self.style_convert_func = {
            'bopomofo': lambda x: x,
            'pinyin': self._convert_bopomofo_to_pinyin,
        }[style]

-        with open(os.path.join(os.path.dirname(os.path.abspath(__file__)),
-                               'char_bopomofo_dict.json'), 'r',encoding='utf-8') as fr:
+        with open(os.path.join(model_dir,'G2PWModel/char_bopomofo_dict.json'), 'r',encoding='utf-8') as fr:
            self.char_bopomofo_dict = json.load(fr)

        if self.enable_opencc:
--- a/paddlespeech/t2s/frontend/g2pw/utils.py
+++ b/paddlespeech/t2s/frontend/g2pw/utils.py
@ -1,27 +1,11 @@
+
+"""
+Credits
+ This code is modified from https://github.com/GitYCC/g2pW
+"""
 import re
-import logging
 import sys

-
-class RunningAverage:
-    def __init__(self):
-        self.values = []
-
-    def add(self, val):
-        self.values.append(val)
-
-    def add_all(self, vals):
-        self.values += vals
-
-    def get(self):
-        if len(self.values) == 0:
-            return None
-        return sum(self.values) / len(self.values)
-
-    def flush(self):
-        self.values = []
-
-
 def wordize_and_map(text):
    words = []
    index_map_from_text_to_word = []
@ -146,16 +130,4 @@ def load_config(config_path, use_default=False):
                for dict_k, dict_v in val.items():
                    if dict_k not in d:
                        d[dict_k] = dict_v
-    return config
-
-
-def get_logger(file_path):
-    logger = logging.getLogger()
-    logger.setLevel(logging.DEBUG)
-
-    output_file_handler = logging.FileHandler(file_path)
-    stdout_handler = logging.StreamHandler(sys.stdout)
-
-    logger.addHandler(output_file_handler)
-    logger.addHandler(stdout_handler)
-    return logger
+    return config
--- a/paddlespeech/t2s/frontend/zh_frontend.py
+++ b/paddlespeech/t2s/frontend/zh_frontend.py
@ -79,7 +79,7 @@ class Frontend():
        self.tone_modifier = ToneSandhi()
        self.text_normalizer = TextNormalizer()
        self.punc = "：，；。？！“”‘’':,;.?!"
-        # g2p_model can be pypinyin and g2pM
+        # g2p_model can be pypinyin, g2pM, or g2pW
        self.g2p_model = g2p_model
        if self.g2p_model == "g2pM":
            self.g2pM_model = G2pM()
@ -87,6 +87,7 @@ class Frontend():
                with_tone=True, with_erhua=False)
        elif self.g2p_model == "g2pW":
            self.corrector = Polyphonic()
+            self.g2pM_model = G2pM()
            self.g2pW_model = G2PWOnnxConverter(style='pinyin', enable_non_tradional_chinese=True)
            self.pinyin2phone = generate_lexicon(
                with_tone=True, with_erhua=False)
@ -180,8 +181,14 @@ class Frontend():
            initials = []
            finals = []
            seg_cut = self.tone_modifier.pre_merge_for_modify(seg_cut)
+            # Predict on the whole sentence to get better results for polyphonic words
            if self.g2p_model == "g2pW":
-                pinyins = self.g2pW_model(seg)[0]
+                try:
+                    pinyins = self.g2pW_model(seg)[0]
+                except Exception:
+                    # The g2pW model takes Traditional Chinese input; if a Simplified Chinese word is not covered, fall back to g2pM prediction
+                    print("[%s] not in g2pW dict,use g2pM"%seg)
+                    pinyins = self.g2pM_model(seg, tone=True, char_split=False)
                pre_word_length = 0
                for word, pos in seg_cut:
                    sub_initials = []