change G2PWModel download

2 years ago · de0f99150a
parent 744ea44279
commit de0f99150a
3 changed files with 26 additions and 47 deletions
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,3 +1,2 @@
 include paddlespeech/t2s/exps/*.txt
-include paddlespeech/t2s/frontend/*.yaml
-include paddlespeech/t2s/frontend/g2pw/*.json
+include paddlespeech/t2s/frontend/*.yaml
--- a/paddlespeech/resource/pretrained_models.py
+++ b/paddlespeech/resource/pretrained_models.py
@@ -655,24 +655,6 @@ tts_dynamic_pretrained_models = {
            'phone_id_map.txt',
        },
    },
-    "fastspeech2_mix-mix": {
-        '1.0': {
-            'url':
-            'https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_csmscljspeech_add-zhen.zip',
-            'md5':
-            '77d9d4b5a79ed6203339ead7ef6c74f9',
-            'config':
-            'default.yaml',
-            'ckpt':
-            'snapshot_iter_94000.pdz',
-            'speech_stats':
-            'speech_stats.npy',
-            'phones_dict':
-            'phone_id_map.txt',
-            'speaker_dict':
-            'speaker_id_map.txt',
-        },
-    },
    # tacotron2
    "tacotron2_csmsc-zh": {
        '1.0': {
@@ -1095,8 +1077,7 @@ tts_onnx_pretrained_models = {
            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/speedyspeech/speedyspeech_csmsc_onnx_0.2.0.zip',
            'md5':
            '3e9c45af9ef70675fc1968ed5074fc88',
-            'ckpt':
-            'speedyspeech_csmsc.onnx',
+            'ckpt': ['speedyspeech_csmsc.onnx'],
            'phones_dict':
            'phone_id_map.txt',
            'tones_dict':
@@ -1112,8 +1093,7 @@ tts_onnx_pretrained_models = {
            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip',
            'md5':
            'fd3ad38d83273ad51f0ea4f4abf3ab4e',
-            'ckpt':
-            'fastspeech2_csmsc.onnx',
+            'ckpt': ['fastspeech2_csmsc.onnx'],
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
@@ -1126,8 +1106,7 @@ tts_onnx_pretrained_models = {
            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip',
            'md5':
            '00754307636a48c972a5f3e65cda3d18',
-            'ckpt':
-            'fastspeech2_ljspeech.onnx',
+            'ckpt': ['fastspeech2_ljspeech.onnx'],
            'phones_dict':
            'phone_id_map.txt',
            'sample_rate':
@@ -1140,8 +1119,7 @@ tts_onnx_pretrained_models = {
            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip',
            'md5':
            'a1d6ee21de897ce394f5469e2bb4df0d',
-            'ckpt':
-            'fastspeech2_aishell3.onnx',
+            'ckpt': ['fastspeech2_aishell3.onnx'],
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
@@ -1153,11 +1131,10 @@ tts_onnx_pretrained_models = {
    "fastspeech2_vctk_onnx-en": {
        '1.0': {
            'url':
-            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip',
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip',
            'md5':
            'd9c3a9b02204a2070504dd99f5f959bf',
-            'ckpt':
-            'fastspeech2_vctk.onnx',
+            'ckpt': ['fastspeech2_vctk.onnx'],
            'phones_dict':
            'phone_id_map.txt',
            'speaker_dict':
@@ -1335,3 +1312,17 @@ kws_dynamic_pretrained_models = {
        },
    },
 }
+
+# ---------------------------------
+# ------------- G2PW ---------------
+# ---------------------------------
+g2pw_onnx_models = {
+    'G2PWModel': {
+        '1.0': {
+            'url':
+            'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel.tar',
+            'md5':
+            '86a3dd8db0291c575c46e134111dce23',
+        },
+    },
+}
--- a/paddlespeech/t2s/frontend/g2pw/onnx_api.py
+++ b/paddlespeech/t2s/frontend/g2pw/onnx_api.py
@@ -10,14 +10,14 @@ import numpy as np
 from opencc import OpenCC

 from paddlenlp.transformers import BertTokenizer
-
+from paddlespeech.utils.env import MODEL_HOME
 from paddlespeech.t2s.frontend.g2pw.dataset import prepare_data,\
                                                   prepare_onnx_input,\
                                                   get_phoneme_labels,\
                                                   get_char_phoneme_labels
 from paddlespeech.t2s.frontend.g2pw.utils import load_config
-
-MODEL_URL = 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/g2p/G2PWModel.tar'
+from paddlespeech.cli.utils import download_and_decompress
+from paddlespeech.resource.pretrained_models import g2pw_onnx_models


 def predict(session, onnx_input, labels):
@@ -40,21 +40,10 @@ def predict(session, onnx_input, labels):
    return all_preds, all_confidences


-def download_model(model_dir):
-    os.makedirs(model_dir, exist_ok=True)
-    wget_shell = "cd %s  && wget %s"%(model_dir,MODEL_URL)
-    os.system(wget_shell)
-    shell = "cd %s ;tar -xvf %s;cd %s/G2PWModel;rm -rf .*" % (model_dir,MODEL_URL.split("/")[-1], model_dir)
-    os.system(shell)
-    rm_shell = "cd %s && rm -rf %s"%(model_dir,MODEL_URL.split("/")[-1])
-    os.system(rm_shell)
-
-
 class G2PWOnnxConverter:
-    def __init__(self, style='bopomofo', model_source=None, enable_non_tradional_chinese=False):
-        model_dir = os.path.join(os.path.expandvars('$HOME'), 'paddlespeech/models')
+    def __init__(self, model_dir = MODEL_HOME, style='bopomofo', model_source=None, enable_non_tradional_chinese=False):
        if not os.path.exists(os.path.join(model_dir, 'G2PWModel/g2pW.onnx')):
-            download_model(model_dir)
+            uncompress_path = download_and_decompress(g2pw_onnx_models['G2PWModel']['1.0'],model_dir)

        sess_options = onnxruntime.SessionOptions()
        sess_options.intra_op_num_threads = 2