From d21e03c03e4fb29cbd6ce3b708de19a6d542a04a Mon Sep 17 00:00:00 2001 From: TianYuan Date: Fri, 26 Aug 2022 18:06:12 +0800 Subject: [PATCH] update tts3 readme, test=doc (#2315) --- docs/source/released_model.md | 6 ++++-- examples/aishell3/tts3/README.md | 15 ++++++++------- examples/aishell3/tts3/local/synthesize_e2e.sh | 6 +++--- examples/other/g2p/README.md | 2 +- examples/vctk/tts3/README.md | 16 +++++++++------- examples/zh_en_tts/tts3/README.md | 14 ++++++++------ 6 files changed, 33 insertions(+), 26 deletions(-) diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 8d0ff1d4..d6691812 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -42,9 +42,11 @@ SpeedySpeech| CSMSC | [speedyspeech-csmsc](https://github.com/PaddlePaddle/Paddl FastSpeech2| CSMSC |[fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)|[fastspeech2_nosil_baker_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_baker_ckpt_0.4.zip)|[fastspeech2_csmsc_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_static_0.2.0.zip)
[fastspeech2_csmsc_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_csmsc_onnx_0.2.0.zip)|157MB| FastSpeech2-Conformer| CSMSC |[fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)|[fastspeech2_conformer_baker_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_conformer_baker_ckpt_0.5.zip)||| FastSpeech2-CNNDecoder| CSMSC| [fastspeech2-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/csmsc/tts3)| [fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_ckpt_1.0.0.zip) | [fastspeech2_cnndecoder_csmsc_static_1.0.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_static_1.0.0.zip)
[fastspeech2_cnndecoder_csmsc_streaming_static_1.0.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_static_1.0.0.zip)
[fastspeech2_cnndecoder_csmsc_onnx_1.0.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_onnx_1.0.0.zip)
[fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_cnndecoder_csmsc_streaming_onnx_1.0.0.zip) | 84MB| -FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3)|[fastspeech2_nosil_aishell3_ckpt_0.4.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_ckpt_0.4.zip)|[fastspeech2_aishell3_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_static_1.1.0.zip)
[fastspeech2_aishell3_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip)|147MB| +FastSpeech2| AISHELL-3 |[fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/tts3)|[fastspeech2_aishell3_ckpt_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_ckpt_1.1.0.zip)|[fastspeech2_aishell3_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_static_1.1.0.zip)
[fastspeech2_aishell3_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_aishell3_onnx_1.1.0.zip)|147MB| FastSpeech2| LJSpeech |[fastspeech2-ljspeech](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/ljspeech/tts3)|[fastspeech2_nosil_ljspeech_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_ljspeech_ckpt_0.5.zip)|[fastspeech2_ljspeech_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_static_1.1.0.zip)
[fastspeech2_ljspeech_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_ljspeech_onnx_1.1.0.zip)|145MB| -FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_nosil_vctk_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_vctk_ckpt_0.5.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip)
[fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) | 145MB| +FastSpeech2| VCTK |[fastspeech2-vctk](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/vctk/tts3)|[fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip)|[fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip)
[fastspeech2_vctk_onnx_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_onnx_1.1.0.zip) | 145MB| +FastSpeech2| ZH_EN |[fastspeech2-zh_en](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/zh_en_tts/tts3)|[fastspeech2_mix_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_ckpt_1.2.0.zip)|[fastspeech2_mix_static_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_static_0.2.0.zip)
[fastspeech2_mix_onnx_0.2.0.zip](https://paddlespeech.bj.bcebos.com/t2s/chinse_english_mixed/models/fastspeech2_mix_onnx_0.2.0.zip) | 145MB| + ### Vocoders Model Type | Dataset| Example Link | Pretrained Models| Static/ONNX Models|Size (static) diff --git a/examples/aishell3/tts3/README.md b/examples/aishell3/tts3/README.md index 6ef2870c..3e1dee2f 100644 --- a/examples/aishell3/tts3/README.md +++ b/examples/aishell3/tts3/README.md @@ -229,9 +229,11 @@ The ONNX model can be downloaded here: FastSpeech2 checkpoint contains files listed below. ```text -fastspeech2_nosil_aishell3_ckpt_0.4 +fastspeech2_aishell3_ckpt_1.1.0 ├── default.yaml # default config used to train fastspeech2 +├── energy_stats.npy # statistics used to normalize energy when training fastspeech2 ├── phone_id_map.txt # phone vocabulary file when training fastspeech2 +├── pitch_stats.npy # statistics used to normalize pitch when training fastspeech2 ├── snapshot_iter_96400.pdz # model parameters and optimizer states ├── speaker_id_map.txt # speaker id map file when training a multi-speaker fastspeech2 └── speech_stats.npy # statistics used to normalize spectrogram when training fastspeech2 @@ -244,9 +246,9 @@ FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ python3 ${BIN_DIR}/../synthesize_e2e.py \ --am=fastspeech2_aishell3 \ - --am_config=fastspeech2_nosil_aishell3_ckpt_0.4/default.yaml \ - --am_ckpt=fastspeech2_nosil_aishell3_ckpt_0.4/snapshot_iter_96400.pdz \ - --am_stat=fastspeech2_nosil_aishell3_ckpt_0.4/speech_stats.npy \ + --am_config=fastspeech2_aishell3_ckpt_1.1.0/default.yaml \ + --am_ckpt=fastspeech2_aishell3_ckpt_1.1.0/snapshot_iter_96400.pdz \ + --am_stat=fastspeech2_aishell3_ckpt_1.1.0/speech_stats.npy \ --voc=pwgan_aishell3 \ --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \ --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ @@ -254,9 +256,8 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ 
--output_dir=exp/default/test_e2e \ - --phones_dict=fastspeech2_nosil_aishell3_ckpt_0.4/phone_id_map.txt \ - --speaker_dict=fastspeech2_nosil_aishell3_ckpt_0.4/speaker_id_map.txt \ + --phones_dict=fastspeech2_aishell3_ckpt_1.1.0/phone_id_map.txt \ + --speaker_dict=fastspeech2_aishell3_ckpt_1.1.0/speaker_id_map.txt \ --spk_id=0 \ --inference_dir=exp/default/inference - ``` diff --git a/examples/aishell3/tts3/local/synthesize_e2e.sh b/examples/aishell3/tts3/local/synthesize_e2e.sh index ff3608be..158350ae 100755 --- a/examples/aishell3/tts3/local/synthesize_e2e.sh +++ b/examples/aishell3/tts3/local/synthesize_e2e.sh @@ -38,7 +38,7 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --am=fastspeech2_aishell3 \ --am_config=${config_path} \ --am_ckpt=${train_output_path}/checkpoints/${ckpt_name} \ - --am_stat=fastspeech2_nosil_aishell3_ckpt_0.4/speech_stats.npy \ + --am_stat=dump/train/speech_stats.npy \ --voc=hifigan_aishell3 \ --voc_config=hifigan_aishell3_ckpt_0.2.0/default.yaml \ --voc_ckpt=hifigan_aishell3_ckpt_0.2.0/snapshot_iter_2500000.pdz \ @@ -46,8 +46,8 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then --lang=zh \ --text=${BIN_DIR}/../sentences.txt \ --output_dir=${train_output_path}/test_e2e \ - --phones_dict=fastspeech2_nosil_aishell3_ckpt_0.4/phone_id_map.txt \ - --speaker_dict=fastspeech2_nosil_aishell3_ckpt_0.4/speaker_id_map.txt \ + --phones_dict=dump/phone_id_map.txt \ + --speaker_dict=dump/speaker_id_map.txt \ --spk_id=0 \ --inference_dir=${train_output_path}/inference fi diff --git a/examples/other/g2p/README.md b/examples/other/g2p/README.md index a8f8f734..88294350 100644 --- a/examples/other/g2p/README.md +++ b/examples/other/g2p/README.md @@ -12,7 +12,7 @@ Run the command below to get the results of the test. ./run.sh ``` -The `avg WER` of g2p is: 0.024219452438490413 +The `avg WER` of g2p is: 0.024169315564825305 ```text ,--------------------------------------------------------------------. 
diff --git a/examples/vctk/tts3/README.md b/examples/vctk/tts3/README.md index 9c0d7561..2a2f27fd 100644 --- a/examples/vctk/tts3/README.md +++ b/examples/vctk/tts3/README.md @@ -216,7 +216,7 @@ optional arguments: ## Pretrained Model Pretrained FastSpeech2 model with no silence in the edge of audios: -- [fastspeech2_nosil_vctk_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_vctk_ckpt_0.5.zip) +- [fastspeech2_vctk_ckpt_1.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_ckpt_1.2.0.zip) The static model can be downloaded here: - [fastspeech2_vctk_static_1.1.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_vctk_static_1.1.0.zip) @@ -226,9 +226,11 @@ The ONNX model can be downloaded here: FastSpeech2 checkpoint contains files listed below. ```text -fastspeech2_nosil_vctk_ckpt_0.5 +fastspeech2_vctk_ckpt_1.2.0 ├── default.yaml # default config used to train fastspeech2 +├── energy_stats.npy # statistics used to normalize energy when training fastspeech2 ├── phone_id_map.txt # phone vocabulary file when training fastspeech2 +├── pitch_stats.npy # statistics used to normalize pitch when training fastspeech2 ├── snapshot_iter_66200.pdz # model parameters and optimizer states ├── speaker_id_map.txt # speaker id map file when training a multi-speaker fastspeech2 └── speech_stats.npy # statistics used to normalize spectrogram when training fastspeech2 @@ -241,9 +243,9 @@ FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ python3 ${BIN_DIR}/../synthesize_e2e.py \ --am=fastspeech2_vctk \ - --am_config=fastspeech2_nosil_vctk_ckpt_0.5/default.yaml \ - --am_ckpt=fastspeech2_nosil_vctk_ckpt_0.5/snapshot_iter_66200.pdz \ - --am_stat=fastspeech2_nosil_vctk_ckpt_0.5/speech_stats.npy \ + --am_config=fastspeech2_vctk_ckpt_1.2.0/default.yaml \ + --am_ckpt=fastspeech2_vctk_ckpt_1.2.0/snapshot_iter_66200.pdz \ + 
--am_stat=fastspeech2_vctk_ckpt_1.2.0/speech_stats.npy \ --voc=pwgan_vctk \ --voc_config=pwg_vctk_ckpt_0.1.1/default.yaml \ --voc_ckpt=pwg_vctk_ckpt_0.1.1/snapshot_iter_1500000.pdz \ @@ -251,8 +253,8 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --lang=en \ --text=${BIN_DIR}/../sentences_en.txt \ --output_dir=exp/default/test_e2e \ - --phones_dict=fastspeech2_nosil_vctk_ckpt_0.5/phone_id_map.txt \ - --speaker_dict=fastspeech2_nosil_vctk_ckpt_0.5/speaker_id_map.txt \ + --phones_dict=fastspeech2_vctk_ckpt_1.2.0/phone_id_map.txt \ + --speaker_dict=fastspeech2_vctk_ckpt_1.2.0/speaker_id_map.txt \ --spk_id=0 \ --inference_dir=exp/default/inference ``` diff --git a/examples/zh_en_tts/tts3/README.md b/examples/zh_en_tts/tts3/README.md index e7365baa..b4b68308 100644 --- a/examples/zh_en_tts/tts3/README.md +++ b/examples/zh_en_tts/tts3/README.md @@ -262,9 +262,11 @@ The ONNX model can be downloaded here: FastSpeech2 checkpoint contains files listed below. ```text -fastspeech2_mix_ckpt_0.2.0 +fastspeech2_mix_ckpt_1.2.0 ├── default.yaml # default config used to train fastspeech2 +├── energy_stats.npy # statistics used to normalize energy when training fastspeech2 ├── phone_id_map.txt # phone vocabulary file when training fastspeech2 +├── pitch_stats.npy # statistics used to normalize pitch when training fastspeech2 ├── snapshot_iter_99200.pdz # model parameters and optimizer states ├── speaker_id_map.txt # speaker id map file when training a multi-speaker fastspeech2 └── speech_stats.npy # statistics used to normalize spectrogram when training fastspeech2 @@ -281,9 +283,9 @@ FLAGS_allocator_strategy=naive_best_fit \ FLAGS_fraction_of_gpu_memory_to_use=0.01 \ python3 ${BIN_DIR}/../synthesize_e2e.py \ --am=fastspeech2_mix \ - --am_config=fastspeech2_mix_ckpt_0.2.0/default.yaml \ - --am_ckpt=fastspeech2_mix_ckpt_0.2.0/snapshot_iter_99200.pdz \ - --am_stat=fastspeech2_mix_ckpt_0.2.0/speech_stats.npy \ + --am_config=fastspeech2_mix_ckpt_1.2.0/default.yaml \ + 
--am_ckpt=fastspeech2_mix_ckpt_1.2.0/snapshot_iter_99200.pdz \ + --am_stat=fastspeech2_mix_ckpt_1.2.0/speech_stats.npy \ --voc=pwgan_aishell3 \ --voc_config=pwg_aishell3_ckpt_0.5/default.yaml \ --voc_ckpt=pwg_aishell3_ckpt_0.5/snapshot_iter_1000000.pdz \ @@ -291,8 +293,8 @@ python3 ${BIN_DIR}/../synthesize_e2e.py \ --lang=mix \ --text=${BIN_DIR}/../sentences_mix.txt \ --output_dir=exp/default/test_e2e \ - --phones_dict=fastspeech2_mix_ckpt_0.2.0/phone_id_map.txt \ - --speaker_dict=fastspeech2_mix_ckpt_0.2.0/speaker_id_map.txt \ + --phones_dict=fastspeech2_mix_ckpt_1.2.0/phone_id_map.txt \ + --speaker_dict=fastspeech2_mix_ckpt_1.2.0/speaker_id_map.txt \ --spk_id=174 \ --inference_dir=exp/default/inference ```