diff --git a/docs/source/released_model.md b/docs/source/released_model.md
index a1e3eb87..8d0ff1d4 100644
--- a/docs/source/released_model.md
+++ b/docs/source/released_model.md
@@ -67,7 +67,7 @@ WaveRNN | CSMSC |[WaveRNN-csmsc](https://github.com/PaddlePaddle/PaddleSpeech/tr
 
 Model Type | Dataset| Example Link | Pretrained Models
 :-------------:| :------------:| :-----: | :-----: |
 GE2E| AISHELL-3, etc. |[ge2e](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/other/ge2e)|[ge2e_ckpt_0.3.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/ge2e/ge2e_ckpt_0.3.zip)
-GE2E + Tactron2| AISHELL-3 |[ge2e-tactron2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0)|[tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip)
+GE2E + Tacotron2| AISHELL-3 |[ge2e-tacotron2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc0)|[tacotron2_aishell3_ckpt_vc0_0.2.0.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/tacotron2/tacotron2_aishell3_ckpt_vc0_0.2.0.zip)
 GE2E + FastSpeech2 | AISHELL-3 |[ge2e-fastspeech2-aishell3](https://github.com/PaddlePaddle/PaddleSpeech/tree/develop/examples/aishell3/vc1)|[fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip](https://paddlespeech.bj.bcebos.com/Parakeet/released_models/fastspeech2/fastspeech2_nosil_aishell3_vc1_ckpt_0.5.zip)
diff --git a/docs/source/tts/quick_start.md b/docs/source/tts/quick_start.md
index bddee778..d8dbc646 100644
--- a/docs/source/tts/quick_start.md
+++ b/docs/source/tts/quick_start.md
@@ -7,7 +7,7 @@ The examples in PaddleSpeech are mainly classified by datasets, the TTS datasets
 * VCTK (English multiple speakers)
 
 The models in PaddleSpeech TTS have the following mapping relationship:
-* tts0 - Tactron2
+* tts0 - Tacotron2
 * tts1 - TransformerTTS
 * tts2 - SpeedySpeech
 * tts3 - FastSpeech2
@@ -17,7 +17,7 @@ The models in PaddleSpeech TTS have the following mapping relationship:
 * voc3 - MultiBand MelGAN
 * voc4 - Style MelGAN
 * voc5 - HiFiGAN
-* vc0 - Tactron2 Voice Clone with GE2E
+* vc0 - Tacotron2 Voice Clone with GE2E
 * vc1 - FastSpeech2 Voice Clone with GE2E
 
 ## Quick Start
diff --git a/docs/source/tts/quick_start_cn.md b/docs/source/tts/quick_start_cn.md
index 37246e84..c56d9bb4 100644
--- a/docs/source/tts/quick_start_cn.md
+++ b/docs/source/tts/quick_start_cn.md
@@ -9,7 +9,7 @@
 
 PaddleSpeech 的 TTS 模型具有以下映射关系:
 
-* tts0 - Tactron2
+* tts0 - Tacotron2
 * tts1 - TransformerTTS
 * tts2 - SpeedySpeech
 * tts3 - FastSpeech2
@@ -19,7 +19,7 @@ PaddleSpeech 的 TTS 模型具有以下映射关系:
 * voc3 - MultiBand MelGAN
 * voc4 - Style MelGAN
 * voc5 - HiFiGAN
-* vc0 - Tactron2 Voice Clone with GE2E
+* vc0 - Tacotron2 Voice Clone with GE2E
 * vc1 - FastSpeech2 Voice Clone with GE2E
 
 ## 快速开始
diff --git a/docs/tutorial/tts/tts_tutorial.ipynb b/docs/tutorial/tts/tts_tutorial.ipynb
index 81f713ef..583adb01 100644
--- a/docs/tutorial/tts/tts_tutorial.ipynb
+++ b/docs/tutorial/tts/tts_tutorial.ipynb
@@ -769,7 +769,7 @@
     "```\n",
     "我们在每个数据集的 README.md 介绍了子目录和模型的对应关系, 在 TTS 中有如下对应关系:\n",
     "```text\n",
-    "tts0 - Tactron2\n",
+    "tts0 - Tacotron2\n",
     "tts1 - TransformerTTS\n",
     "tts2 - SpeedySpeech\n",
     "tts3 - FastSpeech2\n",
diff --git a/examples/aishell3/README.md b/examples/aishell3/README.md
index 273f488e..191974de 100644
--- a/examples/aishell3/README.md
+++ b/examples/aishell3/README.md
@@ -1,6 +1,6 @@
 # Aishell3
 
-* tts0 - Tactron2
+* tts0 - Tacotron2
 * tts1 - TransformerTTS
 * tts2 - SpeedySpeech
 * tts3 - FastSpeech2
@@ -8,5 +8,5 @@
 * voc1 - Parallel WaveGAN
 * voc2 - MelGAN
 * voc3 - MultiBand MelGAN
-* vc0 - Tactron2 Voice Cloning with GE2E
+* vc0 - Tacotron2 Voice Cloning with GE2E
 * vc1 - FastSpeech2 Voice Cloning with GE2E
diff --git a/examples/csmsc/README.md b/examples/csmsc/README.md
index 2aad609c..77375faa 100644
--- a/examples/csmsc/README.md
+++ b/examples/csmsc/README.md
@@ -1,7 +1,7 @@
 # CSMSC
 
-* tts0 - Tactron2
+* tts0 - Tacotron2
 * tts1 - TransformerTTS
 * tts2 - SpeedySpeech
 * tts3 - FastSpeech2
diff --git a/examples/ljspeech/README.md b/examples/ljspeech/README.md
index 67b1bf47..ccafdb14 100644
--- a/examples/ljspeech/README.md
+++ b/examples/ljspeech/README.md
@@ -1,7 +1,7 @@
 # LJSpeech
 
-* tts0 - Tactron2
+* tts0 - Tacotron2
 * tts1 - TransformerTTS
 * tts2 - SpeedySpeech
 * tts3 - FastSpeech2
diff --git a/examples/vctk/README.md b/examples/vctk/README.md
index 4007c031..ac5fd24f 100644
--- a/examples/vctk/README.md
+++ b/examples/vctk/README.md
@@ -1,7 +1,7 @@
 # VCTK
 
-* tts0 - Tactron2
+* tts0 - Tacotron2
 * tts1 - TransformerTTS
 * tts2 - SpeedySpeech
 * tts3 - FastSpeech2
diff --git a/paddlespeech/t2s/frontend/g2pw/dataset.py b/paddlespeech/t2s/frontend/g2pw/dataset.py
index ab715dc3..98af5f46 100644
--- a/paddlespeech/t2s/frontend/g2pw/dataset.py
+++ b/paddlespeech/t2s/frontend/g2pw/dataset.py
@@ -81,12 +81,12 @@ def prepare_onnx_input(tokenizer,
         position_ids.append(position_id)
 
     outputs = {
-        'input_ids': np.array(input_ids),
-        'token_type_ids': np.array(token_type_ids),
-        'attention_masks': np.array(attention_masks),
+        'input_ids': np.array(input_ids).astype(np.int64),
+        'token_type_ids': np.array(token_type_ids).astype(np.int64),
+        'attention_masks': np.array(attention_masks).astype(np.int64),
         'phoneme_masks': np.array(phoneme_masks).astype(np.float32),
-        'char_ids': np.array(char_ids),
-        'position_ids': np.array(position_ids),
+        'char_ids': np.array(char_ids).astype(np.int64),
+        'position_ids': np.array(position_ids).astype(np.int64),
     }
     return outputs
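
The `dataset.py` hunk is a behavioral fix rather than a cleanup: ONNX Runtime validates each fed tensor's dtype against the type declared in the exported graph, and `np.array()` over Python ints uses the platform default integer (int32 on Windows), so un-cast id/mask arrays can be rejected at inference time. Below is a minimal sketch of how the cast arrays would be consumed; the model path, input names, and toy values are illustrative assumptions, not taken from the patch.

```python
# Illustrative sketch only: the model path, input names, and values below are
# assumptions, not part of the patch.
import numpy as np
import onnxruntime as ort

# Hypothetical G2PW-style ONNX model whose graph declares int64 id/mask inputs.
session = ort.InferenceSession("g2pw.onnx", providers=["CPUExecutionProvider"])

input_ids = [[101, 1000, 102]]
token_type_ids = [[0, 0, 0]]
attention_masks = [[1, 1, 1]]

# Without .astype(np.int64), np.array() may yield int32 (e.g. on Windows) and
# onnxruntime rejects the feed with an error such as:
#   "Unexpected input data type. Actual: (tensor(int32)), expected: (tensor(int64))"
feed = {
    "input_ids": np.array(input_ids).astype(np.int64),
    "token_type_ids": np.array(token_type_ids).astype(np.int64),
    "attention_masks": np.array(attention_masks).astype(np.int64),
}
outputs = session.run(None, feed)
```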