From 3145325b4eafd93d803a97d675fd00551b63a2b0 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Fri, 10 Mar 2023 11:30:09 +0800 Subject: [PATCH] [ASR] add wav2vec2 aishell model result, test=asr (#3012) * Create RESULT.md * add wav2vec2ASR-large-aishell1 finetune model. * update model link and add readme. * fix released model info. --- README.md | 1 + README_cn.md | 1 + docs/source/released_model.md | 2 +- examples/aishell/asr3/README.md | 8 ++--- examples/aishell/asr3/RESULT.md | 17 +++++++++++ paddlespeech/resource/pretrained_models.py | 35 ++++++++++++++++------ 6 files changed, 50 insertions(+), 14 deletions(-) create mode 100644 examples/aishell/asr3/RESULT.md diff --git a/README.md b/README.md index 0cb99d1c..5c5dc3a0 100644 --- a/README.md +++ b/README.md @@ -178,6 +178,7 @@ Via the easy-to-use, efficient, flexible and scalable implementation, our vision - 🧩 *Cascaded models application*: as an extension of the typical traditional audio tasks, we combine the workflows of the aforementioned tasks with other fields like Natural language processing (NLP) and Computer Vision (CV). ### Recent Update +- 👑 2023.03.09: Add [Wav2vec2ASR-zh](./examples/aishell/asr3). - 🎉 2023.03.07: Add [TTS ARM Linux C++ Demo](./demos/TTSArmLinux). - 🎉 2023.02.16: Add [Cantonese TTS](./examples/canton/tts3). - 🔥 2023.01.10: Add [code-switch asr CLI and Demos](./demos/speech_recognition). diff --git a/README_cn.md b/README_cn.md index 0f2adf81..fa013029 100644 --- a/README_cn.md +++ b/README_cn.md @@ -183,6 +183,7 @@ - 🧩 级联模型应用: 作为传统语音任务的扩展,我们结合了自然语言处理、计算机视觉等任务,实现更接近实际需求的产业级应用。 ### 近期更新 +- 👑 2023.03.09: 新增 [Wav2vec2ASR-zh](./examples/aishell/asr3). 
- 🎉 2023.03.07: 新增 [TTS ARM Linux C++ 部署示例](./demos/TTSArmLinux)。 - 🎉 2023.02.16: 新增[粤语语音合成](./examples/canton/tts3)。 - 🔥 2023.01.10: 新增[中英混合 ASR CLI 和 Demos](./demos/speech_recognition)。 diff --git a/docs/source/released_model.md b/docs/source/released_model.md index 634be7b7..9e922177 100644 --- a/docs/source/released_model.md +++ b/docs/source/released_model.md @@ -25,7 +25,7 @@ Model | Pre-Train Method | Pre-Train Data | Finetune Data | Size | Descriptions [Wav2vec2-large-960h-lv60-self Model](https://paddlespeech.bj.bcebos.com/wav2vec/wav2vec2-large-960h-lv60-self.pdparams) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | - | 1.18 GB |Pre-trained Wav2vec2.0 Model | - | - | - | [Wav2vec2ASR-large-960h-librispeech Model](https://paddlespeech.bj.bcebos.com/s2t/librispeech/asr3/wav2vec2ASR-large-960h-librispeech_ckpt_1.3.1.model.tar.gz) | wav2vec2 | Librispeech and LV-60k Dataset (5.3w h) | Librispeech (960 h) | 718 MB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | - | 0.0189 | [Wav2vecASR Librispeech ASR3](../../examples/librispeech/asr3) | [Wav2vec2-large-wenetspeech-self Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2-large-wenetspeech-self_ckpt_1.3.0.model.tar.gz) | wav2vec2 | Wenetspeech Dataset (1w h) | - | 714 MB |Pre-trained Wav2vec2.0 Model | - | - | - | -[Wav2vec2ASR-large-aishell1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.3.0.model.tar.gz) | wav2vec2 | Wenetspeech Dataset (1w h) | aishell1 (train set) | 1.17 GB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | 0.0453 | - | - | +[Wav2vec2ASR-large-aishell1 Model](https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz) | wav2vec2 | Wenetspeech Dataset (1w h) | aishell1 (train set) | 1.18 GB |Encoder: Wav2vec2.0, Decoder: CTC, Decoding method: Greedy search | 0.0510 | - | - | ### Whisper Model Demo Link | Training Data | Size | Descriptions | 
CER | Model diff --git a/examples/aishell/asr3/README.md b/examples/aishell/asr3/README.md index e5806d62..f6fa60d7 100644 --- a/examples/aishell/asr3/README.md +++ b/examples/aishell/asr3/README.md @@ -164,8 +164,8 @@ using the `tar` scripts to unpack the model and then you can use the script to t For example: ```bash -wget https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.3.0.model.tar.gz -tar xzvf wav2vec2ASR-large-aishell1_ckpt_1.3.0.model.tar.gz +wget https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz +tar xzvf wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz source path.sh # If you have process the data and get the manifest file, you can skip the following 2 steps bash local/data.sh --stage -1 --stop_stage -1 @@ -185,8 +185,8 @@ In some situations, you want to use the trained model to do the inference for th ``` you can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below: ```bash -wget https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.3.0.model.tar.gz -tar xzvf wav2vec2ASR-large-aishell1_ckpt_1.3.0.model.tar.gz +wget https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz +tar xzvf wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz ``` You can download the audio demo: ```bash diff --git a/examples/aishell/asr3/RESULT.md b/examples/aishell/asr3/RESULT.md new file mode 100644 index 00000000..1291ef15 --- /dev/null +++ b/examples/aishell/asr3/RESULT.md @@ -0,0 +1,17 @@ +# AISHELL + +## Version + +* paddle version: develop (commit id: daea892c67e85da91906864de40ce9f6f1b893ae) +* paddlespeech version: develop (commit id: c14b4238b256693281e59605abff7c9435b3e2b2) + +## Device +* python: 3.7 +* cuda: 10.2 +* cudnn: 7.6 + +## Result +train: Epoch 80, 2*V100-32G, batchsize:5 +| Model | Params | Config | 
Augmentation| Test set | Decode method | CER | +| --- | --- | --- | --- | --- | --- | --- | +| wav2vec2ASR | 324.49 M | conf/wav2vec2ASR.yaml | spec_aug | test-set | greedy search | 5.1009 | diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index dd5f08b0..04df1862 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -103,6 +103,22 @@ ssl_dynamic_pretrained_models = { 'exp/wav2vec2ASR/checkpoints/avg_1.pdparams', }, }, + "wav2vec2ASR_aishell1-zh-16k": { + '1.4': { + 'url': + 'https://paddlespeech.bj.bcebos.com/s2t/aishell/asr3/wav2vec2ASR-large-aishell1_ckpt_1.4.0.model.tar.gz', + 'md5': + '9f0bc943adb822789bf61e674b229d17', + 'cfg_path': + 'model.yaml', + 'ckpt_path': + 'exp/wav2vec2ASR/checkpoints/avg_1', + 'model': + 'exp/wav2vec2ASR/checkpoints/avg_1.pdparams', + 'params': + 'exp/wav2vec2ASR/checkpoints/avg_1.pdparams', + }, + }, } # --------------------------------- @@ -1644,8 +1660,8 @@ tts_static_pretrained_models["pwgan_male-en"] = tts_static_pretrained_models[ "pwgan_male-mix"] = tts_static_pretrained_models["pwgan_male-zh"] tts_static_pretrained_models["hifigan_male-en"] = tts_static_pretrained_models[ "hifigan_male-mix"] = tts_static_pretrained_models["hifigan_male-zh"] -tts_static_pretrained_models["pwgan_aishell3-canton"] = tts_static_pretrained_models[ - "pwgan_aishell3-zh"] +tts_static_pretrained_models[ + "pwgan_aishell3-canton"] = tts_static_pretrained_models["pwgan_aishell3-zh"] tts_onnx_pretrained_models = { # speedyspeech @@ -1979,8 +1995,9 @@ tts_onnx_pretrained_models["pwgan_male_onnx-en"] = tts_onnx_pretrained_models[ tts_onnx_pretrained_models["hifigan_male_onnx-en"] = tts_onnx_pretrained_models[ "hifigan_male_onnx-mix"] = tts_onnx_pretrained_models[ "hifigan_male_onnx-zh"] -tts_onnx_pretrained_models["pwgan_aishell3_onnx-canton"] = tts_onnx_pretrained_models[ - "pwgan_aishell3_onnx-zh"] +tts_onnx_pretrained_models[ + 
"pwgan_aishell3_onnx-canton"] = tts_onnx_pretrained_models[ + "pwgan_aishell3_onnx-zh"] # --------------------------------- # ------------ Vector ------------- @@ -2058,10 +2075,10 @@ rhy_frontend_models = { # --------------------------------- StarGANv2VC_source = { - '1.0' :{ - 'url': 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/starganv2vc/StarGANv2VC_source.zip', - 'md5': '195e169419163f5648030ba84c71f866', - + '1.0': { + 'url': + 'https://paddlespeech.bj.bcebos.com/Parakeet/released_models/starganv2vc/StarGANv2VC_source.zip', + 'md5': + '195e169419163f5648030ba84c71f866', } } -