From 3e9f141f099e727e5736e7f08c2223751955773f Mon Sep 17 00:00:00 2001 From: jiamingkong Date: Wed, 24 May 2023 21:49:12 +0800 Subject: [PATCH] Adapted wavlmASR model to pretrained weights and CLI --- examples/librispeech/asr5/README.md | 4 ++-- paddlespeech/resource/model_alias.py | 1 + paddlespeech/resource/pretrained_models.py | 10 ++++++++++ paddlespeech/s2t/models/wavlm/__init__.py | 2 ++ 4 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 paddlespeech/s2t/models/wavlm/__init__.py diff --git a/examples/librispeech/asr5/README.md b/examples/librispeech/asr5/README.md index c684ca547..826c33cec 100644 --- a/examples/librispeech/asr5/README.md +++ b/examples/librispeech/asr5/README.md @@ -184,8 +184,8 @@ In some situations, you want to use the trained model to do the inference for th ``` you can train the model by yourself using ```bash run.sh --stage 0 --stop_stage 3```, or you can download the pretrained model through the script below: ```bash -wget https://paddlespeech.bj.bcebos.com/wavlm/wavlmASR-base-100h-librispeech_ckpt_1.4.0.model.tar.gz -tar xzvf wavlmASR-base-100h-librispeech_ckpt_1.4.0.model.tar.gz +wget https://paddlespeech.bj.bcebos.com/wavlm/wavlm_baseplus_libriclean_100h.tar.gz +tar xzvf wavlm_baseplus_libriclean_100h.tar.gz ``` You can download the audio demo: ```bash diff --git a/paddlespeech/resource/model_alias.py b/paddlespeech/resource/model_alias.py index 04872c72e..6bf9b588e 100644 --- a/paddlespeech/resource/model_alias.py +++ b/paddlespeech/resource/model_alias.py @@ -25,6 +25,7 @@ model_alias = { "wav2vec2": ["paddlespeech.s2t.models.wav2vec2:Wav2vec2Base"], "hubertASR": ["paddlespeech.s2t.models.hubert:HubertASR"], "hubert": ["paddlespeech.s2t.models.hubert:HubertBase"], + "wavlmASR": ["paddlespeech.s2t.models.wavlm:WavLMASR"], # --------------------------------- # -------------- ASR -------------- diff --git a/paddlespeech/resource/pretrained_models.py b/paddlespeech/resource/pretrained_models.py index e56188640..5a68563fe 100644 --- a/paddlespeech/resource/pretrained_models.py +++ b/paddlespeech/resource/pretrained_models.py @@ -149,6 +149,16 @@ ssl_dynamic_pretrained_models = { 'exp/hubertASR/checkpoints/avg_1.pdparams', }, }, + "wavlmASR_librispeech-en-16k": { + "1.0": { + "url": "https://paddlespeech.bj.bcebos.com/wavlm/wavlm_baseplus_libriclean_100h.tar.gz", + "md5": "cfa5157302d449f1dc960ad9719d72fa", + "cfg_path": "model.yaml", + "ckpt_path": "exp/wavlmASR/checkpoints/46", + "model": "exp/wavlmASR/checkpoints/46.pdparams", + "params": "exp/wavlmASR/checkpoints/46.pdparams", + } + } } # --------------------------------- diff --git a/paddlespeech/s2t/models/wavlm/__init__.py b/paddlespeech/s2t/models/wavlm/__init__.py new file mode 100644 index 000000000..cf69114ea --- /dev/null +++ b/paddlespeech/s2t/models/wavlm/__init__.py @@ -0,0 +1,2 @@ +from .wavlm_paddle import WavLM, WavLMConfig +from .wavlm_asr import WavLMASR, WavLMBase \ No newline at end of file