@@ -18,15 +18,16 @@ import numpy as np
 from paddle import inference
 from scipy.special import softmax
 
-from paddlespeech.cls.backends import load as load_audio
-from paddlespeech.cls.datasets import ESC50
-from paddlespeech.cls.features import melspectrogram
+from paddleaudio.backends import load as load_audio
+from paddleaudio.datasets import ESC50
+from paddleaudio.features import melspectrogram
 
 # yapf: disable
 parser = argparse.ArgumentParser()
 parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.")
-parser.add_argument("--batch_size", type=int, default=2, help="Batch size per GPU/CPU for training.")
 parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
+parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.")
+parser.add_argument("--batch_size", type=int, default=1, help="Batch size per GPU/CPU for training.")
 parser.add_argument('--use_tensorrt', type=eval, default=False, choices=[True, False], help='Enable to use tensorrt to speed up.')
 parser.add_argument("--precision", type=str, default="fp32", choices=["fp32", "fp16"], help='The tensorrt precision.')
 parser.add_argument('--cpu_threads', type=int, default=10, help='Number of threads to predict when using cpu.')
@@ -132,10 +133,7 @@ if __name__ == "__main__":
                           args.use_tensorrt, args.precision, args.cpu_threads,
                           args.enable_mkldnn)
 
-    wavs = [
-        '~/audio_demo_resource/cat.wav',
-        '~/audio_demo_resource/dog.wav',
-    ]
+    wavs = [args.wav]
 
     for i in range(len(wavs)):
         wavs[i] = os.path.abspath(os.path.expanduser(wavs[i]))
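
For reference, a minimal sketch of what the change amounts to, assuming the surrounding Predictor setup is unchanged (the script name predict.py below is a placeholder, not taken from this diff): a single user-supplied --wav path replaces the hardcoded demo files and is expanded to an absolute path before inference.

    # Illustrative sketch only, not part of the diff.
    import argparse
    import os

    parser = argparse.ArgumentParser()
    parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.")
    parser.add_argument("--batch_size", type=int, default=1, help="Batch size per GPU/CPU for training.")
    args = parser.parse_args()

    # One user-supplied file replaces the old cat.wav/dog.wav demo list.
    wavs = [args.wav]
    for i in range(len(wavs)):
        # Expand '~' and make the path absolute, as the updated script does.
        wavs[i] = os.path.abspath(os.path.expanduser(wavs[i]))

An invocation would then look roughly like: python predict.py --model_dir ./export --device gpu --wav ./dog.wav (the wav path here is a placeholder).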