Add paddlespeech.cls and esc50 example.

pull/1027/head
KP 3 years ago
parent 476f05c424
commit 33f0e7622c

@@ -30,7 +30,7 @@ $ CUDA_VISIBLE_DEVICES=0 ./run.sh 1
Configurable arguments supported by the `paddlespeech/cls/exps/PANNs/train.py` script:
- `device`: Which device to train on, either cpu or gpu, defaults to gpu. When training on GPU, the gpus argument specifies the GPU card numbers.
- `device`: Specifies the device used for model prediction.
- `feat_backend`: Backend used for feature extraction, either `'numpy'` or `'paddle'`, defaults to `'numpy'` (see the sketch after this list).
- `epochs`: Number of training epochs, defaults to 50.
- `learning_rate`: Learning rate for fine-tuning, defaults to 5e-5.
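The difference between the two feature back ends can be sketched roughly as below. This is a hedged illustration only: the module paths come from the imports added elsewhere in this commit, but the exact argument names (`sr`, the demo file path) are assumptions, not taken from `train.py`.
```python
import os

import paddle
from paddleaudio.backends import load as load_audio
from paddleaudio.features import LogMelSpectrogram  # 'paddle' back end
from paddleaudio.features import melspectrogram     # 'numpy' back end

wav_file = os.path.expanduser('~/cat.wav')  # hypothetical demo file
waveform, sr = load_audio(wav_file, sr=32000)

# feat_backend == 'numpy': the feature is computed on the host as a numpy array.
feat_np = melspectrogram(waveform, sr=sr)

# feat_backend == 'paddle': the extractor is a paddle layer, so it can run on
# the same device as the model and sit inside the computation graph.
extractor = LogMelSpectrogram(sr=sr)
feat_pd = extractor(paddle.to_tensor(waveform).unsqueeze(0))
```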
@@ -42,8 +42,8 @@ $ CUDA_VISIBLE_DEVICES=0 ./run.sh 1
The pretrained model used in the example code is `CNN14`; to switch to another pretrained model, it can be done as follows:
```python
from model import SoundClassifier
from paddlespeech.cls.datasets import ESC50
from paddleaudio.datasets import ESC50
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.cls.models import cnn14, cnn10, cnn6
# CNN14
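# The lines below are a hedged sketch, not verbatim repo code: the
# pretrained/extract_embedding arguments and the num_class wiring are
# assumptions about how a backbone is plugged into SoundClassifier.
backbone = cnn14(pretrained=True, extract_embedding=True)
# Switching the pretrained model only means picking a different constructor:
# backbone = cnn10(pretrained=True, extract_embedding=True)
# backbone = cnn6(pretrained=True, extract_embedding=True)
model = SoundClassifier(backbone, num_class=len(ESC50.label_list))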
@@ -67,7 +67,7 @@ $ CUDA_VISIBLE_DEVICES=0 ./run.sh 2
Configurable arguments supported by the `paddlespeech/cls/exps/PANNs/predict.py` script:
- `device`: Which device to train on, either cpu or gpu, defaults to gpu. When training on GPU, the gpus argument specifies the GPU card numbers.
- `device`: Specifies the device used for model prediction.
- `wav`: The audio file to run prediction on.
- `feat_backend`: Backend used for feature extraction, either `'numpy'` or `'paddle'`, defaults to `'numpy'`.
- `top_k`: Show the scores of the top k predicted labels, defaults to 1 (see the sketch below).
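A hedged sketch of what the `top_k` option amounts to, assuming the classifier returns a logit vector over the ESC50 labels; the helper below is illustrative and not part of `predict.py`:
```python
import numpy as np
from paddleaudio.datasets import ESC50

def top_k_labels(logits: np.ndarray, k: int = 1):
    """Return the k most probable ESC50 labels with their softmax scores."""
    probs = np.exp(logits - logits.max())
    probs /= probs.sum()                    # softmax over the 50 classes
    idx = np.argsort(probs)[::-1][:k]       # indices of the top-k scores
    return [(ESC50.label_list[i], float(probs[i])) for i in idx]
```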
@@ -88,10 +88,10 @@ Cat: 6.579841738130199e-06
After model training finishes, the saved dynamic-graph parameters can be exported as a static-graph model and parameters, and the static graph can then be deployed.
```shell
python -u export_model.py --checkpoint ./checkpoint/epoch_50/model.pdparams --output_dir ./export
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 3
```
Configurable arguments:
Configurable arguments supported by the `paddlespeech/cls/exps/PANNs/export_model.py` script (a sketch of the export step follows this list):
- `checkpoint`: The checkpoint file of model parameters.
- `output_dir`: Directory in which the exported static-graph model and parameter files are saved.
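A hedged sketch of the dynamic-to-static export step, using standard `paddle.jit` APIs; the input spec shape, the `pretrained`/`extract_embedding` arguments, and the output file name are assumptions rather than what `export_model.py` actually uses:
```python
import paddle
from paddleaudio.datasets import ESC50
from paddlespeech.cls.models import SoundClassifier, cnn14

# Rebuild the fine-tuned classifier and load the saved dynamic-graph weights.
model = SoundClassifier(cnn14(pretrained=False, extract_embedding=True),
                        num_class=len(ESC50.label_list))
model.set_state_dict(paddle.load('./checkpoint/epoch_50/model.pdparams'))
model.eval()

# Trace the dynamic graph against an input spec and save the static model.
static_model = paddle.jit.to_static(
    model,
    input_spec=[paddle.static.InputSpec(shape=[None, None, 64], dtype='float32')])
paddle.jit.save(static_model, './export/inference')
```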
@@ -106,8 +106,16 @@ export
#### 2. Model deployment and prediction
The `deploy/python/predict.py` script uses the APIs under the `paddle.inference` module to provide an example of Python-side deployment:
The `paddlespeech/cls/exps/PANNs/deploy/predict.py` script uses the APIs under the `paddle.inference` module to provide an example of Python-side deployment:
```shell
$ CUDA_VISIBLE_DEVICES=0 ./run.sh 3
```
```sh
python deploy/python/predict.py --model_dir ./export --device gpu
python paddlespeech/cls/exps/PANNs/deploy/predict.py --model_dir ./export --device gpu
```
Main configurable arguments supported by the `paddlespeech/cls/exps/PANNs/deploy/predict.py` script (a sketch of the underlying inference flow follows this list):
- `device`: Specifies the device used for model prediction.
- `model_dir`: Directory in which the exported static-graph model and parameter files are saved.
- `wav`: The audio file to run prediction on.
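A hedged sketch of the `paddle.inference` flow this deployment script is built on; the file names under `./export` and the feature shape are assumptions, not values taken from the script:
```python
import numpy as np
from paddle import inference

# Load the exported static-graph model and create a predictor on GPU 0.
config = inference.Config('./export/inference.pdmodel',
                          './export/inference.pdiparams')
config.enable_use_gpu(100, 0)  # 100 MB initial GPU memory pool, device id 0
predictor = inference.create_predictor(config)

# Feed one batch of (assumed) log-mel features of shape [batch, time, mel_bins].
feats = np.random.rand(1, 64, 64).astype('float32')
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
input_handle.copy_from_cpu(feats)
predictor.run()

# Fetch the class scores over the ESC50 labels.
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])
logits = output_handle.copy_to_cpu()
```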

@@ -0,0 +1,8 @@
#!/bin/bash
ckpt_dir=$1
output_dir=$2
python3 ${BIN_DIR}/export_model.py \
--checkpoint ${ckpt_dir}/model.pdparams \
--output_dir ${output_dir}

@@ -0,0 +1,11 @@
#!/bin/bash
device=$1
model_dir=$2
audio_file=$3
python3 ${BIN_DIR}/deploy/predict.py \
--device ${device} \
--model_dir ${model_dir} \
--wav ${audio_file}

@@ -15,13 +15,23 @@ feat_backend=numpy
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1
exit 0
fi
audio_file=~/cat.wav
ckpt_dir=./checkpoint/epoch_50
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1
exit 0
fi
output_dir=./export
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
./local/export.sh ${ckpt_dir} ${output_dir} || exit -1
exit 0
fi
exit 0
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
./local/static_model_infer.sh ${device} ${output_dir} ${audio_file} || exit -1
exit 0
fi

@@ -11,4 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .panns import *

@@ -0,0 +1,13 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

@@ -18,15 +18,16 @@ import numpy as np
from paddle import inference
from scipy.special import softmax
from paddlespeech.cls.backends import load as load_audio
from paddlespeech.cls.datasets import ESC50
from paddlespeech.cls.features import melspectrogram
from paddleaudio.backends import load as load_audio
from paddleaudio.datasets import ESC50
from paddleaudio.features import melspectrogram
# yapf: disable
parser = argparse.ArgumentParser()
parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.")
parser.add_argument("--batch_size", type=int, default=2, help="Batch size per GPU/CPU for training.")
parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.")
parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.")
parser.add_argument("--batch_size", type=int, default=1, help="Batch size per GPU/CPU for training.")
parser.add_argument('--use_tensorrt', type=eval, default=False, choices=[True, False], help='Enable to use tensorrt to speed up.')
parser.add_argument("--precision", type=str, default="fp32", choices=["fp32", "fp16"], help='The tensorrt precision.')
parser.add_argument('--cpu_threads', type=int, default=10, help='Number of threads to predict when using cpu.')
@@ -132,10 +133,7 @@ if __name__ == "__main__":
args.use_tensorrt, args.precision, args.cpu_threads,
args.enable_mkldnn)
wavs = [
'~/audio_demo_resource/cat.wav',
'~/audio_demo_resource/dog.wav',
]
wavs = [args.wav]
for i in range(len(wavs)):
wavs[i] = os.path.abspath(os.path.expanduser(wavs[i]))

@@ -16,9 +16,9 @@ import os
import paddle
from .model import SoundClassifier
from .panns import cnn14
from paddleaudio.datasets import ESC50
from paddlespeech.cls.models import cnn14
from paddlespeech.cls.models import SoundClassifier
# yapf: disable
parser = argparse.ArgumentParser(__doc__)

@@ -16,13 +16,13 @@ import argparse
import numpy as np
import paddle
import paddle.nn.functional as F
from model import SoundClassifier
from panns import cnn14
from paddleaudio.backends import load as load_audio
from paddleaudio.datasets import ESC50
from paddleaudio.features import LogMelSpectrogram
from paddleaudio.features import melspectrogram
from paddlespeech.cls.models import cnn14
from paddlespeech.cls.models import SoundClassifier
# yapf: disable
parser = argparse.ArgumentParser(__doc__)

@@ -15,13 +15,13 @@ import argparse
import os
import paddle
from model import SoundClassifier
from panns import cnn14
from paddleaudio.datasets import ESC50
from paddleaudio.features import LogMelSpectrogram
from paddleaudio.utils import logger
from paddleaudio.utils import Timer
from paddlespeech.cls.models import cnn14
from paddlespeech.cls.models import SoundClassifier
# yapf: disable
parser = argparse.ArgumentParser(__doc__)

@@ -0,0 +1,15 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .classifier import *
from .panns import *

@@ -0,0 +1,14 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .PANNs import *