From 33f0e7622ca250d3e520bcf316ddd2d0c9a04cc0 Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Thu, 25 Nov 2021 22:24:30 +0800 Subject: [PATCH] Add paddlespeech.cls and esc50 example. --- examples/esc50/README.md | 24 ++++++++++++------- examples/esc50/cls0/local/export.sh | 8 +++++++ .../esc50/cls0/local/static_model_infer.sh | 11 +++++++++ examples/esc50/cls0/run.sh | 12 +++++++++- paddlespeech/cls/exps/PANNs/__init__.py | 1 - .../cls/exps/PANNs/deploy/__init__.py | 13 ++++++++++ .../exps/PANNs/deploy/{python => }/predict.py | 14 +++++------ paddlespeech/cls/exps/PANNs/export_model.py | 4 ++-- paddlespeech/cls/exps/PANNs/predict.py | 4 ++-- paddlespeech/cls/exps/PANNs/train.py | 4 ++-- paddlespeech/cls/models/PANNs/__init__.py | 15 ++++++++++++ .../model.py => models/PANNs/classifier.py} | 0 .../cls/{exps => models}/PANNs/panns.py | 0 paddlespeech/cls/models/__init__.py | 14 +++++++++++ 14 files changed, 100 insertions(+), 24 deletions(-) create mode 100755 examples/esc50/cls0/local/export.sh create mode 100755 examples/esc50/cls0/local/static_model_infer.sh create mode 100644 paddlespeech/cls/exps/PANNs/deploy/__init__.py rename paddlespeech/cls/exps/PANNs/deploy/{python => }/predict.py (94%) create mode 100644 paddlespeech/cls/models/PANNs/__init__.py rename paddlespeech/cls/{exps/PANNs/model.py => models/PANNs/classifier.py} (100%) rename paddlespeech/cls/{exps => models}/PANNs/panns.py (100%) create mode 100644 paddlespeech/cls/models/__init__.py diff --git a/examples/esc50/README.md b/examples/esc50/README.md index 3cf93259..aa283845 100644 --- a/examples/esc50/README.md +++ b/examples/esc50/README.md @@ -30,7 +30,7 @@ $ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 `paddlespeech/cls/exps/PANNs/train.py` 脚本中可支持配置的参数: -- `device`: 选用什么设备进行训练,可选cpu或gpu,默认为gpu。如使用gpu训练则参数gpus指定GPU卡号。 +- `device`: 指定模型训练时使用的设备。 - `feat_backend`: 选择提取特征的后端,可选`'numpy'`或`'paddle'`,默认为`'numpy'`。 - `epochs`: 训练轮次,默认为50。 - `learning_rate`: Fine-tune的学习率;默认为5e-5。 @@ -42,8 +42,8 @@ $ 
CUDA_VISIBLE_DEVICES=0 ./run.sh 1 示例代码中使用的预训练模型为`CNN14`,如果想更换为其他预训练模型,可通过以下方式执行: ```python -from model import SoundClassifier -from paddlespeech.cls.datasets import ESC50 +from paddleaudio.datasets import ESC50 +from paddlespeech.cls.models import SoundClassifier from paddlespeech.cls.models import cnn14, cnn10, cnn6 # CNN14 @@ -67,7 +67,7 @@ $ CUDA_VISIBLE_DEVICES=0 ./run.sh 2 `paddlespeech/cls/exps/PANNs/predict.py` 脚本中可支持配置的参数: -- `device`: 选用什么设备进行训练,可选cpu或gpu,默认为gpu。如使用gpu训练则参数gpus指定GPU卡号。 +- `device`: 指定模型预测时使用的设备。 - `wav`: 指定预测的音频文件。 - `feat_backend`: 选择提取特征的后端,可选`'numpy'`或`'paddle'`,默认为`'numpy'`。 - `top_k`: 预测显示的top k标签的得分,默认为1。 @@ -88,10 +88,10 @@ Cat: 6.579841738130199e-06 模型训练结束后,可以将已保存的动态图参数导出成静态图的模型和参数,然后实施静态图的部署。 ```shell -python -u export_model.py --checkpoint ./checkpoint/epoch_50/model.pdparams --output_dir ./export +$ CUDA_VISIBLE_DEVICES=0 ./run.sh 3 ``` -可支持配置的参数: +`paddlespeech/cls/exps/PANNs/export_model.py` 脚本中可支持配置的参数: - `checkpoint`: 模型参数checkpoint文件。 - `output_dir`: 导出静态图模型和参数文件的保存目录。 @@ -106,8 +106,16 @@ export #### 2. 
模型部署和预测 -`deploy/python/predict.py` 脚本使用了`paddle.inference`模块下的api,提供了python端部署的示例: +`paddlespeech/cls/exps/PANNs/deploy/predict.py` 脚本使用了`paddle.inference`模块下的api,提供了python端部署的示例: +```shell +$ CUDA_VISIBLE_DEVICES=0 ./run.sh 4 +``` ```sh -python deploy/python/predict.py --model_dir ./export --device gpu +python paddlespeech/cls/exps/PANNs/deploy/predict.py --model_dir ./export --device gpu --wav ~/cat.wav ``` + +`paddlespeech/cls/exps/PANNs/deploy/predict.py` 脚本中可支持配置的主要参数: +- `device`: 指定模型预测时使用的设备。 +- `model_dir`: 导出静态图模型和参数文件的保存目录。 +- `wav`: 指定预测的音频文件。 diff --git a/examples/esc50/cls0/local/export.sh b/examples/esc50/cls0/local/export.sh new file mode 100755 index 00000000..160dc743 --- /dev/null +++ b/examples/esc50/cls0/local/export.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +ckpt_dir=$1 +output_dir=$2 + +python3 ${BIN_DIR}/export_model.py \ +--checkpoint ${ckpt_dir}/model.pdparams \ +--output_dir ${output_dir} diff --git a/examples/esc50/cls0/local/static_model_infer.sh b/examples/esc50/cls0/local/static_model_infer.sh new file mode 100755 index 00000000..ba4eeda4 --- /dev/null +++ b/examples/esc50/cls0/local/static_model_infer.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +device=$1 +model_dir=$2 +audio_file=$3 + +python3 ${BIN_DIR}/deploy/predict.py \ +--device ${device} \ +--model_dir ${model_dir} \ +--wav ${audio_file} + diff --git a/examples/esc50/cls0/run.sh b/examples/esc50/cls0/run.sh index e75ad517..63ba99f4 100755 --- a/examples/esc50/cls0/run.sh +++ b/examples/esc50/cls0/run.sh @@ -15,13 +15,23 @@ feat_backend=numpy if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then ./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1 + exit 0 fi audio_file=~/cat.wav ckpt_dir=./checkpoint/epoch_50 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then ./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1 + exit 0 fi +output_dir=./export +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + ./local/export.sh ${ckpt_dir} ${output_dir} || exit -1 + exit 0 +fi 
-exit 0 \ No newline at end of file +if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then + ./local/static_model_infer.sh ${device} ${output_dir} ${audio_file} || exit -1 + exit 0 +fi diff --git a/paddlespeech/cls/exps/PANNs/__init__.py b/paddlespeech/cls/exps/PANNs/__init__.py index 4bfadda1..185a92b8 100644 --- a/paddlespeech/cls/exps/PANNs/__init__.py +++ b/paddlespeech/cls/exps/PANNs/__init__.py @@ -11,4 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .panns import * diff --git a/paddlespeech/cls/exps/PANNs/deploy/__init__.py b/paddlespeech/cls/exps/PANNs/deploy/__init__.py new file mode 100644 index 00000000..185a92b8 --- /dev/null +++ b/paddlespeech/cls/exps/PANNs/deploy/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/paddlespeech/cls/exps/PANNs/deploy/python/predict.py b/paddlespeech/cls/exps/PANNs/deploy/predict.py similarity index 94% rename from paddlespeech/cls/exps/PANNs/deploy/python/predict.py rename to paddlespeech/cls/exps/PANNs/deploy/predict.py index 13730acd..d4e5c22f 100644 --- a/paddlespeech/cls/exps/PANNs/deploy/python/predict.py +++ b/paddlespeech/cls/exps/PANNs/deploy/predict.py @@ -18,15 +18,16 @@ import numpy as np from paddle import inference from scipy.special import softmax -from paddlespeech.cls.backends import load as load_audio -from paddlespeech.cls.datasets import ESC50 -from paddlespeech.cls.features import melspectrogram +from paddleaudio.backends import load as load_audio +from paddleaudio.datasets import ESC50 +from paddleaudio.features import melspectrogram # yapf: disable parser = argparse.ArgumentParser() parser.add_argument("--model_dir", type=str, required=True, default="./export", help="The directory to static model.") -parser.add_argument("--batch_size", type=int, default=2, help="Batch size per GPU/CPU for training.") parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", help="Select which device to train model, defaults to gpu.") +parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.") +parser.add_argument("--batch_size", type=int, default=1, help="Batch size per GPU/CPU for training.") parser.add_argument('--use_tensorrt', type=eval, default=False, choices=[True, False], help='Enable to use tensorrt to speed up.') parser.add_argument("--precision", type=str, default="fp32", choices=["fp32", "fp16"], help='The tensorrt precision.') parser.add_argument('--cpu_threads', type=int, default=10, help='Number of threads to predict when using cpu.') @@ -132,10 +133,7 @@ if __name__ == "__main__": args.use_tensorrt, args.precision, args.cpu_threads, args.enable_mkldnn) - wavs = [ - '~/audio_demo_resource/cat.wav', - '~/audio_demo_resource/dog.wav', - ] + wavs = [args.wav] for i in 
range(len(wavs)): wavs[i] = os.path.abspath(os.path.expanduser(wavs[i])) diff --git a/paddlespeech/cls/exps/PANNs/export_model.py b/paddlespeech/cls/exps/PANNs/export_model.py index 4dac5237..c295c6a3 100644 --- a/paddlespeech/cls/exps/PANNs/export_model.py +++ b/paddlespeech/cls/exps/PANNs/export_model.py @@ -16,9 +16,9 @@ import os import paddle -from .model import SoundClassifier -from .panns import cnn14 from paddleaudio.datasets import ESC50 +from paddlespeech.cls.models import cnn14 +from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git a/paddlespeech/cls/exps/PANNs/predict.py b/paddlespeech/cls/exps/PANNs/predict.py index 2d97ab1b..717b35ed 100644 --- a/paddlespeech/cls/exps/PANNs/predict.py +++ b/paddlespeech/cls/exps/PANNs/predict.py @@ -16,13 +16,13 @@ import argparse import numpy as np import paddle import paddle.nn.functional as F -from model import SoundClassifier -from panns import cnn14 from paddleaudio.backends import load as load_audio from paddleaudio.datasets import ESC50 from paddleaudio.features import LogMelSpectrogram from paddleaudio.features import melspectrogram +from paddlespeech.cls.models import cnn14 +from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git a/paddlespeech/cls/exps/PANNs/train.py b/paddlespeech/cls/exps/PANNs/train.py index a3fb01ef..e66724b8 100644 --- a/paddlespeech/cls/exps/PANNs/train.py +++ b/paddlespeech/cls/exps/PANNs/train.py @@ -15,13 +15,13 @@ import argparse import os import paddle -from model import SoundClassifier -from panns import cnn14 from paddleaudio.datasets import ESC50 from paddleaudio.features import LogMelSpectrogram from paddleaudio.utils import logger from paddleaudio.utils import Timer +from paddlespeech.cls.models import cnn14 +from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) diff --git 
a/paddlespeech/cls/models/PANNs/__init__.py b/paddlespeech/cls/models/PANNs/__init__.py new file mode 100644 index 00000000..638f772f --- /dev/null +++ b/paddlespeech/cls/models/PANNs/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .classifier import * +from .panns import * diff --git a/paddlespeech/cls/exps/PANNs/model.py b/paddlespeech/cls/models/PANNs/classifier.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/model.py rename to paddlespeech/cls/models/PANNs/classifier.py diff --git a/paddlespeech/cls/exps/PANNs/panns.py b/paddlespeech/cls/models/PANNs/panns.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/panns.py rename to paddlespeech/cls/models/PANNs/panns.py diff --git a/paddlespeech/cls/models/__init__.py b/paddlespeech/cls/models/__init__.py new file mode 100644 index 00000000..66030b72 --- /dev/null +++ b/paddlespeech/cls/models/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .PANNs import *