From 6e1ac1cc159cd4ee3ffcb5c7861b858cf854623d Mon Sep 17 00:00:00 2001 From: KP <109694228@qq.com> Date: Fri, 26 Nov 2021 17:06:57 +0800 Subject: [PATCH] Add paddlespeech.cls and esc50 example. --- examples/esc50/README.md | 15 ++++++--------- examples/esc50/cls0/local/infer.sh | 10 ++++------ .../esc50/cls0/local/static_model_infer.sh | 1 - examples/esc50/cls0/local/train.sh | 6 ++---- examples/esc50/cls0/path.sh | 2 +- examples/esc50/cls0/run.sh | 18 +++++++----------- .../cls/exps/{PANNs => panns}/__init__.py | 0 .../exps/{PANNs => panns}/deploy/__init__.py | 0 .../exps/{PANNs => panns}/deploy/predict.py | 0 .../cls/exps/{PANNs => panns}/export_model.py | 0 .../cls/exps/{PANNs => panns}/predict.py | 2 -- .../cls/exps/{PANNs => panns}/train.py | 2 -- paddlespeech/cls/models/__init__.py | 2 +- .../cls/models/{PANNs => panns}/__init__.py | 0 .../cls/models/{PANNs => panns}/classifier.py | 0 .../cls/models/{PANNs => panns}/panns.py | 0 16 files changed, 21 insertions(+), 37 deletions(-) rename paddlespeech/cls/exps/{PANNs => panns}/__init__.py (100%) rename paddlespeech/cls/exps/{PANNs => panns}/deploy/__init__.py (100%) rename paddlespeech/cls/exps/{PANNs => panns}/deploy/predict.py (100%) rename paddlespeech/cls/exps/{PANNs => panns}/export_model.py (100%) rename paddlespeech/cls/exps/{PANNs => panns}/predict.py (94%) rename paddlespeech/cls/exps/{PANNs => panns}/train.py (97%) rename paddlespeech/cls/models/{PANNs => panns}/__init__.py (100%) rename paddlespeech/cls/models/{PANNs => panns}/classifier.py (100%) rename paddlespeech/cls/models/{PANNs => panns}/panns.py (100%) diff --git a/examples/esc50/README.md b/examples/esc50/README.md index aa283845..66409754 100644 --- a/examples/esc50/README.md +++ b/examples/esc50/README.md @@ -28,7 +28,7 @@ PaddleAudio提供了PANNs的CNN14、CNN10和CNN6的预训练模型,可供用 $ CUDA_VISIBLE_DEVICES=0 ./run.sh 1 ``` -`paddlespeech/cls/exps/PANNs/train.py` 脚本中可支持配置的参数: +`paddlespeech/cls/exps/panns/train.py` 脚本中可支持配置的参数: - `device`: 指定模型预测时使用的设备。 - `feat_backend`: 选择提取特征的后端,可选`'numpy'`或`'paddle'`,默认为`'numpy'`。 @@ -65,7 +65,7 @@ model = SoundClassifier(backbone, num_class=len(ESC50.label_list)) $ CUDA_VISIBLE_DEVICES=0 ./run.sh 2 ``` -`paddlespeech/cls/exps/PANNs/predict.py` 脚本中可支持配置的参数: +`paddlespeech/cls/exps/panns/predict.py` 脚本中可支持配置的参数: - `device`: 指定模型预测时使用的设备。 - `wav`: 指定预测的音频文件。 @@ -91,7 +91,7 @@ Cat: 6.579841738130199e-06 $ CUDA_VISIBLE_DEVICES=0 ./run.sh 3 ``` -`paddlespeech/cls/exps/PANNs/export_model.py` 脚本中可支持配置的参数: +`paddlespeech/cls/exps/panns/export_model.py` 脚本中可支持配置的参数: - `checkpoint`: 模型参数checkpoint文件。 - `output_dir`: 导出静态图模型和参数文件的保存目录。 @@ -106,16 +106,13 @@ export #### 2. 模型部署和预测 -`paddlespeech/cls/exps/PANNs/deploy/predict.py` 脚本使用了`paddle.inference`模块下的api,提供了python端部署的示例: +`paddlespeech/cls/exps/panns/deploy/predict.py` 脚本使用了`paddle.inference`模块下的api,提供了python端部署的示例: ```shell -$ CUDA_VISIBLE_DEVICES=0 ./run.sh 3 -``` -```sh -python paddlespeech/cls/exps/PANNs/deploy/predict.py --model_dir ./export --device gpu +$ CUDA_VISIBLE_DEVICES=0 ./run.sh 4 ``` -`paddlespeech/cls/exps/PANNs/deploy/predict.py` 脚本中可支持配置的主要参数: +`paddlespeech/cls/exps/panns/deploy/predict.py` 脚本中可支持配置的主要参数: - `device`: 指定模型预测时使用的设备。 - `model_dir`: 导出静态图模型和参数文件的保存目录。 - `wav`: 指定预测的音频文件。 diff --git a/examples/esc50/cls0/local/infer.sh b/examples/esc50/cls0/local/infer.sh index 57fc157a..bc03d681 100755 --- a/examples/esc50/cls0/local/infer.sh +++ b/examples/esc50/cls0/local/infer.sh @@ -1,13 +1,11 @@ #!/bin/bash -device=$1 -audio_file=$2 -ckpt_dir=$3 -feat_backend=$4 +audio_file=$1 +ckpt_dir=$2 +feat_backend=$3 python3 ${BIN_DIR}/predict.py \ ---device ${device} \ --wav ${audio_file} \ --feat_backend ${feat_backend} \ --top_k 10 \ ---checkpoint ${ckpt_dir}/model.pdparams \ No newline at end of file +--checkpoint ${ckpt_dir}/model.pdparams diff --git a/examples/esc50/cls0/local/static_model_infer.sh b/examples/esc50/cls0/local/static_model_infer.sh index ba4eeda4..9b3abb5d 100755 --- a/examples/esc50/cls0/local/static_model_infer.sh +++ b/examples/esc50/cls0/local/static_model_infer.sh @@ -8,4 +8,3 @@ python3 ${BIN_DIR}/deploy/predict.py \ --device ${device} \ --model_dir ${model_dir} \ --wav ${audio_file} - diff --git a/examples/esc50/cls0/local/train.sh b/examples/esc50/cls0/local/train.sh index 19490472..0f0f3d09 100755 --- a/examples/esc50/cls0/local/train.sh +++ b/examples/esc50/cls0/local/train.sh @@ -1,15 +1,14 @@ #!/bin/bash ngpu=$1 -device=$2 -feat_backend=$3 +feat_backend=$2 num_epochs=50 batch_size=16 ckpt_dir=./checkpoint save_freq=10 -if [ ${ngpu} -gt 1 ]; then +if [ ${ngpu} -gt 0 ]; then python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES ${BIN_DIR}/train.py \ --epochs ${num_epochs} \ --feat_backend ${feat_backend} \ @@ -18,7 +17,6 @@ if [ ${ngpu} -gt 1 ]; then --save_freq ${save_freq} else python3 ${BIN_DIR}/train.py \ - --device ${device} \ --epochs ${num_epochs} \ --feat_backend ${feat_backend} \ --batch_size ${batch_size} \ diff --git a/examples/esc50/cls0/path.sh b/examples/esc50/cls0/path.sh index 2cc73e27..3eff28e4 100644 --- a/examples/esc50/cls0/path.sh +++ b/examples/esc50/cls0/path.sh @@ -9,5 +9,5 @@ export PYTHONDONTWRITEBYTECODE=1 export PYTHONIOENCODING=UTF-8 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH} -MODEL=PANNs +MODEL=panns export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL} \ No newline at end of file diff --git a/examples/esc50/cls0/run.sh b/examples/esc50/cls0/run.sh index 63ba99f4..7283aa8d 100755 --- a/examples/esc50/cls0/run.sh +++ b/examples/esc50/cls0/run.sh @@ -3,35 +3,31 @@ set -e source path.sh ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') -if [ ${ngpu} == 0 ];then - device=cpu -else - device=gpu -fi stage=$1 stop_stage=100 feat_backend=numpy +audio_file=~/cat.wav +ckpt_dir=./checkpoint/epoch_50 +output_dir=./export +infer_device=cpu if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then - ./local/train.sh ${ngpu} ${device} ${feat_backend} || exit -1 + ./local/train.sh ${ngpu} ${feat_backend} || exit -1 exit 0 fi -audio_file=~/cat.wav -ckpt_dir=./checkpoint/epoch_50 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then - ./local/infer.sh ${device} ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1 + ./local/infer.sh ${audio_file} ${ckpt_dir} ${feat_backend} || exit -1 exit 0 fi -output_dir=./export if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then ./local/export.sh ${ckpt_dir} ${output_dir} || exit -1 exit 0 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then - ./local/static_model_infer.sh ${device} ${output_dir} ${audio_file} || exit -1 + ./local/static_model_infer.sh ${infer_device} ${output_dir} ${audio_file} || exit -1 exit 0 fi diff --git a/paddlespeech/cls/exps/PANNs/__init__.py b/paddlespeech/cls/exps/panns/__init__.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/__init__.py rename to paddlespeech/cls/exps/panns/__init__.py diff --git a/paddlespeech/cls/exps/PANNs/deploy/__init__.py b/paddlespeech/cls/exps/panns/deploy/__init__.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/deploy/__init__.py rename to paddlespeech/cls/exps/panns/deploy/__init__.py diff --git a/paddlespeech/cls/exps/PANNs/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/deploy/predict.py rename to paddlespeech/cls/exps/panns/deploy/predict.py diff --git a/paddlespeech/cls/exps/PANNs/export_model.py b/paddlespeech/cls/exps/panns/export_model.py similarity index 100% rename from paddlespeech/cls/exps/PANNs/export_model.py rename to paddlespeech/cls/exps/panns/export_model.py diff --git a/paddlespeech/cls/exps/PANNs/predict.py b/paddlespeech/cls/exps/panns/predict.py similarity index 94% rename from paddlespeech/cls/exps/PANNs/predict.py rename to paddlespeech/cls/exps/panns/predict.py index 717b35ed..9cfd8b6c 100644 --- a/paddlespeech/cls/exps/PANNs/predict.py +++ b/paddlespeech/cls/exps/panns/predict.py @@ -26,7 +26,6 @@ from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) -parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to predict, defaults to gpu.") parser.add_argument("--wav", type=str, required=True, help="Audio file to infer.") parser.add_argument("--feat_backend", type=str, choices=['numpy', 'paddle'], default='numpy', help="Choose backend to extract features from audio files.") parser.add_argument("--top_k", type=int, default=1, help="Show top k predicted results") @@ -51,7 +50,6 @@ def extract_features(file: str, feat_backend: str='numpy', if __name__ == '__main__': - paddle.set_device(args.device) model = SoundClassifier( backbone=cnn14(pretrained=False, extract_embedding=True), diff --git a/paddlespeech/cls/exps/PANNs/train.py b/paddlespeech/cls/exps/panns/train.py similarity index 97% rename from paddlespeech/cls/exps/PANNs/train.py rename to paddlespeech/cls/exps/panns/train.py index e66724b8..12130978 100644 --- a/paddlespeech/cls/exps/PANNs/train.py +++ b/paddlespeech/cls/exps/panns/train.py @@ -25,7 +25,6 @@ from paddlespeech.cls.models import SoundClassifier # yapf: disable parser = argparse.ArgumentParser(__doc__) -parser.add_argument('--device', choices=['cpu', 'gpu'], default="gpu", help="Select which device to train model, defaults to gpu.") parser.add_argument("--epochs", type=int, default=50, help="Number of epoches for fine-tuning.") parser.add_argument("--feat_backend", type=str, choices=['numpy', 'paddle'], default='numpy', help="Choose backend to extract features from audio files.") parser.add_argument("--learning_rate", type=float, default=5e-5, help="Learning rate used to train with warmup.") @@ -38,7 +37,6 @@ args = parser.parse_args() # yapf: enable if __name__ == "__main__": - paddle.set_device(args.device) nranks = paddle.distributed.get_world_size() if paddle.distributed.get_world_size() > 1: paddle.distributed.init_parallel_env() diff --git a/paddlespeech/cls/models/__init__.py b/paddlespeech/cls/models/__init__.py index 66030b72..4bfadda1 100644 --- a/paddlespeech/cls/models/__init__.py +++ b/paddlespeech/cls/models/__init__.py @@ -11,4 +11,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from .PANNs import * +from .panns import * diff --git a/paddlespeech/cls/models/PANNs/__init__.py b/paddlespeech/cls/models/panns/__init__.py similarity index 100% rename from paddlespeech/cls/models/PANNs/__init__.py rename to paddlespeech/cls/models/panns/__init__.py diff --git a/paddlespeech/cls/models/PANNs/classifier.py b/paddlespeech/cls/models/panns/classifier.py similarity index 100% rename from paddlespeech/cls/models/PANNs/classifier.py rename to paddlespeech/cls/models/panns/classifier.py diff --git a/paddlespeech/cls/models/PANNs/panns.py b/paddlespeech/cls/models/panns/panns.py similarity index 100% rename from paddlespeech/cls/models/PANNs/panns.py rename to paddlespeech/cls/models/panns/panns.py