diff --git a/examples/esc50/cls0/conf/panns.yaml b/examples/esc50/cls0/conf/panns.yaml
index 3a9d42aa5..a0668b27f 100644
--- a/examples/esc50/cls0/conf/panns.yaml
+++ b/examples/esc50/cls0/conf/panns.yaml
@@ -1,5 +1,5 @@
 data:
-  dataset: 'paddleaudio.datasets:ESC50'
+  dataset: 'paddle.audio.datasets:ESC50'
   num_classes: 50
   train:
     mode: 'train'
@@ -33,4 +33,4 @@ training:
 predicting:
   audio_file: '/audio/dog.wav'
   top_k: 10
-  checkpoint: './checkpoint/epoch_50/model.pdparams'
\ No newline at end of file
+  checkpoint: './checkpoint/epoch_50/model.pdparams'
diff --git a/examples/tess/cls0/local/train.py b/examples/tess/cls0/local/train.py
new file mode 100644
index 000000000..c1f0e7e43
--- /dev/null
+++ b/examples/tess/cls0/local/train.py
@@ -0,0 +1,194 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import os
+
+import paddle
+import yaml
+
+from paddleaudio.utils import logger
+from paddleaudio.utils import Timer
+from paddlespeech.cls.models import SoundClassifier
+from paddlespeech.utils.dynamic_import import dynamic_import
+
+# yapf: disable
+parser = argparse.ArgumentParser(__doc__)
+parser.add_argument("--cfg_path", type=str, required=True)
+args = parser.parse_args()
+# yapf: enable
+
+def _collate_features(batch):
+    """Collate (feat, label) samples into padded batch tensors.
+
+    Each sample is (feat, label) with feat laid out as (n_mels, length).
+    Features are transposed to (length, n_mels), padded at the end of the
+    length axis up to the longest sample in the batch, then stacked.
+
+    Returns:
+        Tuple of (features, labels, lengths) tensors; lengths are unpadded.
+    """
+    feats = []
+    labels = []
+    lengths = []
+    for sample in batch:
+        feats.append(paddle.transpose(sample[0], perm=[1,0]))
+        lengths.append(sample[0].shape[1])
+        labels.append(sample[1])
+
+    max_length = max(lengths)
+    for i in range(len(feats)):
+        feats[i] = paddle.nn.functional.pad(
+            feats[i], [0, max_length - feats[i].shape[0], 0, 0],
+            data_format='NLC')
+
+    return paddle.stack(feats), paddle.to_tensor(
+        labels), paddle.to_tensor(lengths)
+
+if __name__ == "__main__":
+    nranks = paddle.distributed.get_world_size()
+    if nranks > 1:
+        paddle.distributed.init_parallel_env()
+    local_rank = paddle.distributed.get_rank()
+
+    args.cfg_path = os.path.abspath(os.path.expanduser(args.cfg_path))
+    with open(args.cfg_path, 'r') as f:
+        config = yaml.safe_load(f)
+
+    model_conf = config['model']
+    data_conf = config['data']
+    feat_conf = config['feature']
+    training_conf = config['training']
+
+    # Dataset
+    ds_class = dynamic_import(data_conf['dataset'])
+    train_ds = ds_class(**data_conf['train'])
+    dev_ds = ds_class(**data_conf['dev'])
+    train_sampler = paddle.io.DistributedBatchSampler(
+        train_ds,
+        batch_size=training_conf['batch_size'],
+        shuffle=True,
+        drop_last=False)
+    train_loader = paddle.io.DataLoader(
+        train_ds,
+        batch_sampler=train_sampler,
+        num_workers=training_conf['num_workers'],
+        return_list=True,
+        use_buffer_reader=True,
+        collate_fn=_collate_features)
+
+    # Model
+    backbone_class = dynamic_import(model_conf['backbone'])
+    backbone = backbone_class(pretrained=True, extract_embedding=True)
+    model = SoundClassifier(backbone, num_class=data_conf['num_classes'])
+    model = paddle.DataParallel(model)
+    optimizer = paddle.optimizer.Adam(
+        learning_rate=training_conf['learning_rate'],
+        parameters=model.parameters())
+    criterion = paddle.nn.loss.CrossEntropyLoss()
+
+    steps_per_epoch = len(train_sampler)
+    timer = Timer(steps_per_epoch * training_conf['epochs'])
+    timer.start()
+
+    for epoch in range(1, training_conf['epochs'] + 1):
+        model.train()
+
+        avg_loss = 0
+        num_corrects = 0
+        num_samples = 0
+        for batch_idx, batch in enumerate(train_loader):
+            feats, labels, length = batch  # feats(N, length, n_mels)
+
+            logits = model(feats)
+
+            loss = criterion(logits, labels)
+            loss.backward()
+            optimizer.step()
+            if isinstance(optimizer._learning_rate,
+                          paddle.optimizer.lr.LRScheduler):
+                optimizer._learning_rate.step()
+            optimizer.clear_grad()
+
+            # Calculate loss; float() accepts both 0-D and 1-element
+            # tensors, whereas indexing .numpy()[0] fails on a 0-D loss.
+            avg_loss += float(loss)
+
+            # Calculate metrics
+            preds = paddle.argmax(logits, axis=1)
+            num_corrects += (preds == labels).numpy().sum()
+            num_samples += feats.shape[0]
+
+            timer.count()
+
+            if (batch_idx + 1
+                ) % training_conf['log_freq'] == 0 and local_rank == 0:
+                lr = optimizer.get_lr()
+                avg_loss /= training_conf['log_freq']
+                avg_acc = num_corrects / num_samples
+
+                print_msg = 'Epoch={}/{}, Step={}/{}'.format(
+                    epoch, training_conf['epochs'], batch_idx + 1,
+                    steps_per_epoch)
+                print_msg += ' loss={:.4f}'.format(avg_loss)
+                print_msg += ' acc={:.4f}'.format(avg_acc)
+                print_msg += ' lr={:.6f} step/sec={:.2f} | ETA {}'.format(
+                    lr, timer.timing, timer.eta)
+                logger.train(print_msg)
+
+                avg_loss = 0
+                num_corrects = 0
+                num_samples = 0
+
+        if epoch % training_conf[
+                'save_freq'] == 0 and batch_idx + 1 == steps_per_epoch and local_rank == 0:
+            dev_sampler = paddle.io.BatchSampler(
+                dev_ds,
+                batch_size=training_conf['batch_size'],
+                shuffle=False,
+                drop_last=False)
+            # dev_ds is built from the same dataset class as train_ds, so
+            # its samples need the same feature collation (TODO confirm config).
+            dev_loader = paddle.io.DataLoader(
+                dev_ds,
+                batch_sampler=dev_sampler,
+                num_workers=training_conf['num_workers'],
+                return_list=True,
+                collate_fn=_collate_features)
+
+            model.eval()
+            num_corrects = 0
+            num_samples = 0
+            with logger.processing('Evaluation on validation dataset'):
+                for batch in dev_loader:
+                    feats, labels, _ = batch
+
+                    logits = model(feats)
+
+                    preds = paddle.argmax(logits, axis=1)
+                    num_corrects += (preds == labels).numpy().sum()
+                    num_samples += feats.shape[0]
+
+            print_msg = '[Evaluation result]'
+            print_msg += ' dev_acc={:.4f}'.format(num_corrects / num_samples)
+
+            logger.eval(print_msg)
+
+            # Save model
+            save_dir = os.path.join(training_conf['checkpoint_dir'],
+                                    'epoch_{}'.format(epoch))
+            logger.info('Saving model checkpoint to {}'.format(save_dir))
+            paddle.save(model.state_dict(),
+                        os.path.join(save_dir, 'model.pdparams'))
+            paddle.save(optimizer.state_dict(),
+                        os.path.join(save_dir, 'model.pdopt'))
diff --git a/examples/tess/cls0/local/train.sh b/examples/tess/cls0/local/train.sh
new file mode 100755
index 000000000..953c56bf8
--- /dev/null
+++ b/examples/tess/cls0/local/train.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+ngpu=$1
+cfg_path=$2
+
+if [ ${ngpu} -gt 0 ]; then
+    python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES local/train.py \
+    --cfg_path ${cfg_path}
+else
+    python3 local/train.py \
+    --cfg_path ${cfg_path}
+fi
diff --git a/examples/tess/cls0/path.sh b/examples/tess/cls0/path.sh
new file mode 100644
index 000000000..3eff28e48
--- /dev/null
+++ b/examples/tess/cls0/path.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+export MAIN_ROOT=`realpath ${PWD}/../../../`
+
+export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
+export LC_ALL=C
+
+export PYTHONDONTWRITEBYTECODE=1
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
+
+MODEL=panns
+export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}
diff --git a/examples/tess/cls0/run.sh b/examples/tess/cls0/run.sh
new file mode 100755
index 000000000..0e407b40e
--- /dev/null
+++ b/examples/tess/cls0/run.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+set -e
+source path.sh
+
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+
+stage=$1
+stop_stage=100
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    cfg_path=$2
+    ./local/train.sh ${ngpu} ${cfg_path} || exit -1
+    exit 0
+fi
+
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    cfg_path=$2
+    ./local/infer.sh ${cfg_path} || exit -1
+    exit 0
+fi
+
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    ckpt=$2
+    output_dir=$3
+    ./local/export.sh ${ckpt} ${output_dir} || exit -1
+    exit 0
+fi
+
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    infer_device=$2
+    graph_dir=$3
+    audio_file=$4
+    ./local/static_model_infer.sh ${infer_device} ${graph_dir} ${audio_file} || exit -1
+    exit 0
+fi
diff --git a/paddlespeech/cls/exps/panns/train.py b/paddlespeech/cls/exps/panns/train.py
index 9258ab516..ab942b2a3 100644
--- a/paddlespeech/cls/exps/panns/train.py
+++ b/paddlespeech/cls/exps/panns/train.py
@@ -17,9 +17,9 @@ import os
 import paddle
 import yaml
 
-from paddleaudio.features import LogMelSpectrogram
+from paddle.audio.features import LogMelSpectrogram
 from paddleaudio.utils import logger
-from paddlesaudio.utils import Timer
+from paddleaudio.utils import Timer
 from paddlespeech.cls.models import SoundClassifier
 from paddlespeech.utils.dynamic_import import dynamic_import
 