test tess dataset&& esc50 dataset by paddle2.4

pull/2468/head
YangZhou 3 years ago
parent 3169995ee8
commit 5819770914

@ -1,5 +1,5 @@
data:
dataset: 'paddleaudio.datasets:ESC50'
dataset: 'paddle.audio.datasets:ESC50'
num_classes: 50
train:
mode: 'train'

@ -0,0 +1,184 @@
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import paddle
import yaml
from paddleaudio.utils import logger
from paddleaudio.utils import Timer
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import
# yapf: disable
parser = argparse.ArgumentParser(__doc__)
parser.add_argument("--cfg_path", type=str, required=True)
args = parser.parse_args()
# yapf: enable
def _collate_features(batch):
# (feat, label)
# (( n_mels, length), label)
feats = []
labels = []
lengths = []
for sample in batch:
feats.append(paddle.transpose(sample[0], perm=[1,0]))
lengths.append(sample[0].shape[1])
labels.append(sample[1])
max_length = max(lengths)
for i in range(len(feats)):
feats[i] = paddle.nn.functional.pad(
feats[i], [0, max_length - feats[i].shape[0], 0, 0],
data_format='NLC')
return paddle.stack(feats), paddle.to_tensor(
labels), paddle.to_tensor(lengths)
if __name__ == "__main__":
nranks = paddle.distributed.get_world_size()
if paddle.distributed.get_world_size() > 1:
paddle.distributed.init_parallel_env()
local_rank = paddle.distributed.get_rank()
args.cfg_path = os.path.abspath(os.path.expanduser(args.cfg_path))
with open(args.cfg_path, 'r') as f:
config = yaml.safe_load(f)
model_conf = config['model']
data_conf = config['data']
feat_conf = config['feature']
training_conf = config['training']
# Dataset
ds_class = dynamic_import(data_conf['dataset'])
train_ds = ds_class(**data_conf['train'])
dev_ds = ds_class(**data_conf['dev'])
train_sampler = paddle.io.DistributedBatchSampler(
train_ds,
batch_size=training_conf['batch_size'],
shuffle=True,
drop_last=False)
train_loader = paddle.io.DataLoader(
train_ds,
batch_sampler=train_sampler,
num_workers=training_conf['num_workers'],
return_list=True,
use_buffer_reader=True,
collate_fn=_collate_features)
# Model
backbone_class = dynamic_import(model_conf['backbone'])
backbone = backbone_class(pretrained=True, extract_embedding=True)
model = SoundClassifier(backbone, num_class=data_conf['num_classes'])
model = paddle.DataParallel(model)
optimizer = paddle.optimizer.Adam(
learning_rate=training_conf['learning_rate'],
parameters=model.parameters())
criterion = paddle.nn.loss.CrossEntropyLoss()
steps_per_epoch = len(train_sampler)
timer = Timer(steps_per_epoch * training_conf['epochs'])
timer.start()
for epoch in range(1, training_conf['epochs'] + 1):
model.train()
avg_loss = 0
num_corrects = 0
num_samples = 0
for batch_idx, batch in enumerate(train_loader):
feats, labels, length = batch # feats(N, length, n_mels)
logits = model(feats)
loss = criterion(logits, labels)
loss.backward()
optimizer.step()
if isinstance(optimizer._learning_rate,
paddle.optimizer.lr.LRScheduler):
optimizer._learning_rate.step()
optimizer.clear_grad()
# Calculate loss
avg_loss += loss.numpy()[0]
# Calculate metrics
preds = paddle.argmax(logits, axis=1)
num_corrects += (preds == labels).numpy().sum()
num_samples += feats.shape[0]
timer.count()
if (batch_idx + 1
) % training_conf['log_freq'] == 0 and local_rank == 0:
lr = optimizer.get_lr()
avg_loss /= training_conf['log_freq']
avg_acc = num_corrects / num_samples
print_msg = 'Epoch={}/{}, Step={}/{}'.format(
epoch, training_conf['epochs'], batch_idx + 1,
steps_per_epoch)
print_msg += ' loss={:.4f}'.format(avg_loss)
print_msg += ' acc={:.4f}'.format(avg_acc)
print_msg += ' lr={:.6f} step/sec={:.2f} | ETA {}'.format(
lr, timer.timing, timer.eta)
logger.train(print_msg)
avg_loss = 0
num_corrects = 0
num_samples = 0
if epoch % training_conf[
'save_freq'] == 0 and batch_idx + 1 == steps_per_epoch and local_rank == 0:
dev_sampler = paddle.io.BatchSampler(
dev_ds,
batch_size=training_conf['batch_size'],
shuffle=False,
drop_last=False)
dev_loader = paddle.io.DataLoader(
dev_ds,
batch_sampler=dev_sampler,
num_workers=training_conf['num_workers'],
return_list=True, )
model.eval()
num_corrects = 0
num_samples = 0
with logger.processing('Evaluation on validation dataset'):
for batch_idx, batch in enumerate(dev_loader):
waveforms, labels = batch
feats = feature_extractor(waveforms)
logits = model(feats)
preds = paddle.argmax(logits, axis=1)
num_corrects += (preds == labels).numpy().sum()
num_samples += feats.shape[0]
print_msg = '[Evaluation result]'
print_msg += ' dev_acc={:.4f}'.format(num_corrects / num_samples)
logger.eval(print_msg)
# Save model
save_dir = os.path.join(training_conf['checkpoint_dir'],
'epoch_{}'.format(epoch))
logger.info('Saving model checkpoint to {}'.format(save_dir))
paddle.save(model.state_dict(),
os.path.join(save_dir, 'model.pdparams'))
paddle.save(optimizer.state_dict(),
os.path.join(save_dir, 'model.pdopt'))

@ -0,0 +1,12 @@
#!/bin/bash
ngpu=$1
cfg_path=$2
if [ ${ngpu} -gt 0 ]; then
python3 -m paddle.distributed.launch --gpus $CUDA_VISIBLE_DEVICES local/train.py \
--cfg_path ${cfg_path}
else
python3 local/train.py \
--cfg_path ${cfg_path}
fi

@ -0,0 +1,13 @@
#!/bin/bash
export MAIN_ROOT=`realpath ${PWD}/../../../`
export PATH=${MAIN_ROOT}:${MAIN_ROOT}/utils:${PATH}
export LC_ALL=C
export PYTHONDONTWRITEBYTECODE=1
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
export PYTHONIOENCODING=UTF-8
export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
MODEL=panns
export BIN_DIR=${MAIN_ROOT}/paddlespeech/cls/exps/${MODEL}

@ -0,0 +1,35 @@
#!/bin/bash
set -e
source path.sh
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
stage=$1
stop_stage=100
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
cfg_path=$2
./local/train.sh ${ngpu} ${cfg_path} || exit -1
exit 0
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
cfg_path=$2
./local/infer.sh ${cfg_path} || exit -1
exit 0
fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
ckpt=$2
output_dir=$3
./local/export.sh ${ckpt} ${output_dir} || exit -1
exit 0
fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
infer_device=$2
graph_dir=$3
audio_file=$4
./local/static_model_infer.sh ${infer_device} ${graph_dir} ${audio_file} || exit -1
exit 0
fi

@ -17,9 +17,9 @@ import os
import paddle
import yaml
from paddleaudio.features import LogMelSpectrogram
from paddle.audio.features import LogMelSpectrogram
from paddleaudio.utils import logger
from paddlesaudio.utils import Timer
from paddleaudio.utils import Timer
from paddlespeech.cls.models import SoundClassifier
from paddlespeech.utils.dynamic_import import dynamic_import

Loading…
Cancel
Save