From 382503fcb33603d61e16bfd8dca8d795362e0f7c Mon Sep 17 00:00:00 2001 From: YangZhou Date: Thu, 29 Sep 2022 12:46:27 +0800 Subject: [PATCH] update READMe --- examples/tess/README.md | 28 +++++++++---------- .../cls0/conf/panns_logmelspectrogram.yaml | 2 +- .../tess/cls0/conf/panns_melspectrogram.yaml | 2 +- examples/tess/cls0/conf/panns_mfcc.yaml | 2 +- .../tess/cls0/conf/panns_spectrogram.yaml | 2 +- 5 files changed, 18 insertions(+), 18 deletions(-) diff --git a/examples/tess/README.md b/examples/tess/README.md index f56ab8d40..ef180f84d 100644 --- a/examples/tess/README.md +++ b/examples/tess/README.md @@ -1,6 +1,12 @@ # 背景 -模型任务与模型间接请参见 examples/esc50, 本目录是为了校验和测试 paddle.audio 的feature, backend等相关模块而建立. +TESS音频情绪分类任务. +本目录用于校验和测试 paddle.audio 的feature, backend等相关模块. + +本实验采用了PaddleSpeech提供的PANNs的CNN14预训练模型进行finetune: +- CNN14: 该模型主要包含12个卷积层和2个全连接层,模型参数的数量为 79.6M,embedding维度是 2048。 + +`PANNs`([PANNs: Large-Scale Pretrained Audio Neural Networks for Audio Pattern Recognition](https://arxiv.org/pdf/1912.10211.pdf))是基于Audioset数据集训练的声音分类/识别的模型。经过预训练后,模型可以用于提取音频的embedding。本示例将使用`PANNs`的预训练模型Finetune完成声音分类的任务。 ## 数据集 @@ -8,20 +14,14 @@ ## 模型指标 -根据 `TESS` 提供的fold信息,对数据集进行 5-fold 的 fine-tune 2 epoch 训练和评估,dev准确率如下: - -|Model|feat_type|Acc| -|--|--|--| -|CNN14| mfcc | 0.8304 | -|CNN14| logmelspectrogram | 0.9893 | -|CNN14| spectrogram| 0.1304 | -|CNN14| melspectrogram| 0.1339 | +根据 `TESS` 提供的fold信息,对数据集进行 5-fold 的 fine-tune 训练和评估,dev准确率如下: -因为是功能验证,所以只config中训练了 2 个epoch. -log_melspectrogram feature 在迭代 3 个epoch后, acc可以达到0.9983%. -mfcc feature 在迭代3个epoch后, acc可以达到0.9983%. -spectrogram feature 在迭代11个epoch后,acc可达0.95%. -melspectrogram feature 在迭代17个epoch后,acc可到0.9375%. 
+|Model|feat_type|Acc| note | +|--|--|--| -- | +|CNN14| mfcc | 0.9929 |3 epoch | +|CNN14| logmelspectrogram | 0.9983 | 3 epoch | +|CNN14| spectrogram| 0.95 | 11 epoch | +|CNN14| melspectrogram| 0.9375 | 17 epoch | ### 模型训练 diff --git a/examples/tess/cls0/conf/panns_logmelspectrogram.yaml b/examples/tess/cls0/conf/panns_logmelspectrogram.yaml index ba953c235..c48e517ea 100644 --- a/examples/tess/cls0/conf/panns_logmelspectrogram.yaml +++ b/examples/tess/cls0/conf/panns_logmelspectrogram.yaml @@ -23,7 +23,7 @@ feature: n_mels: 64 training: - epochs: 2 + epochs: 5 learning_rate: 0.0005 num_workers: 2 batch_size: 128 diff --git a/examples/tess/cls0/conf/panns_melspectrogram.yaml b/examples/tess/cls0/conf/panns_melspectrogram.yaml index a5d53d3d6..66aa4a717 100644 --- a/examples/tess/cls0/conf/panns_melspectrogram.yaml +++ b/examples/tess/cls0/conf/panns_melspectrogram.yaml @@ -23,7 +23,7 @@ feature: n_mels: 64 training: - epochs: 2 + epochs: 10 learning_rate: 0.0005 num_workers: 2 batch_size: 128 diff --git a/examples/tess/cls0/conf/panns_mfcc.yaml b/examples/tess/cls0/conf/panns_mfcc.yaml index 08b1387d9..6800e3abc 100644 --- a/examples/tess/cls0/conf/panns_mfcc.yaml +++ b/examples/tess/cls0/conf/panns_mfcc.yaml @@ -24,7 +24,7 @@ feature: n_mels: 64 training: - epochs: 2 + epochs: 5 learning_rate: 0.0005 num_workers: 2 batch_size: 128 diff --git a/examples/tess/cls0/conf/panns_spectrogram.yaml b/examples/tess/cls0/conf/panns_spectrogram.yaml index a4a6a7bc7..8d88f41c4 100644 --- a/examples/tess/cls0/conf/panns_spectrogram.yaml +++ b/examples/tess/cls0/conf/panns_spectrogram.yaml @@ -19,7 +19,7 @@ feature: window: 'hann' training: - epochs: 2 + epochs: 10 learning_rate: 0.0005 num_workers: 2 batch_size: 128