From fa74e1ebc68c77f81c238e7107b658dfaa5ee5f9 Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Fri, 27 Sep 2024 03:41:07 +0000 Subject: [PATCH] fix --- README.md | 2 +- paddlespeech/cls/exps/panns/deploy/predict.py | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 19ec61cb0..5197ff4c7 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ - + diff --git a/paddlespeech/cls/exps/panns/deploy/predict.py b/paddlespeech/cls/exps/panns/deploy/predict.py index f14b44215..866a669e8 100644 --- a/paddlespeech/cls/exps/panns/deploy/predict.py +++ b/paddlespeech/cls/exps/panns/deploy/predict.py @@ -17,7 +17,7 @@ import os import numpy as np from paddle import inference from paddle.audio.datasets import ESC50 -from paddle.audio.features import MelSpectrogram +from paddle.audio.features import LogMelSpectrogram from paddleaudio.backends import soundfile_load as load_audio from scipy.special import softmax @@ -53,7 +53,10 @@ def extract_features(files: str, **kwargs): pad_width = max_length - len(waveforms[i]) waveforms[i] = np.pad(waveforms[i], pad_width=(0, pad_width)) - feat = MelSpectrogram(waveforms[i], sr, **kwargs).transpose() + feature_extractor = LogMelSpectrogram(sr, **kwargs) + feat = feature_extractor(paddle.to_tensor(waveforms[i])) + feat = paddle.transpose(feat, perm=[1, 0]).unsqueeze(0) + feats.append(feat) return np.stack(feats, axis=0)