pull/3856/head
zxcd 12 months ago
parent f66d7d25c4
commit fa74e1ebc6

@ -7,7 +7,7 @@
<a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-red.svg"></a> <a href="./LICENSE"><img src="https://img.shields.io/badge/license-Apache%202-red.svg"></a>
<a href="https://github.com/PaddlePaddle/PaddleSpeech/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/PaddleSpeech?color=ffa"></a> <a href="https://github.com/PaddlePaddle/PaddleSpeech/releases"><img src="https://img.shields.io/github/v/release/PaddlePaddle/PaddleSpeech?color=ffa"></a>
<a href="support os"><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a> <a href="support os"><img src="https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-pink.svg"></a>
<a href=""><img src="https://img.shields.io/badge/python-3.7+-aff.svg"></a> <a href=""><img src="https://img.shields.io/badge/python-3.8+-aff.svg"></a>
<a href="https://github.com/PaddlePaddle/PaddleSpeech/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/PaddleSpeech?color=9ea"></a> <a href="https://github.com/PaddlePaddle/PaddleSpeech/graphs/contributors"><img src="https://img.shields.io/github/contributors/PaddlePaddle/PaddleSpeech?color=9ea"></a>
<a href="https://github.com/PaddlePaddle/PaddleSpeech/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/PaddleSpeech?color=3af"></a> <a href="https://github.com/PaddlePaddle/PaddleSpeech/commits"><img src="https://img.shields.io/github/commit-activity/m/PaddlePaddle/PaddleSpeech?color=3af"></a>
<a href="https://github.com/PaddlePaddle/PaddleSpeech/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/PaddleSpeech?color=9cc"></a> <a href="https://github.com/PaddlePaddle/PaddleSpeech/issues"><img src="https://img.shields.io/github/issues/PaddlePaddle/PaddleSpeech?color=9cc"></a>

@ -17,7 +17,7 @@ import os
import numpy as np import numpy as np
from paddle import inference from paddle import inference
from paddle.audio.datasets import ESC50 from paddle.audio.datasets import ESC50
from paddle.audio.features import MelSpectrogram from paddle.audio.features import LogMelSpectrogram
from paddleaudio.backends import soundfile_load as load_audio from paddleaudio.backends import soundfile_load as load_audio
from scipy.special import softmax from scipy.special import softmax
@ -53,7 +53,10 @@ def extract_features(files: str, **kwargs):
pad_width = max_length - len(waveforms[i]) pad_width = max_length - len(waveforms[i])
waveforms[i] = np.pad(waveforms[i], pad_width=(0, pad_width)) waveforms[i] = np.pad(waveforms[i], pad_width=(0, pad_width))
feat = MelSpectrogram(waveforms[i], sr, **kwargs).transpose() feature_extractor = LogMelSpectrogram(sr, **kwargs)
feat = feature_extractor(paddle.to_tensor(waveforms[i]))
feat = paddle.transpose(feat, perm=[1, 0]).unsqueeze(0)
feats.append(feat) feats.append(feat)
return np.stack(feats, axis=0) return np.stack(feats, axis=0)

Loading…
Cancel
Save