From fcdaef6cb4bb0bbfea61cafce22989191f4c2c6a Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Mon, 25 Apr 2022 03:36:58 +0000 Subject: [PATCH 1/2] replace fbank, test=asr --- .../frontend/featurizer/audio_featurizer.py | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index 6f3b646c..e0fe81fe 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -13,6 +13,8 @@ # limitations under the License. """Contains the audio featurizer class.""" import numpy as np +import paddle +import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta from python_speech_features import logfbank from python_speech_features import mfcc @@ -345,19 +347,17 @@ class AudioFeaturizer(): raise ValueError("Stride size must not be greater than " "window size.") # (T, D) - fbank_feat = logfbank( - signal=samples, - samplerate=sample_rate, - winlen=0.001 * window_ms, - winstep=0.001 * stride_ms, - nfilt=feat_dim, - nfft=512, - lowfreq=20, - highfreq=max_freq, + waveform = paddle.to_tensor( + np.expand_dims(samples, 0), dtype=paddle.float32) + mat = kaldi.fbank( + waveform, + n_mels=feat_dim, + frame_length=window_ms, # default : 25 + frame_shift=stride_ms, # default : 10 dither=dither, - remove_dc_offset=True, - preemph=0.97, - wintype='povey') + energy_floor=0.0, + sr=sample_rate) + fbank_feat = np.squeeze(mat.numpy()) if delta_delta: fbank_feat = self._concat_delta_delta(fbank_feat) return fbank_feat From 0df8d80833990dbf44509a9a6fbc8302fdc0f9eb Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Mon, 25 Apr 2022 05:20:13 +0000 Subject: [PATCH 2/2] remove logfbank from python_speech_features, test=asr --- paddlespeech/s2t/frontend/featurizer/audio_featurizer.py | 1 - 1 file changed, 1 deletion(-) diff --git a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py index e0fe81fe..22329d5e 100644 --- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py +++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py @@ -16,7 +16,6 @@ import numpy as np import paddle import paddleaudio.compliance.kaldi as kaldi from python_speech_features import delta -from python_speech_features import logfbank from python_speech_features import mfcc