replace fbank, test=asr

4 years ago · fcdaef6cb4
parent f256bb9c0e
commit fcdaef6cb4
1 changed files with 12 additions and 12 deletions
--- a/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
+++ b/paddlespeech/s2t/frontend/featurizer/audio_featurizer.py
@@ -13,6 +13,8 @@
 # limitations under the License.
 """Contains the audio featurizer class."""
 import numpy as np
+import paddle
+import paddleaudio.compliance.kaldi as kaldi
 from python_speech_features import delta
 from python_speech_features import logfbank
 from python_speech_features import mfcc
@@ -345,19 +347,17 @@ class AudioFeaturizer():
            raise ValueError("Stride size must not be greater than "
                             "window size.")
        # (T, D)
-        fbank_feat = logfbank(
+        waveform = paddle.to_tensor(
-            signal=samples,
+            np.expand_dims(samples, 0), dtype=paddle.float32)
-            samplerate=sample_rate,
+        mat = kaldi.fbank(
-            winlen=0.001 * window_ms,
+            waveform,
-            winstep=0.001 * stride_ms,
+            n_mels=feat_dim,
-            nfilt=feat_dim,
+            frame_length=window_ms,  # default : 25
-            nfft=512,
+            frame_shift=stride_ms,  # default : 10
-            lowfreq=20,
-            highfreq=max_freq,
            dither=dither,
-            remove_dc_offset=True,
+            energy_floor=0.0,
-            preemph=0.97,
+            sr=sample_rate)
-            wintype='povey')
+        fbank_feat = np.squeeze(mat.numpy())
        if delta_delta:
            fbank_feat = self._concat_delta_delta(fbank_feat)
        return fbank_feat