replace fbank, test=asr

pull/1776/head
huangyuxin 2 years ago
parent f256bb9c0e
commit fcdaef6cb4

@@ -13,6 +13,8 @@
 # limitations under the License.
 """Contains the audio featurizer class."""
 import numpy as np
+import paddle
+import paddleaudio.compliance.kaldi as kaldi
 from python_speech_features import delta
 from python_speech_features import logfbank
 from python_speech_features import mfcc
@@ -345,19 +347,17 @@ class AudioFeaturizer():
             raise ValueError("Stride size must not be greater than "
                              "window size.")
         # (T, D)
-        fbank_feat = logfbank(
-            signal=samples,
-            samplerate=sample_rate,
-            winlen=0.001 * window_ms,
-            winstep=0.001 * stride_ms,
-            nfilt=feat_dim,
-            nfft=512,
-            lowfreq=20,
-            highfreq=max_freq,
+        waveform = paddle.to_tensor(
+            np.expand_dims(samples, 0), dtype=paddle.float32)
+        mat = kaldi.fbank(
+            waveform,
+            n_mels=feat_dim,
+            frame_length=window_ms,  # default : 25
+            frame_shift=stride_ms,  # default : 10
             dither=dither,
-            remove_dc_offset=True,
-            preemph=0.97,
-            wintype='povey')
+            energy_floor=0.0,
+            sr=sample_rate)
+        fbank_feat = np.squeeze(mat.numpy())
         if delta_delta:
             fbank_feat = self._concat_delta_delta(fbank_feat)
         return fbank_feat

Loading…
Cancel
Save