Merge pull request #1776 from Jackwaterveg/ds2

[ASR] Replace fbank
pull/1779/head
Hui Zhang 3 years ago committed by GitHub
commit 33ca17359f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -13,8 +13,9 @@
# limitations under the License. # limitations under the License.
"""Contains the audio featurizer class.""" """Contains the audio featurizer class."""
import numpy as np import numpy as np
import paddle
import paddleaudio.compliance.kaldi as kaldi
from python_speech_features import delta from python_speech_features import delta
from python_speech_features import logfbank
from python_speech_features import mfcc from python_speech_features import mfcc
@@ -345,19 +346,17 @@ class AudioFeaturizer():
raise ValueError("Stride size must not be greater than " raise ValueError("Stride size must not be greater than "
"window size.") "window size.")
# (T, D) # (T, D)
fbank_feat = logfbank( waveform = paddle.to_tensor(
signal=samples, np.expand_dims(samples, 0), dtype=paddle.float32)
samplerate=sample_rate, mat = kaldi.fbank(
winlen=0.001 * window_ms, waveform,
winstep=0.001 * stride_ms, n_mels=feat_dim,
nfilt=feat_dim, frame_length=window_ms, # default : 25
nfft=512, frame_shift=stride_ms, # default : 10
lowfreq=20,
highfreq=max_freq,
dither=dither, dither=dither,
remove_dc_offset=True, energy_floor=0.0,
preemph=0.97, sr=sample_rate)
wintype='povey') fbank_feat = np.squeeze(mat.numpy())
if delta_delta: if delta_delta:
fbank_feat = self._concat_delta_delta(fbank_feat) fbank_feat = self._concat_delta_delta(fbank_feat)
return fbank_feat return fbank_feat

Loading…
Cancel
Save