diff --git a/third_party/paddle_audio/frontend/common.py b/third_party/paddle_audio/frontend/common.py
index ad74c67d0..13e7cf8fb 100644
--- a/third_party/paddle_audio/frontend/common.py
+++ b/third_party/paddle_audio/frontend/common.py
@@ -1,6 +1,6 @@
 import paddle
 import numpy as np
-from typing import Tuple
+from typing import Tuple, Optional
 
 
 # https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/src/feat/feature-window.cc#L109
@@ -32,7 +32,7 @@ def hamm_window(frame_len:int) -> np.ndarray:
         win[i] = 0.54 - 0.46 * np.cos(a * i)
     return win
 
-def get_window(wintype:str, winlen:int) -> np.ndarray:
+def get_window(wintype:Optional[str], winlen:int) -> np.ndarray:
     # calculate window
     if not wintype or wintype == 'rectangular':
         window = np.ones(winlen)
diff --git a/third_party/paddle_audio/frontend/kaldi.py b/third_party/paddle_audio/frontend/kaldi.py
index 8cb3f8c6c..6c98dad60 100644
--- a/third_party/paddle_audio/frontend/kaldi.py
+++ b/third_party/paddle_audio/frontend/kaldi.py
@@ -68,6 +68,31 @@ def frames(x: Tensor,
     return frames, num_frames
 
 
+def do_dither(signal, dither_value=1.0):
+    """Add ``paddle.normal`` noise scaled by ``dither_value`` to ``signal``."""
+    signal += paddle.normal(shape=signal.shape) * dither_value
+    return signal
+
+
+def do_remove_dc_offset(signal):
+    """Subtract the overall mean of ``signal`` from it and return the result."""
+    signal -= paddle.mean(signal)
+    return signal
+
+
+def do_preemphasis(signal, coeff=0.97):
+    """Perform preemphasis on the input signal.
+
+    :param signal: The signal to filter.
+    :param coeff: The preemphasis coefficient. 0 is no filter, default is 0.97.
+    :returns: the filtered signal; the first sample is scaled by (1 - coeff).
+    """
+    return paddle.concat([
+        (1-coeff)*signal[0:1],
+        signal[1:] - coeff * signal[:-1]
+    ])
+
+
 class STFT(nn.Layer):
     """A module for computing stft transformation in a differentiable way.
 