From 9a71c091c575a204a73128fc31034a7f0d9587a7 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 8 Nov 2021 07:09:07 +0000 Subject: [PATCH] remove debug info and format code --- examples/librispeech/s1/conf/preprocess.yaml | 10 ++--- paddlespeech/s2t/frontend/audio.py | 4 +- paddlespeech/s2t/frontend/utility.py | 1 - paddlespeech/s2t/transform/spec_augment.py | 3 ++ paddlespeech/s2t/transform/spectrogram.py | 40 ++++++++------------ 5 files changed, 26 insertions(+), 32 deletions(-) diff --git a/examples/librispeech/s1/conf/preprocess.yaml b/examples/librispeech/s1/conf/preprocess.yaml index 97ebf41de..021ca4c58 100644 --- a/examples/librispeech/s1/conf/preprocess.yaml +++ b/examples/librispeech/s1/conf/preprocess.yaml @@ -10,16 +10,16 @@ process: cmvn_path: data/mean_std.json # these three processes are a.k.a. SpecAugument - type: time_warp - max_time_warp: 0 + max_time_warp: 5 inplace: true mode: PIL - type: freq_mask - F: 10 + F: 30 n_mask: 2 inplace: true - replace_with_zero: true + replace_with_zero: false - type: time_mask - T: 50 + T: 40 n_mask: 2 inplace: true - replace_with_zero: true + replace_with_zero: false diff --git a/paddlespeech/s2t/frontend/audio.py b/paddlespeech/s2t/frontend/audio.py index 4171f85bb..65dccad38 100644 --- a/paddlespeech/s2t/frontend/audio.py +++ b/paddlespeech/s2t/frontend/audio.py @@ -24,9 +24,9 @@ import soundfile import soxbindings as sox from scipy import signal -from .utility import subfile_from_tar -from .utility import convert_samples_to_float32 from .utility import convert_samples_from_float32 +from .utility import convert_samples_to_float32 +from .utility import subfile_from_tar class AudioSegment(): diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py index 58e5b1b0c..703f2127d 100644 --- a/paddlespeech/s2t/frontend/utility.py +++ b/paddlespeech/s2t/frontend/utility.py @@ -390,4 +390,3 @@ def convert_samples_from_float32(samples, dtype): else: raise TypeError("Unsupported sample type: %s." % samples.dtype) return output_samples.astype(dtype) - diff --git a/paddlespeech/s2t/transform/spec_augment.py b/paddlespeech/s2t/transform/spec_augment.py index 83e4e2e75..5ce950851 100644 --- a/paddlespeech/s2t/transform/spec_augment.py +++ b/paddlespeech/s2t/transform/spec_augment.py @@ -34,6 +34,9 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"): :returns numpy.ndarray: time warped spectrogram (time, freq) """ window = max_time_warp + if window == 0: + return x + if mode == "PIL": t = x.shape[0] if t - window <= window: diff --git a/paddlespeech/s2t/transform/spectrogram.py b/paddlespeech/s2t/transform/spectrogram.py index 9e576d0df..da91ef921 100644 --- a/paddlespeech/s2t/transform/spectrogram.py +++ b/paddlespeech/s2t/transform/spectrogram.py @@ -307,9 +307,6 @@ class IStft(): center=self.center, ) -from paddlespeech.s2t.utils.log import Log -logger = Log(__name__).getlog() - class LogMelSpectrogramKaldi(): def __init__( self, @@ -347,22 +344,22 @@ class LogMelSpectrogramKaldi(): self.dither = dither def __repr__(self): - return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, " - "n_shift={n_shift}, win_length={win_length}, window={window}, " - "fmin={fmin}, fmax={fmax}, eps={eps}, preemph={preemph}, window={window}, dither={dither}))".format( - name=self.__class__.__name__, - fs=self.fs, - n_mels=self.n_mels, - n_fft=self.n_fft, - n_shift=self.n_shift, - win_length=self.win_length, - window=self.window, - fmin=self.fmin, - fmax=self.fmax, - eps=self.eps, - preemph=self.preemph, - window=self.window, - dither=self.dither)) + return ( + "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, " + "n_shift={n_shift}, win_length={win_length}, preemph={preemph}, window={window}, " + "fmin={fmin}, fmax={fmax}, eps={eps}, dither={dither}))".format( + name=self.__class__.__name__, + fs=self.fs, + n_mels=self.n_mels, + n_fft=self.n_fft, + n_shift=self.n_shift, + preemph=self.preemph, + win_length=self.win_length, + window=self.window, + fmin=self.fmin, + fmax=self.fmax, + eps=self.eps, + dither=self.dither, )) def __call__(self, x): """ @@ -379,12 +376,10 @@ class LogMelSpectrogramKaldi(): if x.ndim != 1: raise ValueError("Not support x: [Time, Channel]") - logger.info(f"in {x}") if x.dtype in np.sctypes['float']: # PCM32 -> PCM16 bits = np.iinfo(np.int16).bits x = x * 2**(bits - 1) - logger.info(f"b {x}") # logfbank need PCM16 input y = logfbank( @@ -400,7 +395,4 @@ class LogMelSpectrogramKaldi(): remove_dc_offset=self.remove_dc_offset, preemph=self.preemph, wintype=self.window) - logger.info(f"a {y}") - - return y