From 9a71c091c575a204a73128fc31034a7f0d9587a7 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 8 Nov 2021 07:09:07 +0000
Subject: [PATCH] remove debug info and format code

---
 examples/librispeech/s1/conf/preprocess.yaml | 10 ++---
 paddlespeech/s2t/frontend/audio.py           |  4 +-
 paddlespeech/s2t/frontend/utility.py         |  1 -
 paddlespeech/s2t/transform/spec_augment.py   |  3 ++
 paddlespeech/s2t/transform/spectrogram.py    | 40 ++++++++------------
 5 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/examples/librispeech/s1/conf/preprocess.yaml b/examples/librispeech/s1/conf/preprocess.yaml
index 97ebf41de..021ca4c58 100644
--- a/examples/librispeech/s1/conf/preprocess.yaml
+++ b/examples/librispeech/s1/conf/preprocess.yaml
@@ -10,16 +10,16 @@ process:
     cmvn_path: data/mean_std.json
   # these three processes are a.k.a. SpecAugument
   - type: time_warp
-    max_time_warp: 0
+    max_time_warp: 5
     inplace: true
     mode: PIL
   - type: freq_mask
-    F: 10
+    F: 30
     n_mask: 2
     inplace: true
-    replace_with_zero: true
+    replace_with_zero: false
   - type: time_mask
-    T: 50
+    T: 40
     n_mask: 2
     inplace: true
-    replace_with_zero: true
+    replace_with_zero: false
diff --git a/paddlespeech/s2t/frontend/audio.py b/paddlespeech/s2t/frontend/audio.py
index 4171f85bb..65dccad38 100644
--- a/paddlespeech/s2t/frontend/audio.py
+++ b/paddlespeech/s2t/frontend/audio.py
@@ -24,9 +24,9 @@ import soundfile
 import soxbindings as sox
 from scipy import signal
 
-from .utility import subfile_from_tar
-from .utility import convert_samples_to_float32
 from .utility import convert_samples_from_float32
+from .utility import convert_samples_to_float32
+from .utility import subfile_from_tar
 
 
 class AudioSegment():
diff --git a/paddlespeech/s2t/frontend/utility.py b/paddlespeech/s2t/frontend/utility.py
index 58e5b1b0c..703f2127d 100644
--- a/paddlespeech/s2t/frontend/utility.py
+++ b/paddlespeech/s2t/frontend/utility.py
@@ -390,4 +390,3 @@ def convert_samples_from_float32(samples, dtype):
     else:
         raise TypeError("Unsupported sample type: %s." % samples.dtype)
     return output_samples.astype(dtype)
-
diff --git a/paddlespeech/s2t/transform/spec_augment.py b/paddlespeech/s2t/transform/spec_augment.py
index 83e4e2e75..5ce950851 100644
--- a/paddlespeech/s2t/transform/spec_augment.py
+++ b/paddlespeech/s2t/transform/spec_augment.py
@@ -34,6 +34,9 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
     :returns numpy.ndarray: time warped spectrogram (time, freq)
     """
     window = max_time_warp
+    if window == 0:
+        return x
+
     if mode == "PIL":
         t = x.shape[0]
         if t - window <= window:
diff --git a/paddlespeech/s2t/transform/spectrogram.py b/paddlespeech/s2t/transform/spectrogram.py
index 9e576d0df..da91ef921 100644
--- a/paddlespeech/s2t/transform/spectrogram.py
+++ b/paddlespeech/s2t/transform/spectrogram.py
@@ -307,9 +307,6 @@ class IStft():
             center=self.center, )
 
 
-from paddlespeech.s2t.utils.log import Log
-logger = Log(__name__).getlog()
-
 class LogMelSpectrogramKaldi():
     def __init__(
             self,
@@ -347,22 +344,22 @@ class LogMelSpectrogramKaldi():
         self.dither = dither
 
     def __repr__(self):
-        return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
-                "n_shift={n_shift}, win_length={win_length}, window={window}, "
-                "fmin={fmin}, fmax={fmax}, eps={eps}, preemph={preemph}, window={window}, dither={dither}))".format(
-                    name=self.__class__.__name__,
-                    fs=self.fs,
-                    n_mels=self.n_mels,
-                    n_fft=self.n_fft,
-                    n_shift=self.n_shift,
-                    win_length=self.win_length,
-                    window=self.window,
-                    fmin=self.fmin,
-                    fmax=self.fmax,
-                    eps=self.eps, 
-                    preemph=self.preemph,
-                    window=self.window,
-                    dither=self.dither))
+        return (
+            "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
+            "n_shift={n_shift}, win_length={win_length}, preemph={preemph}, window={window}, "
+            "fmin={fmin}, fmax={fmax}, eps={eps}, dither={dither})".format(
+                name=self.__class__.__name__,
+                fs=self.fs,
+                n_mels=self.n_mels,
+                n_fft=self.n_fft,
+                n_shift=self.n_shift,
+                preemph=self.preemph,
+                win_length=self.win_length,
+                window=self.window,
+                fmin=self.fmin,
+                fmax=self.fmax,
+                eps=self.eps,
+                dither=self.dither, ))
 
     def __call__(self, x):
         """
@@ -379,12 +376,10 @@ class LogMelSpectrogramKaldi():
         if x.ndim != 1:
             raise ValueError("Not support x: [Time, Channel]")
 
-        logger.info(f"in {x}")
         if x.dtype in np.sctypes['float']:
             # PCM32 -> PCM16
             bits = np.iinfo(np.int16).bits
             x = x * 2**(bits - 1)
-        logger.info(f"b {x}")
 
         # logfbank need PCM16 input
         y = logfbank(
@@ -400,7 +395,4 @@ class LogMelSpectrogramKaldi():
             remove_dc_offset=self.remove_dc_offset,
             preemph=self.preemph,
             wintype=self.window)
-        logger.info(f"a {y}")
-
-
         return y