From c09b0e894019d7de78bbc0bece1b90b44b7aff28 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 19 Aug 2021 03:35:07 +0000
Subject: [PATCH] fix specaug

---
 README.md                                      |  5 +++--
 README_cn.md                                   |  5 +++--
 deepspeech/frontend/augmentor/base.py          |  6 +++---
 deepspeech/frontend/augmentor/spec_augment.py  |  7 +++++--
 examples/librispeech/s2/conf/augmentation.json | 17 -----------------
 5 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/README.md b/README.md
index f7d1e0882..d10fd5d59 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,12 @@
 [中文版](README_cn.md)
 
-# PaddlePaddle ASR toolkit
+# PaddlePaddle Speech to Any toolkit
 
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
 
-*PaddleASR* is an open-source implementation of end-to-end Automatic Speech Recognition (ASR) engine, with [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial application and academic research on speech recognition, via an easy-to-use, efficient, samller and scalable implementation, including training, inference & testing module, and deployment.
+*DeepSpeech* is an open-source implementation of an end-to-end Automatic Speech Recognition engine, built on the [PaddlePaddle](https://github.com/PaddlePaddle/Paddle) platform. Our vision is to empower both industrial applications and academic research on speech recognition, via an easy-to-use, efficient, smaller and scalable implementation, including training, inference & testing modules, and deployment.
 
 
 ## Features
@@ -15,6 +15,7 @@
 
 ## Setup
 
+* Ubuntu 16.04
 * python>=3.7
 * paddlepaddle>=2.1.2
 
diff --git a/README_cn.md b/README_cn.md
index 019b38c15..90a65c440 100644
--- a/README_cn.md
+++ b/README_cn.md
@@ -1,12 +1,12 @@
 [English](README.md)
 
-# PaddlePaddle ASR toolkit
+# PaddlePaddle Speech to Any toolkit
 
 ![License](https://img.shields.io/badge/license-Apache%202-red.svg)
 ![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
 ![support os](https://img.shields.io/badge/os-linux-yellow.svg)
 
-*PaddleASR*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别（ASR）引擎的开源项目，
+*DeepSpeech*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别引擎的开源项目，
 我们的愿景是为语音识别在工业应用和学术研究上，提供易于使用、高效、小型化和可扩展的工具，包括训练，推理，以及  部署。
 
 ## 特性
@@ -16,6 +16,7 @@
 
 ## 安装
 
+* Ubuntu 16.04
 * python>=3.7
 * paddlepaddle>=2.1.2
 
diff --git a/deepspeech/frontend/augmentor/base.py b/deepspeech/frontend/augmentor/base.py
index 87cb4ef72..18d003c0b 100644
--- a/deepspeech/frontend/augmentor/base.py
+++ b/deepspeech/frontend/augmentor/base.py
@@ -30,7 +30,7 @@ class AugmentorBase():
 
     @abstractmethod
     def __call__(self, xs):
-        raise NotImplementedError
+        raise NotImplementedError("AugmentorBase: Not impl __call__")
 
     @abstractmethod
     def transform_audio(self, audio_segment):
@@ -44,7 +44,7 @@ class AugmentorBase():
         :param audio_segment: Audio segment to add effects to.
         :type audio_segment: AudioSegmenet|SpeechSegment
         """
-        raise NotImplementedError
+        raise NotImplementedError("AugmentorBase: Not impl transform_audio")
 
     @abstractmethod
     def transform_feature(self, spec_segment):
@@ -56,4 +56,4 @@ class AugmentorBase():
         Args:
             spec_segment (Spectrogram): Spectrogram segment to add effects to.
         """
-        raise NotImplementedError
+        raise NotImplementedError("AugmentorBase: Not impl transform_feature")
diff --git a/deepspeech/frontend/augmentor/spec_augment.py b/deepspeech/frontend/augmentor/spec_augment.py
index 94d23bf46..1786099c8 100644
--- a/deepspeech/frontend/augmentor/spec_augment.py
+++ b/deepspeech/frontend/augmentor/spec_augment.py
@@ -64,7 +64,7 @@ class SpecAugmentor(AugmentorBase):
         self.n_freq_masks = n_freq_masks
         self.n_time_masks = n_time_masks
         self.p = p
-        #logger.info(f"specaug: F-{F}, T-{T}, F-n-{n_freq_masks}, T-n-{n_time_masks}")
+
 
         # adaptive SpecAugment
         self.adaptive_number_ratio = adaptive_number_ratio
@@ -120,6 +120,9 @@ class SpecAugmentor(AugmentorBase):
     @property
     def time_mask(self):
         return self._time_mask
+
+    def __repr__(self):  # one-line summary of the configured mask settings
+        return f"specaug: F-{self.F}, T-{self.T}, F-n-{self.n_freq_masks}, T-n-{self.n_time_masks}"
 
     def time_warp(xs, W=40):
         raise NotImplementedError
@@ -160,7 +163,7 @@ class SpecAugmentor(AugmentorBase):
     def __call__(self, x, train=True):
         if not train:
             return
-        self.transform_audio(x)
+        self.transform_feature(x)
 
     def transform_feature(self, xs: np.ndarray):
         """
diff --git a/examples/librispeech/s2/conf/augmentation.json b/examples/librispeech/s2/conf/augmentation.json
index c1078393d..49fe333ec 100644
--- a/examples/librispeech/s2/conf/augmentation.json
+++ b/examples/librispeech/s2/conf/augmentation.json
@@ -1,21 +1,4 @@
 [
-  {
-    "type": "shift",
-    "params": {
-      "min_shift_ms": -5,
-      "max_shift_ms": 5
-    },
-    "prob": 1.0
-  },
-  {
-    "type": "speed",
-    "params": {
-      "min_speed_rate": 0.9,
-      "max_speed_rate": 1.1,
-      "num_rates": 3
-    },
-    "prob": 0.0
-  },
   {
     "type": "specaug",
     "params": {