From 7d133368e5d18839947ea550c93281a89ac53f8d Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Tue, 17 Aug 2021 09:49:33 +0000
Subject: [PATCH] fix bugs: replace torch-style nn.Module/nn.ModuleList with
 paddle nn.Layer/nn.LayerList, return the constructed augmentor and drop the
 _train early-return in AugmentationPipeline, delete the stray .bashrc, and
 raise dataloader num_workers

---
 .bashrc                                       | 10 ----------
 .notebook/u2_confermer_model_wenet.ipynb      |  2 +-
 deepspeech/frontend/augmentor/augmentation.py |  5 +----
 deepspeech/io/dataset.py                      |  1 +
 deepspeech/models/ds2/rnn.py                  |  2 +-
 deepspeech/models/u2.py                       |  2 +-
 deepspeech/models/u2_st.py                    |  2 +-
 deepspeech/modules/decoder.py                 |  4 ++--
 deepspeech/modules/decoder_layer.py           | 14 +++++++-------
 deepspeech/modules/encoder.py                 |  4 ++--
 deepspeech/modules/rnn.py                     |  2 +-
 examples/librispeech/s0/conf/deepspeech2.yaml |  2 +-
 12 files changed, 19 insertions(+), 31 deletions(-)
 delete mode 100755 .bashrc

diff --git a/.bashrc b/.bashrc
deleted file mode 100755
index 15131969..00000000
--- a/.bashrc
+++ /dev/null
@@ -1,10 +0,0 @@
-# Locales
-
-export LC_ALL=en_US.UTF-8
-export LANG=en_US.UTF-8
-export LANGUAGE=en_US.UTF-8
-
-# Aliases
-alias nvs="nvidia-smi"
-alias rsync="rsync --progress -raz"
-alias his="history"
diff --git a/.notebook/u2_confermer_model_wenet.ipynb b/.notebook/u2_confermer_model_wenet.ipynb
index 4f2c9632..a425e16c 100644
--- a/.notebook/u2_confermer_model_wenet.ipynb
+++ b/.notebook/u2_confermer_model_wenet.ipynb
@@ -3431,7 +3431,7 @@
     "        convolution_layer_args = (output_size, cnn_module_kernel, activation,\n",
     "                                  cnn_module_norm, causal)\n",
     "\n",
-    "        self.encoders = nn.ModuleList([\n",
+    "        self.encoders = nn.LayerList([\n",
     "            ConformerEncoderLayer(\n",
     "                size=output_size,\n",
     "                self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),\n",
diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
index a61ca37b..cfebc463 100644
--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -164,8 +164,6 @@ class AugmentationPipeline():
         :param audio_segment: Audio segment to process.
         :type audio_segment: AudioSegmenet|SpeechSegment
         """
-        if not self._train:
-            return
         for augmentor, rate in zip(self._audio_augmentors, self._audio_rates):
             if self._rng.uniform(0., 1.) < rate:
                 augmentor.transform_audio(audio_segment)
@@ -176,8 +174,6 @@
         Args:
             spec_segment (np.ndarray): audio feature, (D, T).
         """
-        if not self._train:
-            return
         for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
             if self._rng.uniform(0., 1.) < rate:
                 spec_segment = augmentor.transform_feature(spec_segment)
@@ -217,3 +213,4 @@
             obj = class_obj(self._rng, **params)
         except Exception:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
+        return obj
diff --git a/deepspeech/io/dataset.py b/deepspeech/io/dataset.py
index a7bf1fc2..259b3b49 100644
--- a/deepspeech/io/dataset.py
+++ b/deepspeech/io/dataset.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 from typing import Optional
 
+import numpy as np
 from paddle.io import Dataset
 from yacs.config import CfgNode
 
diff --git a/deepspeech/models/ds2/rnn.py b/deepspeech/models/ds2/rnn.py
index 01b55c4a..0d8c9fd2 100644
--- a/deepspeech/models/ds2/rnn.py
+++ b/deepspeech/models/ds2/rnn.py
@@ -297,7 +297,7 @@ class RNNStack(nn.Layer):
                     share_weights=share_rnn_weights))
             i_size = h_size * 2
 
-        self.rnn_stacks = nn.ModuleList(rnn_stacks)
+        self.rnn_stacks = nn.LayerList(rnn_stacks)
 
     def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
         """
diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py
index f1d466a2..7ed16c9d 100644
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -54,7 +54,7 @@ __all__ = ["U2Model", "U2InferModel"]
 logger = Log(__name__).getlog()
 
 
-class U2BaseModel(nn.Module):
+class U2BaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
 
     @classmethod
diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py
index a73f52e9..99420a89 100644
--- a/deepspeech/models/u2_st.py
+++ b/deepspeech/models/u2_st.py
@@ -48,7 +48,7 @@ __all__ = ["U2STModel", "U2STInferModel"]
 logger = Log(__name__).getlog()
 
 
-class U2STBaseModel(nn.Module):
+class U2STBaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
 
     @classmethod
diff --git a/deepspeech/modules/decoder.py b/deepspeech/modules/decoder.py
index 696a6315..87c9fa49 100644
--- a/deepspeech/modules/decoder.py
+++ b/deepspeech/modules/decoder.py
@@ -33,7 +33,7 @@ logger = Log(__name__).getlog()
 __all__ = ["TransformerDecoder"]
 
 
-class TransformerDecoder(nn.Module):
+class TransformerDecoder(nn.Layer):
     """Base class of Transfomer decoder module.
     Args:
         vocab_size: output dim
@@ -86,7 +86,7 @@ class TransformerDecoder(nn.Module):
         self.use_output_layer = use_output_layer
         self.output_layer = nn.Linear(attention_dim, vocab_size)
 
-        self.decoders = nn.ModuleList([
+        self.decoders = nn.LayerList([
             DecoderLayer(
                 size=attention_dim,
                 self_attn=MultiHeadedAttention(attention_heads, attention_dim,
diff --git a/deepspeech/modules/decoder_layer.py b/deepspeech/modules/decoder_layer.py
index c6fac541..47c42615 100644
--- a/deepspeech/modules/decoder_layer.py
+++ b/deepspeech/modules/decoder_layer.py
@@ -25,15 +25,15 @@ logger = Log(__name__).getlog()
 __all__ = ["DecoderLayer"]
 
 
-class DecoderLayer(nn.Module):
+class DecoderLayer(nn.Layer):
     """Single decoder layer module.
     Args:
         size (int): Input dimension.
-        self_attn (nn.Module): Self-attention module instance.
+        self_attn (nn.Layer): Self-attention module instance.
            `MultiHeadedAttention` instance can be used as the argument.
-        src_attn (nn.Module): Self-attention module instance.
+        src_attn (nn.Layer): Self-attention module instance.
            `MultiHeadedAttention` instance can be used as the argument.
-        feed_forward (nn.Module): Feed-forward module instance.
+        feed_forward (nn.Layer): Feed-forward module instance.
            `PositionwiseFeedForward` instance can be used as the argument.
         dropout_rate (float): Dropout rate.
         normalize_before (bool):
@@ -48,9 +48,9 @@
     def __init__(
             self,
             size: int,
-            self_attn: nn.Module,
-            src_attn: nn.Module,
-            feed_forward: nn.Module,
+            self_attn: nn.Layer,
+            src_attn: nn.Layer,
+            feed_forward: nn.Layer,
             dropout_rate: float,
             normalize_before: bool=True,
             concat_after: bool=False, ):
diff --git a/deepspeech/modules/encoder.py b/deepspeech/modules/encoder.py
index 27e0f8d7..71ec61a0 100644
--- a/deepspeech/modules/encoder.py
+++ b/deepspeech/modules/encoder.py
@@ -358,7 +358,7 @@ class TransformerEncoder(BaseEncoder):
                          pos_enc_layer_type, normalize_before, concat_after,
                          static_chunk_size, use_dynamic_chunk, global_cmvn,
                          use_dynamic_left_chunk)
-        self.encoders = nn.ModuleList([
+        self.encoders = nn.LayerList([
             TransformerEncoderLayer(
                 size=output_size,
                 self_attn=MultiHeadedAttention(attention_heads, output_size,
@@ -438,7 +438,7 @@ class ConformerEncoder(BaseEncoder):
         convolution_layer_args = (output_size, cnn_module_kernel, activation,
                                   cnn_module_norm, causal)
 
-        self.encoders = nn.ModuleList([
+        self.encoders = nn.LayerList([
             ConformerEncoderLayer(
                 size=output_size,
                 self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),
diff --git a/deepspeech/modules/rnn.py b/deepspeech/modules/rnn.py
index 01b55c4a..0d8c9fd2 100644
--- a/deepspeech/modules/rnn.py
+++ b/deepspeech/modules/rnn.py
@@ -297,7 +297,7 @@ class RNNStack(nn.Layer):
                     share_weights=share_rnn_weights))
             i_size = h_size * 2
 
-        self.rnn_stacks = nn.ModuleList(rnn_stacks)
+        self.rnn_stacks = nn.LayerList(rnn_stacks)
 
     def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
         """
diff --git a/examples/librispeech/s0/conf/deepspeech2.yaml b/examples/librispeech/s0/conf/deepspeech2.yaml
index acee94c3..dab8d046 100644
--- a/examples/librispeech/s0/conf/deepspeech2.yaml
+++ b/examples/librispeech/s0/conf/deepspeech2.yaml
@@ -32,7 +32,7 @@ collator:
   keep_transcription_text: False
   sortagrad: True
   shuffle_method: batch_shuffle
-  num_workers: 0
+  num_workers: 2
 
 model:
   num_conv_layers: 2
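
Note on the recurring rename in this patch: `nn.Module` and `nn.ModuleList` are
PyTorch names. Their PaddlePaddle counterparts are `nn.Layer` and `nn.LayerList`,
and the torch-style names do not exist in paddle.nn, so the old code raised
AttributeError as soon as a model was constructed. The sketch below shows the
paddle idiom the patched modules now follow. It is a minimal illustration only,
assuming paddle 2.x; the ToyStack class and its sizes are hypothetical and
appear nowhere in the patch.

    import paddle
    import paddle.nn as nn

    class ToyStack(nn.Layer):  # paddle base class, counterpart of torch.nn.Module
        def __init__(self, depth: int=3, width: int=8):
            super().__init__()
            # nn.LayerList (counterpart of torch.nn.ModuleList) registers each
            # sublayer, so paddle tracks its parameters; a plain python list
            # would hide them from .parameters() and state_dict().
            self.blocks = nn.LayerList(
                [nn.Linear(width, width) for _ in range(depth)])

        def forward(self, x: paddle.Tensor) -> paddle.Tensor:
            # run the input through every registered sublayer in order
            for block in self.blocks:
                x = block(x)
            return x

    model = ToyStack()
    out = model(paddle.randn([2, 8]))
    print(out.shape)  # [2, 8]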