From 7d133368e5d18839947ea550c93281a89ac53f8d Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Tue, 17 Aug 2021 09:49:33 +0000
Subject: [PATCH] fix bugs: replace torch-style nn.Module/nn.ModuleList with
 paddle nn.Layer/nn.LayerList, return the constructed augmentor and drop the
 _train early-return in AugmentationPipeline, delete the stray .bashrc, and
 raise dataloader num_workers

---
 .bashrc                                       | 10 ----------
 .notebook/u2_confermer_model_wenet.ipynb      |  2 +-
 deepspeech/frontend/augmentor/augmentation.py |  5 +----
 deepspeech/io/dataset.py                      |  1 +
 deepspeech/models/ds2/rnn.py                  |  2 +-
 deepspeech/models/u2.py                       |  2 +-
 deepspeech/models/u2_st.py                    |  2 +-
 deepspeech/modules/decoder.py                 |  4 ++--
 deepspeech/modules/decoder_layer.py           | 14 +++++++-------
 deepspeech/modules/encoder.py                 |  4 ++--
 deepspeech/modules/rnn.py                     |  2 +-
 examples/librispeech/s0/conf/deepspeech2.yaml |  2 +-
 12 files changed, 19 insertions(+), 31 deletions(-)
 delete mode 100755 .bashrc

diff --git a/.bashrc b/.bashrc
deleted file mode 100755
index 15131969..00000000
--- a/.bashrc
+++ /dev/null
@@ -1,10 +0,0 @@
-# Locales
-
-export LC_ALL=en_US.UTF-8
-export LANG=en_US.UTF-8
-export LANGUAGE=en_US.UTF-8
-
-# Aliases
-alias nvs="nvidia-smi"
-alias rsync="rsync --progress -raz"
-alias his="history"
diff --git a/.notebook/u2_confermer_model_wenet.ipynb b/.notebook/u2_confermer_model_wenet.ipynb
index 4f2c9632..a425e16c 100644
--- a/.notebook/u2_confermer_model_wenet.ipynb
+++ b/.notebook/u2_confermer_model_wenet.ipynb
@@ -3431,7 +3431,7 @@
     "        convolution_layer_args = (output_size, cnn_module_kernel, activation,\n",
     "                                  cnn_module_norm, causal)\n",
     "\n",
-    "        self.encoders = nn.ModuleList([\n",
+    "        self.encoders = nn.LayerList([\n",
     "            ConformerEncoderLayer(\n",
     "                size=output_size,\n",
     "                self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),\n",
diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
index a61ca37b..cfebc463 100644
--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -164,8 +164,6 @@ class AugmentationPipeline():
         :param audio_segment: Audio segment to process.
         :type audio_segment: AudioSegmenet|SpeechSegment
         """
-        if not self._train:
-            return
         for augmentor, rate in zip(self._audio_augmentors, self._audio_rates):
             if self._rng.uniform(0., 1.) < rate:
                 augmentor.transform_audio(audio_segment)
@@ -176,8 +174,6 @@
         Args:
             spec_segment (np.ndarray): audio feature, (D, T).
         """
-        if not self._train:
-            return
         for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
             if self._rng.uniform(0., 1.) < rate:
                 spec_segment = augmentor.transform_feature(spec_segment)
@@ -217,3 +213,4 @@
             obj = class_obj(self._rng, **params)
         except Exception:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
+        return obj
diff --git a/deepspeech/io/dataset.py b/deepspeech/io/dataset.py
index a7bf1fc2..259b3b49 100644
--- a/deepspeech/io/dataset.py
+++ b/deepspeech/io/dataset.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 from typing import Optional
 
+import numpy as np
 from paddle.io import Dataset
 from yacs.config import CfgNode
 
diff --git a/deepspeech/models/ds2/rnn.py b/deepspeech/models/ds2/rnn.py
index 01b55c4a..0d8c9fd2 100644
--- a/deepspeech/models/ds2/rnn.py
+++ b/deepspeech/models/ds2/rnn.py
@@ -297,7 +297,7 @@ class RNNStack(nn.Layer):
                     share_weights=share_rnn_weights))
             i_size = h_size * 2
 
-        self.rnn_stacks = nn.ModuleList(rnn_stacks)
+        self.rnn_stacks = nn.LayerList(rnn_stacks)
 
     def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
         """
diff --git a/deepspeech/models/u2.py b/deepspeech/models/u2.py
index f1d466a2..7ed16c9d 100644
--- a/deepspeech/models/u2.py
+++ b/deepspeech/models/u2.py
@@ -54,7 +54,7 @@ __all__ = ["U2Model", "U2InferModel"]
 logger = Log(__name__).getlog()
 
 
-class U2BaseModel(nn.Module):
+class U2BaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
 
     @classmethod
diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py
index a73f52e9..99420a89 100644
--- a/deepspeech/models/u2_st.py
+++ b/deepspeech/models/u2_st.py
@@ -48,7 +48,7 @@ __all__ = ["U2STModel", "U2STInferModel"]
 logger = Log(__name__).getlog()
 
 
-class U2STBaseModel(nn.Module):
+class U2STBaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
 
     @classmethod
diff --git a/deepspeech/modules/decoder.py b/deepspeech/modules/decoder.py
index 696a6315..87c9fa49 100644
--- a/deepspeech/modules/decoder.py
+++ b/deepspeech/modules/decoder.py
@@ -33,7 +33,7 @@ logger = Log(__name__).getlog()
 __all__ = ["TransformerDecoder"]
 
 
-class TransformerDecoder(nn.Module):
+class TransformerDecoder(nn.Layer):
     """Base class of Transfomer decoder module.
     Args:
         vocab_size: output dim
@@ -86,7 +86,7 @@ class TransformerDecoder(nn.Module):
         self.use_output_layer = use_output_layer
         self.output_layer = nn.Linear(attention_dim, vocab_size)
 
-        self.decoders = nn.ModuleList([
+        self.decoders = nn.LayerList([
             DecoderLayer(
                 size=attention_dim,
                 self_attn=MultiHeadedAttention(attention_heads, attention_dim,
diff --git a/deepspeech/modules/decoder_layer.py b/deepspeech/modules/decoder_layer.py
index c6fac541..47c42615 100644
--- a/deepspeech/modules/decoder_layer.py
+++ b/deepspeech/modules/decoder_layer.py
@@ -25,15 +25,15 @@ logger = Log(__name__).getlog()
 __all__ = ["DecoderLayer"]
 
 
-class DecoderLayer(nn.Module):
+class DecoderLayer(nn.Layer):
     """Single decoder layer module.
     Args:
         size (int): Input dimension.
-        self_attn (nn.Module): Self-attention module instance.
+        self_attn (nn.Layer): Self-attention module instance.
            `MultiHeadedAttention` instance can be used as the argument.
-        src_attn (nn.Module): Self-attention module instance.
+        src_attn (nn.Layer): Self-attention module instance.
            `MultiHeadedAttention` instance can be used as the argument.
-        feed_forward (nn.Module): Feed-forward module instance.
+        feed_forward (nn.Layer): Feed-forward module instance.
            `PositionwiseFeedForward` instance can be used as the argument.
         dropout_rate (float): Dropout rate.
         normalize_before (bool):
@@ -48,9 +48,9 @@
     def __init__(
             self,
             size: int,
-            self_attn: nn.Module,
-            src_attn: nn.Module,
-            feed_forward: nn.Module,
+            self_attn: nn.Layer,
+            src_attn: nn.Layer,
+            feed_forward: nn.Layer,
             dropout_rate: float,
             normalize_before: bool=True,
             concat_after: bool=False, ):
diff --git a/deepspeech/modules/encoder.py b/deepspeech/modules/encoder.py
index 27e0f8d7..71ec61a0 100644
--- a/deepspeech/modules/encoder.py
+++ b/deepspeech/modules/encoder.py
@@ -358,7 +358,7 @@ class TransformerEncoder(BaseEncoder):
                          pos_enc_layer_type, normalize_before, concat_after,
                          static_chunk_size, use_dynamic_chunk, global_cmvn,
                          use_dynamic_left_chunk)
-        self.encoders = nn.ModuleList([
+        self.encoders = nn.LayerList([
             TransformerEncoderLayer(
                 size=output_size,
                 self_attn=MultiHeadedAttention(attention_heads, output_size,
@@ -438,7 +438,7 @@ class ConformerEncoder(BaseEncoder):
         convolution_layer_args = (output_size, cnn_module_kernel, activation,
                                   cnn_module_norm, causal)
 
-        self.encoders = nn.ModuleList([
+        self.encoders = nn.LayerList([
             ConformerEncoderLayer(
                 size=output_size,
                 self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),
diff --git a/deepspeech/modules/rnn.py b/deepspeech/modules/rnn.py
index 01b55c4a..0d8c9fd2 100644
--- a/deepspeech/modules/rnn.py
+++ b/deepspeech/modules/rnn.py
@@ -297,7 +297,7 @@ class RNNStack(nn.Layer):
                     share_weights=share_rnn_weights))
             i_size = h_size * 2
 
-        self.rnn_stacks = nn.ModuleList(rnn_stacks)
+        self.rnn_stacks = nn.LayerList(rnn_stacks)
 
     def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
         """
diff --git a/examples/librispeech/s0/conf/deepspeech2.yaml b/examples/librispeech/s0/conf/deepspeech2.yaml
index acee94c3..dab8d046 100644
--- a/examples/librispeech/s0/conf/deepspeech2.yaml
+++ b/examples/librispeech/s0/conf/deepspeech2.yaml
@@ -32,7 +32,7 @@ collator:
   keep_transcription_text: False
   sortagrad: True
   shuffle_method: batch_shuffle
-  num_workers: 0
+  num_workers: 2
 
 model:
   num_conv_layers: 2
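
Note on the recurring rename in this patch: `nn.Module` and `nn.ModuleList` are
PyTorch names. Their PaddlePaddle counterparts are `nn.Layer` and `nn.LayerList`,
and the torch-style names do not exist in paddle.nn, so the old code raised
AttributeError as soon as a model was constructed. The sketch below shows the
paddle idiom the patched modules now follow. It is a minimal illustration only,
assuming paddle 2.x; the ToyStack class and its sizes are hypothetical and
appear nowhere in the patch.

    import paddle
    import paddle.nn as nn

    class ToyStack(nn.Layer):  # paddle base class, counterpart of torch.nn.Module
        def __init__(self, depth: int=3, width: int=8):
            super().__init__()
            # nn.LayerList (counterpart of torch.nn.ModuleList) registers each
            # sublayer, so paddle tracks its parameters; a plain python list
            # would hide them from .parameters() and state_dict().
            self.blocks = nn.LayerList(
                [nn.Linear(width, width) for _ in range(depth)])

        def forward(self, x: paddle.Tensor) -> paddle.Tensor:
            # run the input through every registered sublayer in order
            for block in self.blocks:
                x = block(x)
            return x

    model = ToyStack()
    out = model(paddle.randn([2, 8]))
    print(out.shape)  # [2, 8]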