pull/756/head
Hui Zhang 3 years ago
parent 16f4bdc5f1
commit 7d133368e5

@@ -1,10 +0,0 @@
-# Locales
-export LC_ALL=en_US.UTF-8
-export LANG=en_US.UTF-8
-export LANGUAGE=en_US.UTF-8
-
-# Aliases
-alias nvs="nvidia-smi"
-alias rsync="rsync --progress -raz"
-alias his="history"

@ -3431,7 +3431,7 @@
" convolution_layer_args = (output_size, cnn_module_kernel, activation,\n", " convolution_layer_args = (output_size, cnn_module_kernel, activation,\n",
" cnn_module_norm, causal)\n", " cnn_module_norm, causal)\n",
"\n", "\n",
" self.encoders = nn.ModuleList([\n", " self.encoders = nn.LayerList([\n",
" ConformerEncoderLayer(\n", " ConformerEncoderLayer(\n",
" size=output_size,\n", " size=output_size,\n",
" self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),\n", " self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),\n",

@@ -164,8 +164,6 @@ class AugmentationPipeline():
         :param audio_segment: Audio segment to process.
         :type audio_segment: AudioSegment|SpeechSegment
         """
-        if not self._train:
-            return
         for augmentor, rate in zip(self._audio_augmentors, self._audio_rates):
             if self._rng.uniform(0., 1.) < rate:
                 augmentor.transform_audio(audio_segment)
@@ -176,8 +174,6 @@ class AugmentationPipeline():
         Args:
             spec_segment (np.ndarray): audio feature, (D, T).
         """
-        if not self._train:
-            return
         for augmentor, rate in zip(self._spec_augmentors, self._spec_rates):
             if self._rng.uniform(0., 1.) < rate:
                 spec_segment = augmentor.transform_feature(spec_segment)
@@ -217,3 +213,4 @@ class AugmentationPipeline():
             obj = class_obj(self._rng, **params)
         except Exception:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
+        return obj
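
These three hunks drop the `if not self._train: return` early exits, so the pipeline now applies its augmentors whenever it is called, each firing independently with its configured probability; the enclosing factory method (which the diff does not name) now also returns the constructed augmentor object. A stand-alone sketch of the probability-gated loop that remains (the `ToyPipeline` class and the lambda augmentors are illustrative, not from the repo):

```python
import numpy as np


class ToyPipeline:
    """Illustrative stand-in for AugmentationPipeline after this change:
    no train-only early return; each augmentor fires independently with
    probability `rate`."""

    def __init__(self, augmentors, rates, seed=0):
        self._rng = np.random.RandomState(seed)
        self._augmentors = augmentors
        self._rates = rates

    def transform(self, segment):
        for augmentor, rate in zip(self._augmentors, self._rates):
            if self._rng.uniform(0., 1.) < rate:
                segment = augmentor(segment)
        return segment


pipeline = ToyPipeline([lambda s: s + 1.0, lambda s: s * 2.0],
                       rates=[1.0, 0.5])
print(pipeline.transform(np.zeros(4)))
```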

@@ -13,6 +13,7 @@
 # limitations under the License.
 from typing import Optional
 
+import numpy as np
 from paddle.io import Dataset
 from yacs.config import CfgNode
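
The new `import numpy as np` presumably supports NumPy-typed features elsewhere in this dataset module. As a reminder of the surrounding API, a minimal `paddle.io.Dataset` that yields NumPy arrays (the `ToyDataset` here is hypothetical, not the repo's class):

```python
import numpy as np
from paddle.io import Dataset


class ToyDataset(Dataset):
    """Hypothetical dataset returning (feature, label) NumPy pairs."""

    def __init__(self, n: int = 4, dim: int = 8):
        self._feats = np.random.randn(n, dim).astype(np.float32)
        self._labels = np.arange(n, dtype=np.int64)

    def __getitem__(self, idx):
        return self._feats[idx], self._labels[idx]

    def __len__(self):
        return len(self._feats)
```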

@@ -297,7 +297,7 @@ class RNNStack(nn.Layer):
                     share_weights=share_rnn_weights))
             i_size = h_size * 2
 
-        self.rnn_stacks = nn.ModuleList(rnn_stacks)
+        self.rnn_stacks = nn.LayerList(rnn_stacks)
 
     def forward(self, x: paddle.Tensor, x_len: paddle.Tensor):
         """

@@ -54,7 +54,7 @@ __all__ = ["U2Model", "U2InferModel"]
 
 logger = Log(__name__).getlog()
 
-class U2BaseModel(nn.Module):
+class U2BaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
 
     @classmethod

@@ -48,7 +48,7 @@ __all__ = ["U2STModel", "U2STInferModel"]
 
 logger = Log(__name__).getlog()
 
-class U2STBaseModel(nn.Module):
+class U2STBaseModel(nn.Layer):
     """CTC-Attention hybrid Encoder-Decoder model"""
 
     @classmethod

@@ -33,7 +33,7 @@ logger = Log(__name__).getlog()
 
 __all__ = ["TransformerDecoder"]
 
-class TransformerDecoder(nn.Module):
+class TransformerDecoder(nn.Layer):
     """Base class of Transformer decoder module.
     Args:
         vocab_size: output dim
@@ -86,7 +86,7 @@ class TransformerDecoder(nn.Module):
         self.use_output_layer = use_output_layer
         self.output_layer = nn.Linear(attention_dim, vocab_size)
 
-        self.decoders = nn.ModuleList([
+        self.decoders = nn.LayerList([
             DecoderLayer(
                 size=attention_dim,
                 self_attn=MultiHeadedAttention(attention_heads, attention_dim,

@@ -25,15 +25,15 @@ logger = Log(__name__).getlog()
 
 __all__ = ["DecoderLayer"]
 
-class DecoderLayer(nn.Module):
+class DecoderLayer(nn.Layer):
     """Single decoder layer module.
     Args:
         size (int): Input dimension.
-        self_attn (nn.Module): Self-attention module instance.
+        self_attn (nn.Layer): Self-attention module instance.
             `MultiHeadedAttention` instance can be used as the argument.
-        src_attn (nn.Module): Self-attention module instance.
+        src_attn (nn.Layer): Source-attention module instance.
             `MultiHeadedAttention` instance can be used as the argument.
-        feed_forward (nn.Module): Feed-forward module instance.
+        feed_forward (nn.Layer): Feed-forward module instance.
             `PositionwiseFeedForward` instance can be used as the argument.
         dropout_rate (float): Dropout rate.
         normalize_before (bool):
@@ -48,9 +48,9 @@ class DecoderLayer(nn.Module):
     def __init__(
             self,
             size: int,
-            self_attn: nn.Module,
-            src_attn: nn.Module,
-            feed_forward: nn.Module,
+            self_attn: nn.Layer,
+            src_attn: nn.Layer,
+            feed_forward: nn.Layer,
             dropout_rate: float,
             normalize_before: bool=True,
             concat_after: bool=False, ):
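
The docstring above mentions `normalize_before`, which selects pre-norm versus post-norm placement of layer normalization around each residual sublayer. A minimal sketch of that pattern in Paddle (the `ResidualBlock` wrapper is illustrative, not the repo's implementation):

```python
import paddle
import paddle.nn as nn


class ResidualBlock(nn.Layer):
    """Illustrative residual wrapper switching between pre-norm and
    post-norm, as DecoderLayer's `normalize_before` flag does."""

    def __init__(self, size: int, sublayer: nn.Layer,
                 normalize_before: bool = True):
        super().__init__()
        self.norm = nn.LayerNorm(size)
        self.sublayer = sublayer
        self.normalize_before = normalize_before

    def forward(self, x: paddle.Tensor) -> paddle.Tensor:
        if self.normalize_before:
            # pre-norm: normalize, transform, then add the residual
            return x + self.sublayer(self.norm(x))
        # post-norm: add the residual first, then normalize
        return self.norm(x + self.sublayer(x))
```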

@@ -358,7 +358,7 @@ class TransformerEncoder(BaseEncoder):
             pos_enc_layer_type, normalize_before, concat_after,
             static_chunk_size, use_dynamic_chunk, global_cmvn,
             use_dynamic_left_chunk)
-        self.encoders = nn.ModuleList([
+        self.encoders = nn.LayerList([
             TransformerEncoderLayer(
                 size=output_size,
                 self_attn=MultiHeadedAttention(attention_heads, output_size,
@@ -438,7 +438,7 @@ class ConformerEncoder(BaseEncoder):
         convolution_layer_args = (output_size, cnn_module_kernel, activation,
                                   cnn_module_norm, causal)
 
-        self.encoders = nn.ModuleList([
+        self.encoders = nn.LayerList([
             ConformerEncoderLayer(
                 size=output_size,
                 self_attn=encoder_selfattn_layer(*encoder_selfattn_layer_args),
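
Both encoder variants hand a self-attention instance to every layer in the `LayerList`. As a rough stand-in for the repo's `MultiHeadedAttention` (whose full signature the diff truncates), Paddle's built-in `nn.MultiHeadAttention` shows the same self-attention call shape:

```python
import paddle
import paddle.nn as nn

# Built-in stand-in; the repo's MultiHeadedAttention differs in details.
attn = nn.MultiHeadAttention(embed_dim=256, num_heads=4)
x = paddle.randn([2, 50, 256])  # (batch, time, output_size)
out = attn(x, x, x)             # self-attention: query = key = value
print(out.shape)                # [2, 50, 256]
```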


@@ -32,7 +32,7 @@ collator:
   keep_transcription_text: False
   sortagrad: True
   shuffle_method: batch_shuffle
-  num_workers: 0
+  num_workers: 2
 
 model:
   num_conv_layers: 2
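
The `num_workers` bump from 0 to 2 presumably maps to `paddle.io.DataLoader`, moving batch preparation into two background worker processes instead of the main process. A minimal sketch under that assumption (the `ToyDataset` is hypothetical, mirroring the dataset sketch earlier):

```python
import numpy as np
from paddle.io import DataLoader, Dataset


class ToyDataset(Dataset):
    def __getitem__(self, idx):
        return np.array([idx], dtype=np.float32)

    def __len__(self):
        return 8


# num_workers=2: two subprocesses prefetch batches; num_workers=0 (the old
# value) loads everything in the main process.
loader = DataLoader(ToyDataset(), batch_size=4, num_workers=2)
for batch in loader:
    print(batch)
```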
