diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index e077cd5b..e94a127d 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -36,6 +36,7 @@ from paddlespeech.s2t.modules.ctc import CTCDecoderBase
 from paddlespeech.s2t.modules.decoder import TransformerDecoder
 from paddlespeech.s2t.modules.encoder import ConformerEncoder
 from paddlespeech.s2t.modules.encoder import TransformerEncoder
+from paddlespeech.s2t.modules.initializer import DefaultInitializerContext
 from paddlespeech.s2t.modules.loss import LabelSmoothingLoss
 from paddlespeech.s2t.modules.mask import make_pad_mask
 from paddlespeech.s2t.modules.mask import mask_finished_preds
@@ -50,7 +51,6 @@ from paddlespeech.s2t.utils.tensor_utils import pad_sequence
 from paddlespeech.s2t.utils.tensor_utils import th_accuracy
 from paddlespeech.s2t.utils.utility import log_add
 from paddlespeech.s2t.utils.utility import UpdateConfig
-from paddlespeech.s2t.modules.initializer import DefaultInitializerContext
 # from paddlespeech.s2t.modules.initializer import initialize
 
 __all__ = ["U2Model", "U2InferModel"]
@@ -786,7 +786,8 @@ class U2Model(U2DecodeModel):
         model_conf = configs.get('model_conf', dict())
         init_type = model_conf.get("init_type", None)
         with DefaultInitializerContext(init_type):
-            vocab_size, encoder, decoder, ctc = U2Model._init_from_config(configs)
+            vocab_size, encoder, decoder, ctc = U2Model._init_from_config(
+                configs)
 
         super().__init__(
             vocab_size=vocab_size,
diff --git a/paddlespeech/s2t/modules/activation.py b/paddlespeech/s2t/modules/activation.py
index 48c84fa6..2f387b0d 100644
--- a/paddlespeech/s2t/modules/activation.py
+++ b/paddlespeech/s2t/modules/activation.py
@@ -16,8 +16,9 @@ from collections import OrderedDict
 import paddle
 from paddle import nn
 from paddle.nn import functional as F
-from paddlespeech.s2t.modules.align import Linear
+
 from paddlespeech.s2t.modules.align import Conv2D
+from paddlespeech.s2t.modules.align import Linear
 from paddlespeech.s2t.utils.log import Log
 
 logger = Log(__name__).getlog()
diff --git a/paddlespeech/s2t/modules/align.py b/paddlespeech/s2t/modules/align.py
index 575773d7..f8891679 100644
--- a/paddlespeech/s2t/modules/align.py
+++ b/paddlespeech/s2t/modules/align.py
@@ -1,7 +1,20 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import paddle
 from paddle import nn
-from paddlespeech.s2t.modules.initializer import KaimingUniform
+from paddlespeech.s2t.modules.initializer import KaimingUniform
 """
 To align the initializer between paddle and torch,
 the API below are set defalut initializer with priority higger than global initializer.
 """
@@ -10,65 +23,117 @@ global_init_type = None
 
 
 class LayerNorm(nn.LayerNorm):
-    def __init__(self, normalized_shape, epsilon=1e-05, weight_attr=None, bias_attr=None, name=None):
+    def __init__(self,
+                 normalized_shape,
+                 epsilon=1e-05,
+                 weight_attr=None,
+                 bias_attr=None,
+                 name=None):
         if weight_attr is None:
             weight_attr = paddle.ParamAttr(
                 initializer=nn.initializer.Constant(1.0))
         if bias_attr is None:
             bias_attr = paddle.ParamAttr(
                 initializer=nn.initializer.Constant(0.0))
-        super(LayerNorm, self).__init__(normalized_shape, epsilon, weight_attr, bias_attr, name)
+        super(LayerNorm, self).__init__(normalized_shape, epsilon, weight_attr,
+                                        bias_attr, name)
+
 
-class BatchNorm1D(nn.BatchNorm1D):
-    def __init__(self, num_features, momentum=0.9, epsilon=1e-05, weight_attr=None, bias_attr=None, data_format='NCL', name=None):
+class BatchNorm1D(nn.BatchNorm1D):
+    def __init__(self,
+                 num_features,
+                 momentum=0.9,
+                 epsilon=1e-05,
+                 weight_attr=None,
+                 bias_attr=None,
+                 data_format='NCL',
+                 name=None):
         if weight_attr is None:
             weight_attr = paddle.ParamAttr(
                 initializer=nn.initializer.Constant(1.0))
         if bias_attr is None:
             bias_attr = paddle.ParamAttr(
                 initializer=nn.initializer.Constant(0.0))
-        super(BatchNorm1D, self).__init__(num_features, momentum, epsilon, weight_attr, bias_attr, data_format, name)
+        super(BatchNorm1D,
+              self).__init__(num_features, momentum, epsilon, weight_attr,
+                             bias_attr, data_format, name)
+
 
 class Embedding(nn.Embedding):
-    def __init__(self, num_embeddings, embedding_dim, padding_idx=None, sparse=False, weight_attr=None, name=None):
+    def __init__(self,
+                 num_embeddings,
+                 embedding_dim,
+                 padding_idx=None,
+                 sparse=False,
+                 weight_attr=None,
+                 name=None):
         if weight_attr is None:
-            weight_attr = paddle.ParamAttr(
-                initializer=nn.initializer.Normal())
-        super(Embedding, self).__init__(num_embeddings, embedding_dim, padding_idx, sparse, weight_attr, name)
+            weight_attr = paddle.ParamAttr(initializer=nn.initializer.Normal())
+        super(Embedding, self).__init__(num_embeddings, embedding_dim,
+                                        padding_idx, sparse, weight_attr, name)
+
 
 class Linear(nn.Linear):
-    def __init__(self, in_features, out_features, weight_attr=None, bias_attr=None, name=None):
-        if weight_attr is None:
-            if global_init_type == "kaiming_uniform":
-                weight_attr = paddle.ParamAttr(
-                    initializer=KaimingUniform())
-        if bias_attr is None:
-            if global_init_type == "kaiming_uniform":
-                bias_attr = paddle.ParamAttr(
-                    initializer=KaimingUniform())
-        super(Linear, self).__init__(in_features, out_features, weight_attr, bias_attr, name)
+    def __init__(self,
+                 in_features,
+                 out_features,
+                 weight_attr=None,
+                 bias_attr=None,
+                 name=None):
+        if weight_attr is None:
+            if global_init_type == "kaiming_uniform":
+                weight_attr = paddle.ParamAttr(initializer=KaimingUniform())
+        if bias_attr is None:
+            if global_init_type == "kaiming_uniform":
+                bias_attr = paddle.ParamAttr(initializer=KaimingUniform())
+        super(Linear, self).__init__(in_features, out_features, weight_attr,
+                                     bias_attr, name)
+
 
 class Conv1D(nn.Conv1D):
-    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', weight_attr=None, bias_attr=None, data_format='NCL'):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 padding_mode='zeros',
+                 weight_attr=None,
+                 bias_attr=None,
+                 data_format='NCL'):
         if weight_attr is None:
             if global_init_type == "kaiming_uniform":
                 print("set kaiming_uniform")
-                weight_attr = paddle.ParamAttr(
-                    initializer=KaimingUniform())
+                weight_attr = paddle.ParamAttr(initializer=KaimingUniform())
         if bias_attr is None:
             if global_init_type == "kaiming_uniform":
-                bias_attr = paddle.ParamAttr(
-                    initializer=KaimingUniform())
-        super(Conv1D, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, padding_mode, weight_attr, bias_attr, data_format)
-
+                bias_attr = paddle.ParamAttr(initializer=KaimingUniform())
+        super(Conv1D, self).__init__(
+            in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, padding_mode, weight_attr, bias_attr, data_format)
+
+
 class Conv2D(nn.Conv2D):
-    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, padding_mode='zeros', weight_attr=None, bias_attr=None, data_format='NCHW'):
-        if weight_attr is None:
-            if global_init_type == "kaiming_uniform":
-                weight_attr = paddle.ParamAttr(
-                    initializer=KaimingUniform())
-        if bias_attr is None:
-            if global_init_type == "kaiming_uniform":
-                bias_attr = paddle.ParamAttr(
-                    initializer=KaimingUniform())
-        super(Conv2D, self).__init__(in_channels, out_channels, kernel_size, stride, padding, dilation, groups, padding_mode, weight_attr, bias_attr, data_format)
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 padding=0,
+                 dilation=1,
+                 groups=1,
+                 padding_mode='zeros',
+                 weight_attr=None,
+                 bias_attr=None,
+                 data_format='NCHW'):
+        if weight_attr is None:
+            if global_init_type == "kaiming_uniform":
+                weight_attr = paddle.ParamAttr(initializer=KaimingUniform())
+        if bias_attr is None:
+            if global_init_type == "kaiming_uniform":
+                bias_attr = paddle.ParamAttr(initializer=KaimingUniform())
+        super(Conv2D, self).__init__(
+            in_channels, out_channels, kernel_size, stride, padding, dilation,
+            groups, padding_mode, weight_attr, bias_attr, data_format)
diff --git a/paddlespeech/s2t/modules/encoder.py b/paddlespeech/s2t/modules/encoder.py
index 71a2bad4..c843c0e2 100644
--- a/paddlespeech/s2t/modules/encoder.py
+++ b/paddlespeech/s2t/modules/encoder.py
@@ -24,7 +24,6 @@ from typeguard import check_argument_types
 
 from paddlespeech.s2t.modules.activation import get_activation
 from paddlespeech.s2t.modules.align import LayerNorm
-from paddlespeech.s2t.modules.align import Linear
 from paddlespeech.s2t.modules.attention import MultiHeadedAttention
 from paddlespeech.s2t.modules.attention import RelPositionMultiHeadedAttention
 from paddlespeech.s2t.modules.conformer_convolution import ConvolutionModule
diff --git a/paddlespeech/s2t/modules/initializer.py b/paddlespeech/s2t/modules/initializer.py
index 3fbab285..98466ebd 100644
--- a/paddlespeech/s2t/modules/initializer.py
+++ b/paddlespeech/s2t/modules/initializer.py
@@ -12,15 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import numpy as np
-from paddle import nn
 from paddle.fluid import framework
 from paddle.fluid import unique_name
 from paddle.fluid.core import VarDesc
-from paddle.fluid.framework import default_main_program
-from paddle.fluid.framework import in_dygraph_mode
-from paddle.fluid.initializer import Initializer
 from paddle.fluid.initializer import MSRAInitializer
-from typeguard import check_argument_types
 
 __all__ = ['KaimingUniform']
 
@@ -160,16 +155,15 @@ class DefaultInitializerContext(object):
         with DefaultInitializerContext("kaiming_uniform"):
             code for setup_model
     """
+
    def __init__(self, init_type=None):
         self.init_type = init_type
-
+
     def __enter__(self):
         from paddlespeech.s2t.modules import align
         align.global_init_type = self.init_type
         return self
-
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         from paddlespeech.s2t.modules import align
         align.global_init_type = None
-
-
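
Note on the mechanism these hunks share: DefaultInitializerContext is a context manager that sets align.global_init_type on entry and resets it to None on exit, and the aligned layers in align.py (Linear, Conv1D, Conv2D) consult that flag only when the caller passes no explicit weight_attr/bias_attr, falling back to KaimingUniform so paddle parameters start from the same distribution as torch defaults. A minimal usage sketch based on the docstring and the u2.py hunk above (the layer sizes are arbitrary, for illustration only):

    from paddlespeech.s2t.modules.align import Linear
    from paddlespeech.s2t.modules.initializer import DefaultInitializerContext

    # Inside the context, align.global_init_type == "kaiming_uniform", so a
    # Linear built without explicit attrs gets the KaimingUniform initializer.
    with DefaultInitializerContext("kaiming_uniform"):
        proj = Linear(256, 512)

    # __exit__ resets align.global_init_type to None, so layers built here
    # fall back to paddle's global default initializer.
    tail = Linear(256, 512)

This is the same pattern U2Model uses: the whole model is constructed inside "with DefaultInitializerContext(init_type):", so every aligned layer it creates picks up the configured initializer without threading init_type through each constructor.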