diff --git a/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py b/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py
index bb73f35ee..5901c805a 100644
--- a/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py
+++ b/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/layers.py
@@ -44,7 +44,8 @@ class LinearNorm(nn.Layer):
             self.linear_layer.weight, gain=_calculate_gain(w_init_gain))
 
     def forward(self, x: paddle.Tensor):
-        return self.linear_layer(x)
+        out = self.linear_layer(x)
+        return out
 
 
 class ConvNorm(nn.Layer):
@@ -183,13 +184,14 @@ class Attention(nn.Layer):
         """
         Args:
             query:
-                decoder output (batch, n_mel_channels * n_frames_per_step)
+                decoder output (B, n_mel_channels * n_frames_per_step)
             processed_memory:
                 processed encoder outputs (B, T_in, attention_dim)
             attention_weights_cat:
                 cumulative and prev. att weights (B, 2, max_time)
         Returns:
-            Tensor: alignment (batch, max_time)
+            Tensor:
+                alignment (B, max_time)
         """
 
         processed_query = self.query_layer(query.unsqueeze(1))
@@ -254,7 +256,6 @@ class MFCC(nn.Layer):
         # -> (channel, time, n_mfcc).tranpose(...)
         mfcc = paddle.matmul(mel_specgram.transpose([0, 2, 1]),
                              self.dct_mat).transpose([0, 2, 1])
-
         # unpack batch
         if unsqueezed:
             mfcc = mfcc.squeeze(0)
diff --git a/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/model.py b/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/model.py
index 032611cd7..251974572 100644
--- a/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/model.py
+++ b/paddlespeech/t2s/models/starganv2_vc/AuxiliaryASR/model.py
@@ -194,9 +194,8 @@ class ASRS2S(nn.Layer):
             logit_outputs += [logit]
             alignments += [attention_weights]
 
-        hidden_outputs, logit_outputs, alignments = \
-            self.parse_decoder_outputs(
-                hidden_outputs, logit_outputs, alignments)
+        hidden_outputs, logit_outputs, alignments = self.parse_decoder_outputs(
+            hidden_outputs, logit_outputs, alignments)
 
         return hidden_outputs, logit_outputs, alignments
 
diff --git a/paddlespeech/t2s/models/starganv2_vc/JDCNet/model.py b/paddlespeech/t2s/models/starganv2_vc/JDCNet/model.py
index 02bc7e99a..5938e6a7c 100644
--- a/paddlespeech/t2s/models/starganv2_vc/JDCNet/model.py
+++ b/paddlespeech/t2s/models/starganv2_vc/JDCNet/model.py
@@ -46,11 +46,11 @@ class JDCNet(nn.Layer):
             nn.LeakyReLU(leaky_relu_slope),
             # out: (B, out_channels, T, n_mels)
             nn.Conv2D(64, 64, 3, padding=1, bias_attr=False), )
-        # output: (B, out_channels, T, n_mels//2)
+        # output: (B, out_channels, T, n_mels // 2)
         self.res_block1 = ResBlock(in_channels=64, out_channels=128)
-        # output: (B, out_channels, T, n_mels//4)
+        # output: (B, out_channels, T, n_mels // 4)
         self.res_block2 = ResBlock(in_channels=128, out_channels=192)
-        # output: (B, out_channels, T, n_mels//8)
+        # output: (B, out_channels, T, n_mels // 8)
         self.res_block3 = ResBlock(in_channels=192, out_channels=256)
         # pool block
         self.pool_block = nn.Sequential(
@@ -59,7 +59,7 @@ class JDCNet(nn.Layer):
             # (B, num_features, T, 2)
             nn.MaxPool2D(kernel_size=(1, 4)),
             nn.Dropout(p=0.5), )
-        # input: (B, T, input_size) - resized from (B, input_size//2, T, 2)
+        # input: (B, T, input_size), resized from (B, input_size // 2, T, 2)
         # output: (B, T, input_size)
         self.bilstm_classifier = nn.LSTM(
             input_size=512,
@@ -81,7 +81,7 @@ class JDCNet(nn.Layer):
                 Shape (B, num_class, n_mels, T).
         Returns:
             Tensor:
-                Shape (B, num_features, n_mels//8, T).
+                Shape (B, num_features, n_mels // 8, T).
         """
         x = x.astype(paddle.float32)
         x = x.transpose([0, 1, 3, 2] if len(x.shape) == 4 else [0, 2, 1])
@@ -105,10 +105,9 @@ class JDCNet(nn.Layer):
                 classifier output consists of predicted pitch classes per frame.
                 Shape: (B, seq_len, num_class).
             Tensor:
-                GAN_feature. Shape: (B, num_features, n_mels//8, seq_len)
+                GAN_feature. Shape: (B, num_features, n_mels // 8, seq_len)
             Tensor:
-                poolblock_out. Shape (B, seq_len, 512)
-
+                poolblock_out. Shape (B, seq_len, 512)
         """
         ###############################
         # forward pass for classifier #
@@ -201,7 +200,7 @@ class ResBlock(nn.Layer):
             x(Tensor(float32)): Shape (B, in_channels, T, n_mels).
         Returns:
             Tensor:
-                The residual output, Shape (B, out_channels, T, n_mels//2).
+                The residual output, Shape (B, out_channels, T, n_mels // 2).
         """
         x = self.pre_conv(x)
         if self.downsample:
diff --git a/paddlespeech/t2s/models/starganv2_vc/starganv2_vc.py b/paddlespeech/t2s/models/starganv2_vc/starganv2_vc.py
index c938c7a90..2a96b30c6 100644
--- a/paddlespeech/t2s/models/starganv2_vc/starganv2_vc.py
+++ b/paddlespeech/t2s/models/starganv2_vc/starganv2_vc.py
@@ -32,12 +32,23 @@ class DownSample(nn.Layer):
         self.layer_type = layer_type
 
     def forward(self, x: paddle.Tensor):
+        """Calculate forward propagation.
+        Args:
+            x(Tensor(float32)): Shape (B, dim_in, n_mels, T).
+        Returns:
+            Tensor:
+                layer_type == 'none': Shape (B, dim_in, n_mels, T)
+                layer_type == 'timepreserve': Shape (B, dim_in, n_mels // 2, T)
+                layer_type == 'half': Shape (B, dim_in, n_mels // 2, T // 2)
+        """
         if self.layer_type == 'none':
             return x
         elif self.layer_type == 'timepreserve':
-            return F.avg_pool2d(x, (2, 1))
+            out = F.avg_pool2d(x, (2, 1))
+            return out
         elif self.layer_type == 'half':
-            return F.avg_pool2d(x, 2)
+            out = F.avg_pool2d(x, 2)
+            return out
         else:
             raise RuntimeError(
                 'Got unexpected donwsampletype %s, expected is [none, timepreserve, half]'
@@ -50,12 +61,23 @@ class UpSample(nn.Layer):
         self.layer_type = layer_type
 
     def forward(self, x: paddle.Tensor):
+        """Calculate forward propagation.
+        Args:
+            x(Tensor(float32)): Shape (B, dim_in, n_mels, T).
+        Returns:
+            Tensor:
+                layer_type == 'none': Shape (B, dim_in, n_mels, T)
+                layer_type == 'timepreserve': Shape (B, dim_in, n_mels * 2, T)
+                layer_type == 'half': Shape (B, dim_in, n_mels * 2, T * 2)
+        """
         if self.layer_type == 'none':
             return x
         elif self.layer_type == 'timepreserve':
-            return F.interpolate(x, scale_factor=(2, 1), mode='nearest')
+            out = F.interpolate(x, scale_factor=(2, 1), mode='nearest')
+            return out
         elif self.layer_type == 'half':
-            return F.interpolate(x, scale_factor=2, mode='nearest')
+            out = F.interpolate(x, scale_factor=2, mode='nearest')
+            return out
         else:
             raise RuntimeError(
                 'Got unexpected upsampletype %s, expected is [none, timepreserve, half]'
@@ -126,7 +148,9 @@ class ResBlk(nn.Layer):
             x(Tensor(float32)): Shape (B, dim_in, n_mels, T).
         Returns:
             Tensor:
-                Shape (B, dim_out, T, n_mels//(1 or 2), T//(1 or 2)).
+                downsample == 'none': Shape (B, dim_out, n_mels, T).
+                downsample == 'timepreserve': Shape (B, dim_out, n_mels // 2, T).
+                downsample == 'half': Shape (B, dim_out, n_mels // 2, T // 2).
         """
         x = self._shortcut(x) + self._residual(x)
         # unit variance
@@ -142,12 +166,21 @@ class AdaIN(nn.Layer):
         self.fc = nn.Linear(style_dim, num_features * 2)
 
     def forward(self, x: paddle.Tensor, s: paddle.Tensor):
+        """Calculate forward propagation.
+        Args:
+            x(Tensor(float32)): Shape (B, num_features, n_mels, T).
+            s(Tensor(float32)): Shape (style_dim, ).
+        Returns:
+            Tensor:
+                Shape (B, num_features, n_mels, T).
+        """
         if len(s.shape) == 1:
             s = s[None]
         h = self.fc(s)
         h = h.reshape((h.shape[0], h.shape[1], 1, 1))
         gamma, beta = paddle.split(h, 2, axis=1)
-        return (1 + gamma) * self.norm(x) + beta
+        out = (1 + gamma) * self.norm(x) + beta
+        return out
 
 
 class AdainResBlk(nn.Layer):
@@ -164,6 +197,7 @@ class AdainResBlk(nn.Layer):
         self.upsample = UpSample(layer_type=upsample)
         self.learned_sc = dim_in != dim_out
         self._build_weights(dim_in, dim_out, style_dim)
+        self.layer_type = upsample
 
     def _build_weights(self, dim_in: int, dim_out: int, style_dim: int=64):
         self.conv1 = nn.Conv2D(
@@ -209,12 +243,14 @@ class AdainResBlk(nn.Layer):
         """Calculate forward propagation.
         Args:
             x(Tensor(float32)):
-                Shape (B, dim_in, n_mels', T').
+                Shape (B, dim_in, n_mels, T).
             s(Tensor(float32)):
                 Shape (64,).
         Returns:
             Tensor:
-                Shape (B, dim_out, n_mels'', T'').
+                upsample == 'none': Shape (B, dim_out, n_mels, T).
+                upsample == 'timepreserve': Shape (B, dim_out, n_mels * 2, T).
+                upsample == 'half': Shape (B, dim_out, n_mels * 2, T * 2).
         """
         out = self._residual(x, s)
         if self.w_hpf == 0:
@@ -333,27 +369,27 @@ class Generator(nn.Layer):
             masks:
                 None.
             F0:
-                Shape (B, num_features(256), n_mels//8, T).
+                Shape (B, num_features(256), n_mels // 8, T).
         Returns:
             Tensor:
-                output of generator. Shape (B, 1, n_mels, T//4*4)
+                output of generator. Shape (B, 1, n_mels, T // 4 * 4)
         """
         x = self.stem(x)
         cache = {}
-        # output: (B, max_conv_dim, n_mels//16, T//4)
+        # output: (B, max_conv_dim, n_mels // 16, T // 4)
         for block in self.encode:
             if (masks is not None) and (x.shape[2] in [32, 64, 128]):
                 cache[x.shape[2]] = x
             x = block(x)
         if F0 is not None:
-            # input: (B, num_features(256), n_mels//8, T)
-            # output: (B, num_features(256)//2, n_mels//16, T//2)
+            # input: (B, num_features(256), n_mels // 8, T)
+            # output: (B, num_features(256) // 2, n_mels // 16, T // 2)
             F0 = self.F0_conv(F0)
-            # output: (B, num_features(256)//2, n_mels//16, T//4)
+            # output: (B, num_features(256) // 2, n_mels // 16, T // 4)
             F0 = F.adaptive_avg_pool2d(F0, [x.shape[-2], x.shape[-1]])
             x = paddle.concat([x, F0], axis=1)
-        # input: (B, max_conv_dim+num_features(256)//2, n_mels//16, T//4*4)
-        # output: (B, dim_in, n_mels, T//4*4)
+        # input: (B, max_conv_dim+num_features(256) // 2, n_mels // 16, T // 4 * 4)
+        # output: (B, dim_in, n_mels, T // 4 * 4)
         for block in self.decode:
             x = block(x, s)
             if (masks is not None) and (x.shape[2] in [32, 64, 128]):
@@ -397,11 +433,10 @@ class MappingNetwork(nn.Layer):
             z(Tensor(float32)):
                 Shape (B, 1, n_mels, T).
             y(Tensor(float32)):
-                speaker label. Shape (B,).
-
+                speaker label. Shape (B, ).
         Returns:
             Tensor:
-                Shape (style_dim,)
+                Shape (style_dim, )
         """
 
         h = self.shared(z)
@@ -411,7 +446,7 @@ class MappingNetwork(nn.Layer):
         # (B, num_domains, style_dim)
         out = paddle.stack(out, axis=1)
         idx = paddle.arange(y.shape[0])
-        # (style_dim,)
+        # (style_dim, )
         s = out[idx, y]
         return s
 
@@ -462,10 +497,10 @@ class StyleEncoder(nn.Layer):
             x(Tensor(float32)):
                 Shape (B, 1, n_mels, T).
             y(Tensor(float32)):
-                speaker label. Shape (B,).
+                speaker label. Shape (B, ).
         Returns:
             Tensor:
-                Shape (style_dim,)
+                Shape (style_dim, )
         """
         h = self.shared(x)
         h = h.reshape((h.shape[0], -1))
@@ -502,10 +537,12 @@ class Discriminator(nn.Layer):
         self.num_domains = num_domains
 
     def forward(self, x: paddle.Tensor, y: paddle.Tensor):
-        return self.dis(x, y)
+        out = self.dis(x, y)
+        return out
 
     def classifier(self, x: paddle.Tensor):
-        return self.cls.get_feature(x)
+        out = self.cls.get_feature(x)
+        return out
 
 
 class Discriminator2D(nn.Layer):