diff --git a/deepspeech/decoders/recog.py b/deepspeech/decoders/recog.py
index d1ddfc8a..eb39636c 100644
--- a/deepspeech/decoders/recog.py
+++ b/deepspeech/decoders/recog.py
@@ -24,11 +24,7 @@ from .utils import add_results_to_json
 from deepspeech.exps import dynamic_import_tester
 from deepspeech.io.reader import LoadInputsAndTargets
 from deepspeech.models.asr_interface import ASRInterface
-from deepspeech.models.lm.transformer import TransformerLM
 from deepspeech.utils.log import Log
-# from espnet.asr.asr_utils import get_model_conf
-# from espnet.asr.asr_utils import torch_load
-# from espnet.nets.lm_interface import dynamic_import_lm
 
 logger = Log(__name__).getlog()
 
@@ -49,21 +45,24 @@ def load_trained_model(args):
     model = exp.model
     return model, char_list, exp, confs
 
+
 def get_config(config_path):
     stream = open(config_path, mode='r', encoding="utf-8")
     config = yaml.load(stream, Loader=yaml.FullLoader)
     stream.close()
     return config
 
+
 def load_trained_lm(args):
-        lm_args = get_config(args.rnnlm_conf)
-        # NOTE: for a compatibility with less than 0.5.0 version models
-        lm_model_module = getattr(lm_args, "model_module", "default")
-        lm_class = dynamic_import_lm(lm_model_module)
-        lm = lm_class(lm_args.model)
-        model_dict = paddle.load(args.rnnlm)
-        lm.set_state_dict(model_dict)
-        return lm
+    lm_args = get_config(args.rnnlm_conf)
+    # NOTE: for a compatibility with less than 0.5.0 version models
+    lm_model_module = getattr(lm_args, "model_module", "default")
+    lm_class = dynamic_import_lm(lm_model_module)
+    lm = lm_class(lm_args.model)
+    model_dict = paddle.load(args.rnnlm)
+    lm.set_state_dict(model_dict)
+    return lm
+
 
 def recog_v2(args):
     """Decode with custom models that implements ScorerInterface.
diff --git a/deepspeech/decoders/recog_bin.py b/deepspeech/decoders/recog_bin.py
index cb3d5757..7c866648 100644
--- a/deepspeech/decoders/recog_bin.py
+++ b/deepspeech/decoders/recog_bin.py
@@ -21,6 +21,7 @@ from distutils.util import strtobool
 import configargparse
 import numpy as np
 
+
 def get_parser():
     """Get default arguments."""
     parser = configargparse.ArgumentParser(
diff --git a/deepspeech/models/lm/transformer.py b/deepspeech/models/lm/transformer.py
index 28371ae2..35ecf678 100644
--- a/deepspeech/models/lm/transformer.py
+++ b/deepspeech/models/lm/transformer.py
@@ -30,20 +30,19 @@ logger = Log(__name__).getlog()
 
 
 class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
-    def __init__(
-            self,
-            n_vocab: int,
-            pos_enc: str=None,
-            embed_unit: int=128,
-            att_unit: int=256,
-            head: int=2,
-            unit: int=1024,
-            layer: int=4,
-            dropout_rate: float=0.5,
-            emb_dropout_rate: float=0.0,
-            att_dropout_rate: float=0.0,
-            tie_weights: bool=False, 
-            **kwargs):
+    def __init__(self,
+                 n_vocab: int,
+                 pos_enc: str=None,
+                 embed_unit: int=128,
+                 att_unit: int=256,
+                 head: int=2,
+                 unit: int=1024,
+                 layer: int=4,
+                 dropout_rate: float=0.5,
+                 emb_dropout_rate: float=0.0,
+                 att_dropout_rate: float=0.0,
+                 tie_weights: bool=False,
+                 **kwargs):
         nn.Layer.__init__(self)
 
         if pos_enc == "sinusoidal":
diff --git a/deepspeech/transform/add_deltas.py b/deepspeech/transform/add_deltas.py
index 68f44d41..4cab0084 100644
--- a/deepspeech/transform/add_deltas.py
+++ b/deepspeech/transform/add_deltas.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import numpy as np
 
 
@@ -9,7 +22,7 @@ def delta(feat, window):
         delta_feat[i:] += -i * feat[:-i]
         delta_feat[-i:] += i * feat[-1]
         delta_feat[:i] += -i * feat[0]
-    delta_feat /= 2 * sum(i ** 2 for i in range(1, window + 1))
+    delta_feat /= 2 * sum(i**2 for i in range(1, window + 1))
     return delta_feat
 
 
@@ -34,8 +47,7 @@ class AddDeltas():
 
     def __repr__(self):
         return "{name}(window={window}, order={order}".format(
-            name=self.__class__.__name__, window=self.window, order=self.order
-        )
+            name=self.__class__.__name__, window=self.window, order=self.order)
 
     def __call__(self, x):
         return add_deltas(x, window=self.window, order=self.order)
diff --git a/deepspeech/transform/channel_selector.py b/deepspeech/transform/channel_selector.py
index 1ac9e350..d985b482 100644
--- a/deepspeech/transform/channel_selector.py
+++ b/deepspeech/transform/channel_selector.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import numpy
 
 
@@ -10,15 +23,12 @@ class ChannelSelector():
         self.axis = axis
 
     def __repr__(self):
-        return (
-            "{name}(train_channel={train_channel}, "
-            "eval_channel={eval_channel}, axis={axis})".format(
-                name=self.__class__.__name__,
-                train_channel=self.train_channel,
-                eval_channel=self.eval_channel,
-                axis=self.axis,
-            )
-        )
+        return ("{name}(train_channel={train_channel}, "
+                "eval_channel={eval_channel}, axis={axis})".format(
+                    name=self.__class__.__name__,
+                    train_channel=self.train_channel,
+                    eval_channel=self.eval_channel,
+                    axis=self.axis, ))
 
     def __call__(self, x, train=True):
         # Assuming x: [Time, Channel] by default
@@ -27,8 +37,8 @@ class ChannelSelector():
             # If the dimension is insufficient, then unsqueeze
             # (e.g [Time] -> [Time, 1])
             ind = tuple(
-                slice(None) if i < x.ndim else None for i in range(self.axis + 1)
-            )
+                slice(None) if i < x.ndim else None
+                for i in range(self.axis + 1))
             x = x[ind]
 
         if train:
@@ -41,5 +51,6 @@ class ChannelSelector():
         else:
             ch = channel
 
-        ind = tuple(slice(None) if i != self.axis else ch for i in range(x.ndim))
+        ind = tuple(
+            slice(None) if i != self.axis else ch for i in range(x.ndim))
         return x[ind]
diff --git a/deepspeech/transform/functional.py b/deepspeech/transform/functional.py
index 5eec6cc1..914e484e 100644
--- a/deepspeech/transform/functional.py
+++ b/deepspeech/transform/functional.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import inspect
 
 from deepspeech.transform.transform_interface import TransformInterface
@@ -57,7 +70,8 @@ class FuncTrans(TransformInterface):
         except ValueError:
             d = dict()
         return {
-            k: v.default for k, v in d.items() if v.default != inspect.Parameter.empty
+            k: v.default
+            for k, v in d.items() if v.default != inspect.Parameter.empty
         }
 
     def __repr__(self):
diff --git a/deepspeech/transform/perturb.py b/deepspeech/transform/perturb.py
index 05766cad..e425fd2e 100644
--- a/deepspeech/transform/perturb.py
+++ b/deepspeech/transform/perturb.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import librosa
 import numpy
 import scipy
@@ -5,6 +18,7 @@ import soundfile
 
 from deepspeech.io.reader import SoundHDF5File
 
+
 class SpeedPerturbation():
     """SpeedPerturbation
 
@@ -22,14 +36,13 @@ class SpeedPerturbation():
     """
 
     def __init__(
-        self,
-        lower=0.9,
-        upper=1.1,
-        utt2ratio=None,
-        keep_length=True,
-        res_type="kaiser_best",
-        seed=None,
-    ):
+            self,
+            lower=0.9,
+            upper=1.1,
+            utt2ratio=None,
+            keep_length=True,
+            res_type="kaiser_best",
+            seed=None, ):
         self.res_type = res_type
         self.keep_length = keep_length
         self.state = numpy.random.RandomState(seed)
@@ -60,12 +73,10 @@ class SpeedPerturbation():
                 self.lower,
                 self.upper,
                 self.keep_length,
-                self.res_type,
-            )
+                self.res_type, )
         else:
             return "{}({}, res_type={})".format(
-                self.__class__.__name__, self.utt2ratio_file, self.res_type
-            )
+                self.__class__.__name__, self.utt2ratio_file, self.res_type)
 
     def __call__(self, x, uttid=None, train=True):
         if not train:
@@ -85,15 +96,14 @@ class SpeedPerturbation():
             diff = abs(len(x) - len(y))
             if len(y) > len(x):
                 # Truncate noise
-                y = y[diff // 2 : -((diff + 1) // 2)]
+                y = y[diff // 2:-((diff + 1) // 2)]
             elif len(y) < len(x):
                 # Assume the time-axis is the first: (Time, Channel)
                 pad_width = [(diff // 2, (diff + 1) // 2)] + [
                     (0, 0) for _ in range(y.ndim - 1)
                 ]
                 y = numpy.pad(
-                    y, pad_width=pad_width, constant_values=0, mode="constant"
-                )
+                    y, pad_width=pad_width, constant_values=0, mode="constant")
         return y
 
 
@@ -111,7 +121,7 @@ class BandpassPerturbation():
 
     """
 
-    def __init__(self, lower=0.0, upper=0.75, seed=None, axes=(-1,)):
+    def __init__(self, lower=0.0, upper=0.75, seed=None, axes=(-1, )):
         self.lower = lower
         self.upper = upper
         self.state = numpy.random.RandomState(seed)
@@ -119,18 +129,16 @@ class BandpassPerturbation():
         self.axes = axes
 
     def __repr__(self):
-        return "{}(lower={}, upper={})".format(
-            self.__class__.__name__, self.lower, self.upper
-        )
+        return "{}(lower={}, upper={})".format(self.__class__.__name__,
+                                               self.lower, self.upper)
 
     def __call__(self, x_stft, uttid=None, train=True):
         if not train:
             return x_stft
 
         if x_stft.ndim == 1:
-            raise RuntimeError(
-                "Input in time-freq domain: " "(Time, Channel, Freq) or (Time, Freq)"
-            )
+            raise RuntimeError("Input in time-freq domain: "
+                               "(Time, Channel, Freq) or (Time, Freq)")
 
         ratio = self.state.uniform(self.lower, self.upper)
         axes = [i if i >= 0 else x_stft.ndim - i for i in self.axes]
@@ -142,7 +150,12 @@ class BandpassPerturbation():
 
 
 class VolumePerturbation():
-    def __init__(self, lower=-1.6, upper=1.6, utt2ratio=None, dbunit=True, seed=None):
+    def __init__(self,
+                 lower=-1.6,
+                 upper=1.6,
+                 utt2ratio=None,
+                 dbunit=True,
+                 seed=None):
         self.dbunit = dbunit
         self.utt2ratio_file = utt2ratio
         self.lower = lower
@@ -168,12 +181,10 @@ class VolumePerturbation():
     def __repr__(self):
         if self.utt2ratio is None:
             return "{}(lower={}, upper={}, dbunit={})".format(
-                self.__class__.__name__, self.lower, self.upper, self.dbunit
-            )
+                self.__class__.__name__, self.lower, self.upper, self.dbunit)
         else:
             return '{}("{}", dbunit={})'.format(
-                self.__class__.__name__, self.utt2ratio_file, self.dbunit
-            )
+                self.__class__.__name__, self.utt2ratio_file, self.dbunit)
 
     def __call__(self, x, uttid=None, train=True):
         if not train:
@@ -186,7 +197,7 @@ class VolumePerturbation():
         else:
             ratio = self.state.uniform(self.lower, self.upper)
         if self.dbunit:
-            ratio = 10 ** (ratio / 20)
+            ratio = 10**(ratio / 20)
         return x * ratio
 
 
@@ -194,15 +205,14 @@ class NoiseInjection():
     """Add isotropic noise"""
 
     def __init__(
-        self,
-        utt2noise=None,
-        lower=-20,
-        upper=-5,
-        utt2ratio=None,
-        filetype="list",
-        dbunit=True,
-        seed=None,
-    ):
+            self,
+            utt2noise=None,
+            lower=-20,
+            upper=-5,
+            utt2ratio=None,
+            filetype="list",
+            dbunit=True,
+            seed=None, ):
         self.utt2noise_file = utt2noise
         self.utt2ratio_file = utt2ratio
         self.filetype = filetype
@@ -242,19 +252,16 @@ class NoiseInjection():
 
         if utt2noise is not None and utt2ratio is not None:
             if set(self.utt2ratio) != set(self.utt2noise):
-                raise RuntimeError(
-                    "The uttids mismatch between {} and {}".format(utt2ratio, utt2noise)
-                )
+                raise RuntimeError("The uttids mismatch between {} and {}".
+                                   format(utt2ratio, utt2noise))
 
     def __repr__(self):
         if self.utt2ratio is None:
             return "{}(lower={}, upper={}, dbunit={})".format(
-                self.__class__.__name__, self.lower, self.upper, self.dbunit
-            )
+                self.__class__.__name__, self.lower, self.upper, self.dbunit)
         else:
             return '{}("{}", dbunit={})'.format(
-                self.__class__.__name__, self.utt2ratio_file, self.dbunit
-            )
+                self.__class__.__name__, self.utt2ratio_file, self.dbunit)
 
     def __call__(self, x, uttid=None, train=True):
         if not train:
@@ -268,8 +275,8 @@ class NoiseInjection():
             ratio = self.state.uniform(self.lower, self.upper)
 
         if self.dbunit:
-            ratio = 10 ** (ratio / 20)
-        scale = ratio * numpy.sqrt((x ** 2).mean())
+            ratio = 10**(ratio / 20)
+        scale = ratio * numpy.sqrt((x**2).mean())
 
         # 2. Get noise
         if self.utt2noise is not None:
@@ -280,16 +287,17 @@ class NoiseInjection():
                 # Randomly select the noise source
                 noise = self.state.choice(list(self.utt2noise.values()))
             # Normalize the level
-            noise /= numpy.sqrt((noise ** 2).mean())
+            noise /= numpy.sqrt((noise**2).mean())
 
             # Adjust the noise length
             diff = abs(len(x) - len(noise))
             offset = self.state.randint(0, diff)
             if len(noise) > len(x):
                 # Truncate noise
-                noise = noise[offset : -(diff - offset)]
+                noise = noise[offset:-(diff - offset)]
             else:
-                noise = numpy.pad(noise, pad_width=[offset, diff - offset], mode="wrap")
+                noise = numpy.pad(
+                    noise, pad_width=[offset, diff - offset], mode="wrap")
 
         else:
             # Generate white noise
@@ -329,15 +337,14 @@ class RIRConvolve():
         if x.ndim != 1:
             # Must be single channel
             raise RuntimeError(
-                "Input x must be one dimensional array, but got {}".format(x.shape)
-            )
+                "Input x must be one dimensional array, but got {}".format(
+                    x.shape))
 
         rir, rate = self.utt2rir[uttid]
         if rir.ndim == 2:
             # FIXME(kamo): Use chainer.convolution_1d?
             # return [Time, Channel]
             return numpy.stack(
-                [scipy.convolve(x, r, mode="same") for r in rir], axis=-1
-            )
+                [scipy.convolve(x, r, mode="same") for r in rir], axis=-1)
         else:
             return scipy.convolve(x, rir, mode="same")
diff --git a/deepspeech/transform/spec_augment.py b/deepspeech/transform/spec_augment.py
index feb712df..0e5324e7 100644
--- a/deepspeech/transform/spec_augment.py
+++ b/deepspeech/transform/spec_augment.py
@@ -1,5 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Spec Augment module for preprocessing i.e., data augmentation"""
-
 import random
 
 import numpy
@@ -27,10 +39,12 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
             return x
         # NOTE: randrange(a, b) emits a, a + 1, ..., b - 1
         center = random.randrange(window, t - window)
-        warped = random.randrange(center - window, center + window) + 1  # 1 ... t - 1
+        warped = random.randrange(center - window, center +
+                                  window) + 1  # 1 ... t - 1
 
         left = Image.fromarray(x[:center]).resize((x.shape[1], warped), BICUBIC)
-        right = Image.fromarray(x[center:]).resize((x.shape[1], t - warped), BICUBIC)
+        right = Image.fromarray(x[center:]).resize((x.shape[1], t - warped),
+                                                   BICUBIC)
         if inplace:
             x[:warped] = left
             x[warped:] = right
@@ -44,11 +58,8 @@ def time_warp(x, max_time_warp=80, inplace=False, mode="PIL"):
         # TODO(karita): make this differentiable again
         return spec_augment.time_warp(paddle.to_tensor(x), window).numpy()
     else:
-        raise NotImplementedError(
-            "unknown resize mode: "
-            + mode
-            + ", choose one from (PIL, sparse_image_warp)."
-        )
+        raise NotImplementedError("unknown resize mode: " + mode +
+                                  ", choose one from (PIL, sparse_image_warp).")
 
 
 class TimeWarp(FuncTrans):
@@ -145,16 +156,15 @@ class TimeMask(FuncTrans):
 
 
 def spec_augment(
-    x,
-    resize_mode="PIL",
-    max_time_warp=80,
-    max_freq_width=27,
-    n_freq_mask=2,
-    max_time_width=100,
-    n_time_mask=2,
-    inplace=True,
-    replace_with_zero=True,
-):
+        x,
+        resize_mode="PIL",
+        max_time_warp=80,
+        max_freq_width=27,
+        n_freq_mask=2,
+        max_time_width=100,
+        n_time_mask=2,
+        inplace=True,
+        replace_with_zero=True, ):
     """spec agument
 
     apply random time warping and time/freq masking
@@ -180,15 +190,13 @@ def spec_augment(
         max_freq_width,
         n_freq_mask,
         inplace=inplace,
-        replace_with_zero=replace_with_zero,
-    )
+        replace_with_zero=replace_with_zero, )
     x = time_mask(
         x,
         max_time_width,
         n_time_mask,
         inplace=inplace,
-        replace_with_zero=replace_with_zero,
-    )
+        replace_with_zero=replace_with_zero, )
     return x
 
 
diff --git a/deepspeech/transform/spectrogram.py b/deepspeech/transform/spectrogram.py
index 68d47627..e63bd680 100644
--- a/deepspeech/transform/spectrogram.py
+++ b/deepspeech/transform/spectrogram.py
@@ -1,10 +1,27 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import librosa
 import numpy as np
 
 
-def stft(
-    x, n_fft, n_shift, win_length=None, window="hann", center=True, pad_mode="reflect"
-):
+def stft(x,
+         n_fft,
+         n_shift,
+         win_length=None,
+         window="hann",
+         center=True,
+         pad_mode="reflect"):
     # x: [Time, Channel]
     if x.ndim == 1:
         single_channel = True
@@ -25,12 +42,9 @@ def stft(
                 win_length=win_length,
                 window=window,
                 center=center,
-                pad_mode=pad_mode,
-            ).T
-            for ch in range(x.shape[1])
+                pad_mode=pad_mode, ).T for ch in range(x.shape[1])
         ],
-        axis=1,
-    )
+        axis=1, )
 
     if single_channel:
         # x: [Time, Channel, Freq] -> [Time, Freq]
@@ -55,12 +69,9 @@ def istft(x, n_shift, win_length=None, window="hann", center=True):
                 hop_length=n_shift,
                 win_length=win_length,
                 window=window,
-                center=center,
-            )
-            for ch in range(x.shape[1])
+                center=center, ) for ch in range(x.shape[1])
         ],
-        axis=1,
-    )
+        axis=1, )
 
     if single_channel:
         # x: [Time, Channel] -> [Time]
@@ -68,7 +79,13 @@ def istft(x, n_shift, win_length=None, window="hann", center=True):
     return x
 
 
-def stft2logmelspectrogram(x_stft, fs, n_mels, n_fft, fmin=None, fmax=None, eps=1e-10):
+def stft2logmelspectrogram(x_stft,
+                           fs,
+                           n_mels,
+                           n_fft,
+                           fmin=None,
+                           fmax=None,
+                           eps=1e-10):
     # x_stft: (Time, Channel, Freq) or (Time, Freq)
     fmin = 0 if fmin is None else fmin
     fmax = fs / 2 if fmax is None else fmax
@@ -90,18 +107,17 @@ def spectrogram(x, n_fft, n_shift, win_length=None, window="hann"):
 
 
 def logmelspectrogram(
-    x,
-    fs,
-    n_mels,
-    n_fft,
-    n_shift,
-    win_length=None,
-    window="hann",
-    fmin=None,
-    fmax=None,
-    eps=1e-10,
-    pad_mode="reflect",
-):
+        x,
+        fs,
+        n_mels,
+        n_fft,
+        n_shift,
+        win_length=None,
+        window="hann",
+        fmin=None,
+        fmax=None,
+        eps=1e-10,
+        pad_mode="reflect", ):
     # stft: (Time, Channel, Freq) or (Time, Freq)
     x_stft = stft(
         x,
@@ -109,12 +125,16 @@ def logmelspectrogram(
         n_shift=n_shift,
         win_length=win_length,
         window=window,
-        pad_mode=pad_mode,
-    )
+        pad_mode=pad_mode, )
 
     return stft2logmelspectrogram(
-        x_stft, fs=fs, n_mels=n_mels, n_fft=n_fft, fmin=fmin, fmax=fmax, eps=eps
-    )
+        x_stft,
+        fs=fs,
+        n_mels=n_mels,
+        n_fft=n_fft,
+        fmin=fmin,
+        fmax=fmax,
+        eps=eps)
 
 
 class Spectrogram():
@@ -125,16 +145,13 @@ class Spectrogram():
         self.window = window
 
     def __repr__(self):
-        return (
-            "{name}(n_fft={n_fft}, n_shift={n_shift}, "
-            "win_length={win_length}, window={window})".format(
-                name=self.__class__.__name__,
-                n_fft=self.n_fft,
-                n_shift=self.n_shift,
-                win_length=self.win_length,
-                window=self.window,
-            )
-        )
+        return ("{name}(n_fft={n_fft}, n_shift={n_shift}, "
+                "win_length={win_length}, window={window})".format(
+                    name=self.__class__.__name__,
+                    n_fft=self.n_fft,
+                    n_shift=self.n_shift,
+                    win_length=self.win_length,
+                    window=self.window, ))
 
     def __call__(self, x):
         return spectrogram(
@@ -142,23 +159,21 @@ class Spectrogram():
             n_fft=self.n_fft,
             n_shift=self.n_shift,
             win_length=self.win_length,
-            window=self.window,
-        )
+            window=self.window, )
 
 
 class LogMelSpectrogram():
     def __init__(
-        self,
-        fs,
-        n_mels,
-        n_fft,
-        n_shift,
-        win_length=None,
-        window="hann",
-        fmin=None,
-        fmax=None,
-        eps=1e-10,
-    ):
+            self,
+            fs,
+            n_mels,
+            n_fft,
+            n_shift,
+            win_length=None,
+            window="hann",
+            fmin=None,
+            fmax=None,
+            eps=1e-10, ):
         self.fs = fs
         self.n_mels = n_mels
         self.n_fft = n_fft
@@ -170,22 +185,19 @@ class LogMelSpectrogram():
         self.eps = eps
 
     def __repr__(self):
-        return (
-            "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
-            "n_shift={n_shift}, win_length={win_length}, window={window}, "
-            "fmin={fmin}, fmax={fmax}, eps={eps}))".format(
-                name=self.__class__.__name__,
-                fs=self.fs,
-                n_mels=self.n_mels,
-                n_fft=self.n_fft,
-                n_shift=self.n_shift,
-                win_length=self.win_length,
-                window=self.window,
-                fmin=self.fmin,
-                fmax=self.fmax,
-                eps=self.eps,
-            )
-        )
+        return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
+                "n_shift={n_shift}, win_length={win_length}, window={window}, "
+                "fmin={fmin}, fmax={fmax}, eps={eps}))".format(
+                    name=self.__class__.__name__,
+                    fs=self.fs,
+                    n_mels=self.n_mels,
+                    n_fft=self.n_fft,
+                    n_shift=self.n_shift,
+                    win_length=self.win_length,
+                    window=self.window,
+                    fmin=self.fmin,
+                    fmax=self.fmax,
+                    eps=self.eps, ))
 
     def __call__(self, x):
         return logmelspectrogram(
@@ -195,8 +207,7 @@ class LogMelSpectrogram():
             n_fft=self.n_fft,
             n_shift=self.n_shift,
             win_length=self.win_length,
-            window=self.window,
-        )
+            window=self.window, )
 
 
 class Stft2LogMelSpectrogram():
@@ -209,18 +220,15 @@ class Stft2LogMelSpectrogram():
         self.eps = eps
 
     def __repr__(self):
-        return (
-            "{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
-            "fmin={fmin}, fmax={fmax}, eps={eps}))".format(
-                name=self.__class__.__name__,
-                fs=self.fs,
-                n_mels=self.n_mels,
-                n_fft=self.n_fft,
-                fmin=self.fmin,
-                fmax=self.fmax,
-                eps=self.eps,
-            )
-        )
+        return ("{name}(fs={fs}, n_mels={n_mels}, n_fft={n_fft}, "
+                "fmin={fmin}, fmax={fmax}, eps={eps}))".format(
+                    name=self.__class__.__name__,
+                    fs=self.fs,
+                    n_mels=self.n_mels,
+                    n_fft=self.n_fft,
+                    fmin=self.fmin,
+                    fmax=self.fmax,
+                    eps=self.eps, ))
 
     def __call__(self, x):
         return stft2logmelspectrogram(
@@ -229,20 +237,18 @@ class Stft2LogMelSpectrogram():
             n_mels=self.n_mels,
             n_fft=self.n_fft,
             fmin=self.fmin,
-            fmax=self.fmax,
-        )
+            fmax=self.fmax, )
 
 
 class Stft():
     def __init__(
-        self,
-        n_fft,
-        n_shift,
-        win_length=None,
-        window="hann",
-        center=True,
-        pad_mode="reflect",
-    ):
+            self,
+            n_fft,
+            n_shift,
+            win_length=None,
+            window="hann",
+            center=True,
+            pad_mode="reflect", ):
         self.n_fft = n_fft
         self.n_shift = n_shift
         self.win_length = win_length
@@ -251,19 +257,16 @@ class Stft():
         self.pad_mode = pad_mode
 
     def __repr__(self):
-        return (
-            "{name}(n_fft={n_fft}, n_shift={n_shift}, "
-            "win_length={win_length}, window={window},"
-            "center={center}, pad_mode={pad_mode})".format(
-                name=self.__class__.__name__,
-                n_fft=self.n_fft,
-                n_shift=self.n_shift,
-                win_length=self.win_length,
-                window=self.window,
-                center=self.center,
-                pad_mode=self.pad_mode,
-            )
-        )
+        return ("{name}(n_fft={n_fft}, n_shift={n_shift}, "
+                "win_length={win_length}, window={window},"
+                "center={center}, pad_mode={pad_mode})".format(
+                    name=self.__class__.__name__,
+                    n_fft=self.n_fft,
+                    n_shift=self.n_shift,
+                    win_length=self.win_length,
+                    window=self.window,
+                    center=self.center,
+                    pad_mode=self.pad_mode, ))
 
     def __call__(self, x):
         return stft(
@@ -273,8 +276,7 @@ class Stft():
             win_length=self.win_length,
             window=self.window,
             center=self.center,
-            pad_mode=self.pad_mode,
-        )
+            pad_mode=self.pad_mode, )
 
 
 class IStft():
@@ -285,17 +287,14 @@ class IStft():
         self.center = center
 
     def __repr__(self):
-        return (
-            "{name}(n_shift={n_shift}, "
-            "win_length={win_length}, window={window},"
-            "center={center})".format(
-                name=self.__class__.__name__,
-                n_shift=self.n_shift,
-                win_length=self.win_length,
-                window=self.window,
-                center=self.center,
-            )
-        )
+        return ("{name}(n_shift={n_shift}, "
+                "win_length={win_length}, window={window},"
+                "center={center})".format(
+                    name=self.__class__.__name__,
+                    n_shift=self.n_shift,
+                    win_length=self.win_length,
+                    window=self.window,
+                    center=self.center, ))
 
     def __call__(self, x):
         return istft(
@@ -303,5 +302,4 @@ class IStft():
             self.n_shift,
             win_length=self.win_length,
             window=self.window,
-            center=self.center,
-        )
+            center=self.center, )
diff --git a/deepspeech/transform/transform_interface.py b/deepspeech/transform/transform_interface.py
index 8a6aba45..7ab29554 100644
--- a/deepspeech/transform/transform_interface.py
+++ b/deepspeech/transform/transform_interface.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 # TODO(karita): add this to all the transform impl.
 class TransformInterface:
     """Transform Interface"""
diff --git a/deepspeech/transform/transformation.py b/deepspeech/transform/transformation.py
index 0f8c39bb..afb1db28 100644
--- a/deepspeech/transform/transformation.py
+++ b/deepspeech/transform/transformation.py
@@ -1,16 +1,28 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Transformation module."""
-from collections.abc import Sequence
-from collections import OrderedDict
 import copy
-from inspect import signature
 import io
 import logging
+from collections import OrderedDict
+from collections.abc import Sequence
+from inspect import signature
 
 import yaml
 
 from deepspeech.utils.dynamic_import import dynamic_import
 
-
 # TODO(karita): inherit TransformInterface
 # TODO(karita): register cmd arguments in asr_train.py
 import_alias = dict(
@@ -33,8 +45,7 @@ import_alias = dict(
     istft="deepspeech.transform.spectrogram:IStft",
     stft2fbank="deepspeech.transform.spectrogram:Stft2LogMelSpectrogram",
     wpe="deepspeech.transform.wpe:WPE",
-    channel_selector="deepspeech.transform.channel_selector:ChannelSelector",
-)
+    channel_selector="deepspeech.transform.channel_selector:ChannelSelector", )
 
 
 class Transformation():
@@ -83,21 +94,16 @@ class Transformation():
                         # Some function, e.g. built-in function, are failed
                         pass
                     else:
-                        logging.error(
-                            "Expected signature: {}({})".format(
-                                class_obj.__name__, signa
-                            )
-                        )
+                        logging.error("Expected signature: {}({})".format(
+                            class_obj.__name__, signa))
                     raise
         else:
             raise NotImplementedError(
-                "Not supporting mode={}".format(self.conf["mode"])
-            )
+                "Not supporting mode={}".format(self.conf["mode"]))
 
     def __repr__(self):
-        rep = "\n" + "\n".join(
-            "    {}: {}".format(k, v) for k, v in self.functions.items()
-        )
+        rep = "\n" + "\n".join("    {}: {}".format(k, v)
+                               for k, v in self.functions.items())
         return "{}({})".format(self.__class__.__name__, rep)
 
     def __call__(self, xs, uttid_list=None, **kwargs):
@@ -130,18 +136,19 @@ class Transformation():
                 _kwargs = {k: v for k, v in kwargs.items() if k in param}
                 try:
                     if uttid_list is not None and "uttid" in param:
-                        xs = [func(x, u, **_kwargs) for x, u in zip(xs, uttid_list)]
+                        xs = [
+                            func(x, u, **_kwargs)
+                            for x, u in zip(xs, uttid_list)
+                        ]
                     else:
                         xs = [func(x, **_kwargs) for x in xs]
                 except Exception:
-                    logging.fatal(
-                        "Catch a exception from {}th func: {}".format(idx, func)
-                    )
+                    logging.fatal("Catch a exception from {}th func: {}".format(
+                        idx, func))
                     raise
         else:
             raise NotImplementedError(
-                "Not supporting mode={}".format(self.conf["mode"])
-            )
+                "Not supporting mode={}".format(self.conf["mode"]))
 
         if is_batch:
             return xs
diff --git a/deepspeech/transform/wpe.py b/deepspeech/transform/wpe.py
index 8aed97e6..d82005f6 100644
--- a/deepspeech/transform/wpe.py
+++ b/deepspeech/transform/wpe.py
@@ -1,10 +1,26 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from nara_wpe.wpe import wpe
 
 
 class WPE(object):
-    def __init__(
-        self, taps=10, delay=3, iterations=3, psd_context=0, statistics_mode="full"
-    ):
+    def __init__(self,
+                 taps=10,
+                 delay=3,
+                 iterations=3,
+                 psd_context=0,
+                 statistics_mode="full"):
         self.taps = taps
         self.delay = delay
         self.iterations = iterations
@@ -12,18 +28,15 @@ class WPE(object):
         self.statistics_mode = statistics_mode
 
     def __repr__(self):
-        return (
-            "{name}(taps={taps}, delay={delay}"
-            "iterations={iterations}, psd_context={psd_context}, "
-            "statistics_mode={statistics_mode})".format(
-                name=self.__class__.__name__,
-                taps=self.taps,
-                delay=self.delay,
-                iterations=self.iterations,
-                psd_context=self.psd_context,
-                statistics_mode=self.statistics_mode,
-            )
-        )
+        return ("{name}(taps={taps}, delay={delay}"
+                "iterations={iterations}, psd_context={psd_context}, "
+                "statistics_mode={statistics_mode})".format(
+                    name=self.__class__.__name__,
+                    taps=self.taps,
+                    delay=self.delay,
+                    iterations=self.iterations,
+                    psd_context=self.psd_context,
+                    statistics_mode=self.statistics_mode, ))
 
     def __call__(self, xs):
         """Return enhanced
@@ -40,6 +53,5 @@ class WPE(object):
             delay=self.delay,
             iterations=self.iterations,
             psd_context=self.psd_context,
-            statistics_mode=self.statistics_mode,
-        )
+            statistics_mode=self.statistics_mode, )
         return xs.transpose(2, 1, 0)
diff --git a/deepspeech/utils/check_kwargs.py b/deepspeech/utils/check_kwargs.py
index 593bfa24..1ee7329b 100644
--- a/deepspeech/utils/check_kwargs.py
+++ b/deepspeech/utils/check_kwargs.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import inspect
 
 
@@ -17,4 +30,5 @@ def check_kwargs(func, kwargs, name=None):
         name = func.__name__
     for k in kwargs.keys():
         if k not in params:
-            raise TypeError(f"{name}() got an unexpected keyword argument '{k}'")
+            raise TypeError(
+                f"{name}() got an unexpected keyword argument '{k}'")
diff --git a/deepspeech/utils/spec_augment.py b/deepspeech/utils/spec_augment.py
index e69de29b..185a92b8 100644
--- a/deepspeech/utils/spec_augment.py
+++ b/deepspeech/utils/spec_augment.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/examples/librispeech/s2/README.md b/examples/librispeech/s2/README.md
index fc634ff6..9285a183 100644
--- a/examples/librispeech/s2/README.md
+++ b/examples/librispeech/s2/README.md
@@ -23,5 +23,5 @@
 | test-clean | join_ctc_w/o_lm | 2620 | 52576 | 97.2 | 2.6 | 0.3 | 0.4 | 3.2 | 34.9 |  
 | test-clean | join_ctc_w_lm | 2620 | 52576 | 97.9 | 1.8 | 0.2 | 0.3 | 2.4 | 27.8 |  
 
-Compare with [ESPNET](https://github.com/espnet/espnet/blob/master/egs/librispeech/asr1/RESULTS.md#pytorch-large-transformer-with-specaug-4-gpus--transformer-lm-4-gpus) 
+Compare with [ESPNET](https://github.com/espnet/espnet/blob/master/egs/librispeech/asr1/RESULTS.md#pytorch-large-transformer-with-specaug-4-gpus--transformer-lm-4-gpus)
 we using 8gpu, but model size (aheads4-adim256) small than it.
diff --git a/parakeet/data/batch.py b/parakeet/data/batch.py
index 515074d1..5e7ac399 100644
--- a/parakeet/data/batch.py
+++ b/parakeet/data/batch.py
@@ -53,8 +53,8 @@ def batch_text_id(minibatch, pad_id=0, dtype=np.int64):
     peek_example = minibatch[0]
     assert len(peek_example.shape) == 1, "text example is an 1D tensor"
 
-    lengths = [example.shape[0] for example in minibatch
-               ]  # assume (channel, n_samples) or (n_samples, )
+    lengths = [example.shape[0] for example in
+               minibatch]  # assume (channel, n_samples) or (n_samples, )
     max_len = np.max(lengths)
 
     batch = []
diff --git a/parakeet/exps/tacotron2/ljspeech.py b/parakeet/exps/tacotron2/ljspeech.py
index 20dc29d3..59c855eb 100644
--- a/parakeet/exps/tacotron2/ljspeech.py
+++ b/parakeet/exps/tacotron2/ljspeech.py
@@ -67,19 +67,16 @@ class LJSpeechCollector(object):
 
         # Sort by text_len in descending order
         texts = [
-            i
-            for i, _ in sorted(
+            i for i, _ in sorted(
                 zip(texts, text_lens), key=lambda x: x[1], reverse=True)
         ]
         mels = [
-            i
-            for i, _ in sorted(
+            i for i, _ in sorted(
                 zip(mels, text_lens), key=lambda x: x[1], reverse=True)
         ]
 
         mel_lens = [
-            i
-            for i, _ in sorted(
+            i for i, _ in sorted(
                 zip(mel_lens, text_lens), key=lambda x: x[1], reverse=True)
         ]
 
diff --git a/requirements.txt b/requirements.txt
index 4878dbe3..1ead8609 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ librosa
 llvmlite
 loguru
 matplotlib
-nltk
+nara_wpenltk
 numba
 numpy==1.20.0
 pandas
@@ -42,4 +42,3 @@ visualdl==2.2.0
 webrtcvad
 yacs
 yq
-nara_wpe
\ No newline at end of file
diff --git a/utils/feat-to-shape.py b/utils/feat-to-shape.py
index 911bf5cf..7b36b7e5 100755
--- a/utils/feat-to-shape.py
+++ b/utils/feat-to-shape.py
@@ -12,33 +12,32 @@ from deepspeech.utils.cli_utils import is_scipy_wav_style
 def get_parser():
     parser = argparse.ArgumentParser(
         description="convert feature to its shape",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
-    parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option")
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
+    parser.add_argument(
+        "--verbose", "-V", default=0, type=int, help="Verbose option")
     parser.add_argument(
         "--filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "sound.hdf5", "sound"],
         help="Specify the file format for the rspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
         "--preprocess-conf",
         type=str,
         default=None,
-        help="The configuration file for the pre-processing",
-    )
+        help="The configuration file for the pre-processing", )
     parser.add_argument(
-        "rspecifier", type=str, help="Read specifier for feats. e.g. ark:some.ark"
-    )
+        "rspecifier",
+        type=str,
+        help="Read specifier for feats. e.g. ark:some.ark")
     parser.add_argument(
         "out",
         nargs="?",
         type=argparse.FileType("w"),
         default=sys.stdout,
-        help="The output filename. " "If omitted, then output to sys.stdout",
-    )
+        help="The output filename. "
+        "If omitted, then output to sys.stdout", )
     return parser
 
 
@@ -64,8 +63,7 @@ def main():
     # so change to file_reader_helper to return shape.
     # This make sense only with filetype="hdf5".
     for utt, mat in file_reader_helper(
-        args.rspecifier, args.filetype, return_shape=preprocessing is None
-    ):
+            args.rspecifier, args.filetype, return_shape=preprocessing is None):
         if preprocessing is not None:
             if is_scipy_wav_style(mat):
                 # If data is sound file, then got as Tuple[int, ndarray]