diff --git a/deepspeech/decoders/README.md b/deepspeech/decoders/README.md
index 11eb5606..ee7d53de 100644
--- a/deepspeech/decoders/README.md
+++ b/deepspeech/decoders/README.md
@@ -10,4 +10,4 @@
 * [Vectorized Beam Search for CTC-Attention-based Speech Recognition](https://www.isca-speech.org/archive/pdfs/interspeech_2019/seki19b_interspeech.pdf)
 
 ### Streaming Join CTC/ATT Beam Search
-* [STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH](https://arxiv.org/abs/2006.14941)
\ No newline at end of file
+* [STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH](https://arxiv.org/abs/2006.14941)
diff --git a/deepspeech/decoders/__init__.py b/deepspeech/decoders/__init__.py
index 1ea05143..f04a6d19 100644
--- a/deepspeech/decoders/__init__.py
+++ b/deepspeech/decoders/__init__.py
@@ -1 +1,14 @@
-from .ctcdecoder import swig_wrapper
\ No newline at end of file
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .ctcdecoder import swig_wrapper
diff --git a/deepspeech/decoders/scores/__init__.py b/deepspeech/decoders/scores/__init__.py
index e69de29b..185a92b8 100644
--- a/deepspeech/decoders/scores/__init__.py
+++ b/deepspeech/decoders/scores/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/deepspeech/decoders/scores/ctc.py b/deepspeech/decoders/scores/ctc.py
index aaa3dc86..4871d6e1 100644
--- a/deepspeech/decoders/scores/ctc.py
+++ b/deepspeech/decoders/scores/ctc.py
@@ -1,5 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """ScorerInterface implementation for CTC."""
-
 import numpy as np
 import paddle
 
@@ -81,8 +93,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         prev_score, state = state
         presub_score, new_st = self.impl(y.cpu(), ids.cpu(), state)
         tscore = paddle.to_tensor(
-            presub_score - prev_score, place=x.place, dtype=x.dtype
-        )
+            presub_score - prev_score, place=x.place, dtype=x.dtype)
         return tscore, (presub_score, new_st)
 
     def batch_init_state(self, x: paddle.Tensor):
@@ -115,15 +126,9 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
 
         """
         batch_state = (
-            (
-                paddle.stack([s[0] for s in state], axis=2),
-                paddle.stack([s[1] for s in state]),
-                state[0][2],
-                state[0][3],
-            )
-            if state[0] is not None
-            else None
-        )
+            (paddle.stack([s[0] for s in state], axis=2),
+             paddle.stack([s[1] for s in state]), state[0][2], state[0][3], )
+            if state[0] is not None else None)
         return self.impl(y, batch_state, ids)
 
     def extend_prob(self, x: paddle.Tensor):
diff --git a/deepspeech/decoders/scores/ctc_prefix_score.py b/deepspeech/decoders/scores/ctc_prefix_score.py
index 754e43ae..c85d546d 100644
--- a/deepspeech/decoders/scores/ctc_prefix_score.py
+++ b/deepspeech/decoders/scores/ctc_prefix_score.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
-
 # Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori)
 # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-
-import paddle
-
 import numpy as np
+import paddle
 import six
 
 
@@ -49,9 +46,10 @@ class CTCPrefixScorePD():
                 x[i, l:, blank] = 0
         # Reshape input x
         xn = x.transpose([1, 0, 2])  # (B, T, O) -> (T, B, O)
-        xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) # (T,B,O)
+        xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1,
+                                                      self.odim)  # (T,B,O)
         self.x = paddle.stack([xn, xb])  # (2, T, B, O)
-        self.end_frames = paddle.to_tensor(xlens) - 1 # (B,)
+        self.end_frames = paddle.to_tensor(xlens) - 1  # (B,)
 
         # Setup CTC windowing
         self.margin = margin
@@ -59,7 +57,7 @@ class CTCPrefixScorePD():
         self.frame_ids = paddle.arange(self.input_length, dtype=self.dtype)
         # Base indices for index conversion
         # B idx, hyp idx. shape (B*W, 1)
-        self.idx_bh = None 
+        self.idx_bh = None
         # B idx. shape (B,)
         self.idx_b = paddle.arange(self.batch)
         # B idx, O idx. shape (B, 1)
@@ -78,56 +76,59 @@ class CTCPrefixScorePD():
         last_ids = [yi[-1] for yi in y]  # last output label ids
         n_bh = len(last_ids)  # batch * hyps
         n_hyps = n_bh // self.batch  # assuming each utterance has the same # of hyps
-        self.scoring_num = scoring_ids.size(-1) if scoring_ids is not None else 0
+        self.scoring_num = scoring_ids.size(
+            -1) if scoring_ids is not None else 0
         # prepare state info
         if state is None:
             r_prev = paddle.full(
                 (self.input_length, 2, self.batch, n_hyps),
                 self.logzero,
-                dtype=self.dtype,
-            )  # (T, 2, B, W)
-            r_prev[:, 1] = paddle.cumsum(self.x[0, :, :, self.blank], 0).unsqueeze(2)
-            r_prev = r_prev.view(-1, 2, n_bh)  # (T, 2, BW)
+                dtype=self.dtype, )  # (T, 2, B, W)
+            r_prev[:, 1] = paddle.cumsum(self.x[0, :, :, self.blank],
+                                         0).unsqueeze(2)
+            r_prev = r_prev.view(-1, 2, n_bh)  # (T, 2, BW)
             s_prev = 0.0  # score
-            f_min_prev = 0 # eq. 22-23
-            f_max_prev = 1 # eq. 22-23
+            f_min_prev = 0  # eq. 22-23
+            f_max_prev = 1  # eq. 22-23
         else:
             r_prev, s_prev, f_min_prev, f_max_prev = state
 
         # select input dimensions for scoring
         if self.scoring_num > 0:
             # (BW, O)
-            scoring_idmap = paddle.full((n_bh, self.odim), -1, dtype=paddle.long)
+            scoring_idmap = paddle.full(
+                (n_bh, self.odim), -1, dtype=paddle.long)
             snum = self.scoring_num
             if self.idx_bh is None or n_bh > len(self.idx_bh):
-                self.idx_bh = paddle.arange(n_bh).view(-1, 1) # (BW, 1)
+                self.idx_bh = paddle.arange(n_bh).view(-1, 1)  # (BW, 1)
             scoring_idmap[self.idx_bh[:n_bh], scoring_ids] = paddle.arange(snum)
             scoring_idx = (
-                scoring_ids + self.idx_bo.repeat(1, n_hyps).view(-1, 1) # (BW,1)
-            ).view(-1)  # (BWO)
+                scoring_ids + self.idx_bo.repeat(1, n_hyps).view(-1,
+                                                                 1)  # (BW,1)
+            ).view(-1)  # (BWO)
             # x_ shape (2, T, B*W, O)
             x_ = paddle.index_select(
-                self.x.view(2, -1, self.batch * self.odim), scoring_idx, 2
-            ).view(2, -1, n_bh, snum)
+                self.x.view(2, -1, self.batch * self.odim), scoring_idx,
+                2).view(2, -1, n_bh, snum)
         else:
             scoring_ids = None
             scoring_idmap = None
             snum = self.odim
             # x_ shape (2, T, B*W, O)
-            x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).view(2, -1, n_bh, snum)
+            x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).view(2, -1,
+                                                                     n_bh, snum)
 
         # new CTC forward probs are prepared as a (T x 2 x BW x S) tensor
         # that corresponds to r_t^n(h) and r_t^b(h) in a batch.
         r = paddle.full(
             (self.input_length, 2, n_bh, snum),
             self.logzero,
-            dtype=self.dtype,
-        )
+            dtype=self.dtype, )
         if output_length == 0:
             r[0, 0] = x_[0, 0]
 
-        r_sum = paddle.logsumexp(r_prev, 1) #(T,BW)
-        log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum) # (T, BW, O)
+        r_sum = paddle.logsumexp(r_prev, 1)  #(T,BW)
+        log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum)  # (T, BW, O)
         if scoring_ids is not None:
             for idx in range(n_bh):
                 pos = scoring_idmap[idx, last_ids[idx]]
@@ -152,27 +153,30 @@ class CTCPrefixScorePD():
 
         # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
         for t in range(start, end):
-            rp = r[t - 1] # (2 x BW x O')
+            rp = r[t - 1]  # (2 x BW x O')
             rr = paddle.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view(
-                2, 2, n_bh, snum
-            )  # (2,2,BW,O')
+                2, 2, n_bh, snum)  # (2,2,BW,O')
             r[t] = paddle.logsumexp(rr, 1) + x_[:, t]
 
         # compute log prefix probabilities log(psi)
-        log_phi_x = paddle.concat((log_phi[0].unsqueeze(0), log_phi[:-1]), axis=0) + x_[0]
+        log_phi_x = paddle.concat(
+            (log_phi[0].unsqueeze(0), log_phi[:-1]), axis=0) + x_[0]
         if scoring_ids is not None:
-            log_psi = paddle.full((n_bh, self.odim), self.logzero, dtype=self.dtype)
+            log_psi = paddle.full(
+                (n_bh, self.odim), self.logzero, dtype=self.dtype)
             log_psi_ = paddle.logsumexp(
-                paddle.concat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), axis=0),
-                axis=0,
-            )
+                paddle.concat(
+                    (log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)),
+                    axis=0),
+                axis=0, )
             for si in range(n_bh):
                 log_psi[si, scoring_ids[si]] = log_psi_[si]
         else:
             log_psi = paddle.logsumexp(
-                paddle.concat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), axis=0),
-                axis=0,
-            )
+                paddle.concat(
+                    (log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)),
+                    axis=0),
+                axis=0, )
 
         for si in range(n_bh):
             log_psi[si, self.eos] = r_sum[self.end_frames[si // n_hyps], si]
@@ -193,16 +197,16 @@ class CTCPrefixScorePD():
         # convert ids to BHO space
         n_bh = len(s)
         n_hyps = n_bh // self.batch
-        vidx = (best_ids + (self.idx_b * (n_hyps * self.odim)).view(-1, 1)).view(-1)
+        vidx = (best_ids + (self.idx_b *
+                            (n_hyps * self.odim)).view(-1, 1)).view(-1)
         # select hypothesis scores
         s_new = paddle.index_select(s.view(-1), vidx, 0)
         s_new = s_new.view(-1, 1).repeat(1, self.odim).view(n_bh, self.odim)
         # convert ids to BHS space (S: scoring_num)
         if scoring_idmap is not None:
             snum = self.scoring_num
-            hyp_idx = (best_ids // self.odim + (self.idx_b * n_hyps).view(-1, 1)).view(
-                -1
-            )
+            hyp_idx = (best_ids // self.odim +
+                       (self.idx_b * n_hyps).view(-1, 1)).view(-1)
             label_ids = paddle.fmod(best_ids, self.odim).view(-1)
             score_idx = scoring_idmap[hyp_idx, label_ids]
             score_idx[score_idx == -1] = 0
@@ -211,8 +215,7 @@ class CTCPrefixScorePD():
             snum = self.odim
         # select forward probabilities
         r_new = paddle.index_select(r.view(-1, 2, n_bh * snum), vidx, 2).view(
-            -1, 2, n_bh
-        )
+            -1, 2, n_bh)
         return r_new, s_new, f_min, f_max
 
     def extend_prob(self, x):
@@ -233,7 +236,7 @@ class CTCPrefixScorePD():
         xn = x.transpose([1, 0, 2])  # (B, T, O) -> (T, B, O)
         xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
         self.x = paddle.stack([xn, xb])  # (2, T, B, O)
-        self.x[:, : tmp_x.shape[1], :, :] = tmp_x
+        self.x[:, :tmp_x.shape[1], :, :] = tmp_x
         self.input_length = x.size(1)
         self.end_frames = paddle.to_tensor(xlens) - 1
 
@@ -254,12 +257,12 @@ class CTCPrefixScorePD():
         r_prev_new = paddle.full(
             (self.input_length, 2),
             self.logzero,
-            dtype=self.dtype,
-        )
+            dtype=self.dtype, )
 
         start = max(r_prev.shape[0], 1)
         r_prev_new[0:start] = r_prev
         for t in range(start, self.input_length):
-            r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :, self.blank]
+            r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :,
+                                                             self.blank]
 
         return (r_prev_new, s_prev, f_min_prev, f_max_prev)
@@ -279,7 +282,7 @@ class CTCPrefixScore():
         self.blank = blank
         self.eos = eos
         self.input_length = len(x)
-        self.x = x # (T, O)
+        self.x = x  # (T, O)
 
     def initial_state(self):
         """Obtain an initial CTC state
@@ -318,12 +321,12 @@ class CTCPrefixScore():
             r[output_length - 1] = self.logzero
 
         # prepare forward probabilities for the last label
-        r_sum = self.xp.logaddexp(
-            r_prev[:, 0], r_prev[:, 1]
-        )  # log(r_t^n(g) + r_t^b(g))
+        r_sum = self.xp.logaddexp(r_prev[:, 0],
+                                  r_prev[:, 1])  # log(r_t^n(g) + r_t^b(g))
         last = y[-1]
         if output_length > 0 and last in cs:
-            log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32)
+            log_phi = self.xp.ndarray(
+                (self.input_length, len(cs)), dtype=np.float32)
             for i in six.moves.range(len(cs)):
                 log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1]
         else:
@@ -335,9 +338,8 @@ class CTCPrefixScore():
         log_psi = r[start - 1, 0]
         for t in six.moves.range(start, self.input_length):
             r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t]
-            r[t, 1] = (
-                self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank]
-            )
+            r[t, 1] = (self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) +
+                       self.x[t, self.blank])
             log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t])
 
         # get P(...eos|X) that ends with the prefix itself
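[Editor's note — illustrative sketch, not part of the patch.] The heart of CTCPrefixScore (and of its batched counterpart CTCPrefixScorePD) is the forward recursion over r_t^n, the paths for the extended prefix that end in the new label, and r_t^b, the paths that end in blank, with log_psi accumulating the prefix score. Below is a single-candidate numpy sketch of that recursion under the same conventions as the code above (log domain, logzero around -1e10); the helper names initial_state/extend are the editor's own, and batching, frame windowing and the eos case are omitted.

    import numpy as np

    LOGZERO = -1.0e10

    def initial_state(x, blank=0):
        """Forward variables of the empty prefix: only blanks consumed so far."""
        T = x.shape[0]
        r = np.full((T, 2), LOGZERO)
        r[0, 1] = x[0, blank]
        for t in range(1, T):
            r[t, 1] = r[t - 1, 1] + x[t, blank]
        return r

    def extend(x, y, r_prev, c, blank=0):
        """Score the prefix y + [c] given the forward variables of y.

        x: (T, V) frame-level log-posteriors, y: labels scored so far.
        Returns (log prefix score, forward variables of the new prefix).
        """
        T = x.shape[0]
        r = np.full((T, 2), LOGZERO)
        r_sum = np.logaddexp(r_prev[:, 0], r_prev[:, 1])
        # phi_t: probability that y is finished by frame t so c can start later;
        # a repeated label needs an intervening blank, hence r_prev[:, 1].
        log_phi = r_prev[:, 1] if (len(y) > 0 and y[-1] == c) else r_sum
        if len(y) == 0:
            r[0, 0] = x[0, c]  # the very first label may be emitted at frame 0
        log_psi = r[0, 0]
        for t in range(1, T):
            r[t, 0] = np.logaddexp(r[t - 1, 0], log_phi[t - 1]) + x[t, c]
            r[t, 1] = np.logaddexp(r[t - 1, 0], r[t - 1, 1]) + x[t, blank]
            log_psi = np.logaddexp(log_psi, log_phi[t - 1] + x[t, c])
        return log_psi, r

    rng = np.random.default_rng(0)
    x = np.log(rng.dirichlet(np.ones(5), size=20))  # (T=20, V=5) log-posteriors
    r0 = initial_state(x)
    score_a, r_a = extend(x, [], r0, c=2)            # score prefix [2]
    score_ab, r_ab = extend(x, [2], r_a, c=3)        # extend to [2, 3]
    print(score_a, score_ab)

CTCPrefixScorePD vectorizes this same recursion over batch, hypotheses and candidate tokens, which is where the (2, T, B, O) tensors and the index bookkeeping above come from.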
diff --git a/deepspeech/decoders/scores/length_bonus.py b/deepspeech/decoders/scores/length_bonus.py
index 76f45f80..864e22d1 100644
--- a/deepspeech/decoders/scores/length_bonus.py
+++ b/deepspeech/decoders/scores/length_bonus.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Length bonus module."""
 from typing import Any
 from typing import List
@@ -34,11 +47,13 @@ class LengthBonus(BatchScorerInterface):
             and None
 
         """
-        return paddle.to_tensor([1.0], place=x.place, dtype=x.dtype).expand(self.n), None
+        return paddle.to_tensor(
+            [1.0], place=x.place, dtype=x.dtype).expand(self.n), None
 
-    def batch_score(
-        self, ys: paddle.Tensor, states: List[Any], xs: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, List[Any]]:
+    def batch_score(self,
+                    ys: paddle.Tensor,
+                    states: List[Any],
+                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
         """Score new token batch.
 
         Args:
@@ -53,9 +68,5 @@ class LengthBonus(BatchScorerInterface):
             and next state list for ys.
 
         """
-        return (
-            paddle.to_tensor([1.0], place=xs.place, dtype=xs.dtype).expand(
-                ys.shape[0], self.n
-            ),
-            None,
-        )
+        return (paddle.to_tensor([1.0], place=xs.place, dtype=xs.dtype).expand(
+            ys.shape[0], self.n), None, )
diff --git a/deepspeech/decoders/scores/ngram.py b/deepspeech/decoders/scores/ngram.py
index 9809427b..050a8c81 100644
--- a/deepspeech/decoders/scores/ngram.py
+++ b/deepspeech/decoders/scores/ngram.py
@@ -1,5 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Ngram lm implement."""
-
 from abc import ABC
 
 import kenlm
@@ -51,9 +63,8 @@ class Ngrambase(ABC):
         self.lm.BaseScore(state, ys, out_state)
         scores = paddle.empty_like(next_token, dtype=x.dtype)
         for i, j in enumerate(next_token):
-            scores[i] = self.lm.BaseScore(
-                out_state, self.chardict[j], self.tmpkenlmstate
-            )
+            scores[i] = self.lm.BaseScore(out_state, self.chardict[j],
+                                          self.tmpkenlmstate)
         return scores, out_state
 
 
@@ -74,7 +85,8 @@ class NgramFullScorer(Ngrambase, BatchScorerInterface):
             and next state list for ys.
 
         """
-        return self.score_partial_(y, paddle.to_tensor(range(self.charlen)), state, x)
+        return self.score_partial_(
+            y, paddle.to_tensor(range(self.charlen)), state, x)
 
 
 class NgramPartScorer(Ngrambase, PartialScorerInterface):
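[Editor's note — illustrative sketch, not part of the patch.] LengthBonus contributes a constant +1 per emitted token and the KenLM wrappers contribute n-gram log scores; during beam search these per-token scores enter a weighted sum together with the decoder and CTC scores. A dependency-free sketch of that combination; the weight values and dictionary keys below are made up for the example.

    import numpy as np

    def combined_token_scores(parts, weights):
        """parts: name -> (vocab,) per-token scores; weights: name -> float."""
        total = np.zeros_like(next(iter(parts.values())))
        for name, score in parts.items():
            total = total + weights.get(name, 0.0) * score
        return total

    vocab = 6
    parts = {
        "decoder": np.log(np.full(vocab, 1.0 / vocab)),  # attention decoder
        "lm": np.log(np.full(vocab, 1.0 / vocab)),       # n-gram LM scores
        "length_bonus": np.ones(vocab),                  # LengthBonus: +1 per token
    }
    weights = {"decoder": 1.0, "lm": 0.5, "length_bonus": 0.3}
    print(combined_token_scores(parts, weights))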
diff --git a/deepspeech/decoders/scores/score_interface.py b/deepspeech/decoders/scores/score_interface.py
index c52f8d19..3a9c500b 100644
--- a/deepspeech/decoders/scores/score_interface.py
+++ b/deepspeech/decoders/scores/score_interface.py
@@ -1,11 +1,23 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Scorer interface module."""
-
+import warnings
 from typing import Any
 from typing import List
 from typing import Tuple
 
 import paddle
-import warnings
 
 
 class ScorerInterface:
@@ -37,7 +49,7 @@ class ScorerInterface:
         """
         return None
 
-    def select_state(self, state: Any, i: int, new_id: int = None) -> Any:
+    def select_state(self, state: Any, i: int, new_id: int=None) -> Any:
         """Select state with relative ids in the main beam search.
 
         Args:
@@ -51,9 +63,8 @@ class ScorerInterface:
         """
         return None if state is None else state[i]
 
-    def score(
-        self, y: paddle.Tensor, state: Any, x: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, Any]:
+    def score(self, y: paddle.Tensor, state: Any,
+              x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
         """Score new token (required).
 
         Args:
@@ -96,9 +107,10 @@ class BatchScorerInterface(ScorerInterface):
         """
         return self.init_state(x)
 
-    def batch_score(
-        self, ys: paddle.Tensor, states: List[Any], xs: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, List[Any]]:
+    def batch_score(self,
+                    ys: paddle.Tensor,
+                    states: List[Any],
+                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
         """Score new token batch (required).
 
         Args:
@@ -114,10 +126,8 @@ class BatchScorerInterface(ScorerInterface):
 
         """
         warnings.warn(
-            "{} batch score is implemented through for loop not parallelized".format(
-                self.__class__.__name__
-            )
-        )
+            "{} batch score is implemented through for loop not parallelized".
+            format(self.__class__.__name__))
         scores = list()
         outstates = list()
         for i, (y, state, x) in enumerate(zip(ys, states, xs)):
@@ -141,9 +151,11 @@ class PartialScorerInterface(ScorerInterface):
 
     """
 
-    def score_partial(
-        self, y: paddle.Tensor, next_tokens: paddle.Tensor, state: Any, x: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, Any]:
+    def score_partial(self,
+                      y: paddle.Tensor,
+                      next_tokens: paddle.Tensor,
+                      state: Any,
+                      x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
         """Score new token (required).
 
         Args:
@@ -165,12 +177,11 @@ class BatchPartialScorerInterface(BatchScorerInterface, PartialScorerInterface):
     """Batch partial scorer interface for beam search."""
 
     def batch_score_partial(
-        self,
-        ys: paddle.Tensor,
-        next_tokens: paddle.Tensor,
-        states: List[Any],
-        xs: paddle.Tensor,
-    ) -> Tuple[paddle.Tensor, Any]:
+            self,
+            ys: paddle.Tensor,
+            next_tokens: paddle.Tensor,
+            states: List[Any],
+            xs: paddle.Tensor, ) -> Tuple[paddle.Tensor, Any]:
         """Score new token (required).
 
         Args:
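[Editor's note — illustrative sketch, not part of the patch.] The interfaces above define the contract every scorer has to honour: score returns a vocab-sized score vector plus a new state, select_state picks the state of a surviving hypothesis, and the BatchScorerInterface fallback simply loops over hypotheses (hence the warning in the diff). Below is a minimal conforming scorer, with numpy arrays standing in for paddle.Tensor; the class name is hypothetical.

    import numpy as np

    class UniformScorer:
        """Minimal ScorerInterface-style scorer: every token gets the same score."""

        def __init__(self, vocab_size):
            self.vocab_size = vocab_size

        def init_state(self, x):
            return None  # stateless scorer

        def select_state(self, state, i, new_id=None):
            return None if state is None else state[i]

        def score(self, y, state, x):
            # y: (ylen,) prefix ids, x: (T, D) encoder output
            scores = np.full(self.vocab_size, -np.log(self.vocab_size))
            return scores, state

    def batch_score(scorer, ys, states, xs):
        """The default BatchScorerInterface fallback: a plain Python loop."""
        outs, outstates = [], []
        for y, state, x in zip(ys, states, xs):
            s, st = scorer.score(y, state, x)
            outs.append(s)
            outstates.append(st)
        return np.stack(outs), outstates

    scorer = UniformScorer(vocab_size=8)
    ys = [np.array([1, 2]), np.array([1, 3])]
    scores, states = batch_score(scorer, ys, [None, None], [np.zeros((5, 4))] * 2)
    print(scores.shape)  # (2, 8)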
diff --git a/deepspeech/decoders/utils.py b/deepspeech/decoders/utils.py
index eec13138..92f65814 100644
--- a/deepspeech/decoders/utils.py
+++ b/deepspeech/decoders/utils.py
@@ -1,6 +1,20 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 __all__ = ["end_detect"]
+
 
 
 def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
     """End detection.
 
@@ -20,11 +34,12 @@ def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
     for m in range(M):
         # get ended_hyps with their length is i - m
         hyp_length = i - m
-        hyps_same_length = [x for x in ended_hyps if len(x["yseq"]) == hyp_length]
+        hyps_same_length = [
+            x for x in ended_hyps if len(x["yseq"]) == hyp_length
+        ]
         if len(hyps_same_length) > 0:
             best_hyp_same_length = sorted(
-                hyps_same_length, key=lambda x: x["score"], reverse=True
-            )[0]
+                hyps_same_length, key=lambda x: x["score"], reverse=True)[0]
             if best_hyp_same_length["score"] - best_hyp["score"] < D_end:
                 count += 1
 
diff --git a/deepspeech/modules/ctc.py b/deepspeech/modules/ctc.py
index 7c2fd4ad..1f988675 100644
--- a/deepspeech/modules/ctc.py
+++ b/deepspeech/modules/ctc.py
@@ -125,7 +125,7 @@ class CTCDecoderBase(nn.Layer):
 
 
 class CTCDecoder(CTCDecoderBase):
-    def __init__(self,*args, **kwargs):
+    def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # CTCDecoder LM Score handle
         self._ext_scorer = None
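[Editor's note — illustrative sketch, not part of the patch.] end_detect stops the outer decoding loop once the best hypotheses that finished at each of the last M output lengths are all far below the global best (more than 10 nats worse with the default D_end). A self-contained sketch of the same rule on made-up hypotheses; the dict keys mirror the ones the function reads ("yseq", "score").

    import numpy as np

    ended_hyps = [
        {"yseq": [1, 2], "score": -1.0},            # length 2, global best
        {"yseq": [1, 5, 2], "score": -12.5},        # length 3
        {"yseq": [1, 5, 7, 2], "score": -13.0},     # length 4
        {"yseq": [1, 5, 7, 9, 2], "score": -14.2},  # length 5
    ]

    def should_stop(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
        """Same idea as end_detect: stop once the best hypotheses that ended at
        the last M lengths are all much worse than the overall best."""
        if not ended_hyps:
            return False
        best = max(h["score"] for h in ended_hyps)
        count = 0
        for m in range(M):
            same_len = [h for h in ended_hyps if len(h["yseq"]) == i - m]
            if same_len and max(h["score"] for h in same_len) - best < D_end:
                count += 1
        return count == M

    for step in range(3, 8):
        if should_stop(ended_hyps, step):
            print("stop expanding hypotheses at output length", step)
            break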