From 6569ce123db8d79cad3163732e0b2d582d1b9359 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 25 Oct 2021 08:50:43 +0000
Subject: [PATCH] lm embed and format code

---
 deepspeech/frontend/augmentor/augmentation.py |  2 +-
 deepspeech/io/reader.py                       |  1 +
 deepspeech/models/lm/transformer.py           |  2 +-
 deepspeech/modules/embedding.py               | 12 +--
 deepspeech/modules/encoder.py                 | 10 +--
 deepspeech/transform/__init__.py              | 13 +++
 deepspeech/transform/cmvn.py                  | 61 +++++++------
 deepspeech/utils/cli_readers.py               | 66 +++++++-------
 deepspeech/utils/cli_utils.py                 | 39 +++++----
 deepspeech/utils/cli_writers.py               | 71 ++++++++-------
 requirements.txt                              |  1 +
 utils/apply-cmvn.py                           | 77 ++++++++---------
 utils/caculate_rtf.py                         | 41 ++++-----
 utils/compute-cmvn-stats.py                   | 52 +++++------
 utils/copy-feats.py                           | 51 ++++++-----
 utils/{data2json.py => data2json.sh}          |  0
 utils/merge_scp2json.py                       | 86 ++++++++-----------
 utils/text2token.py                           | 38 ++++----
 18 files changed, 312 insertions(+), 311 deletions(-)
 rename utils/{data2json.py => data2json.sh} (100%)

diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
index 17eba5b5..d2316ab1 100644
--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -227,4 +227,4 @@ class AugmentationPipeline():
             obj = class_obj(self._rng, **params)
         except Exception:
             raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
-        return obj
\ No newline at end of file
+        return obj
diff --git a/deepspeech/io/reader.py b/deepspeech/io/reader.py
index f6512053..59098752 100644
--- a/deepspeech/io/reader.py
+++ b/deepspeech/io/reader.py
@@ -24,6 +24,7 @@ __all__ = ["LoadInputsAndTargets"]
 
 logger = Log(__name__).getlog()
 
+
 class LoadInputsAndTargets():
     """Create a mini-batch from a list of dicts
 
diff --git a/deepspeech/models/lm/transformer.py b/deepspeech/models/lm/transformer.py
index 72082e52..b5f7580a 100644
--- a/deepspeech/models/lm/transformer.py
+++ b/deepspeech/models/lm/transformer.py
@@ -24,11 +24,11 @@ from deepspeech.decoders.scorers.scorer_interface import BatchScorerInterface
 from deepspeech.models.lm_interface import LMInterface
 from deepspeech.modules.encoder import TransformerEncoder
 from deepspeech.modules.mask import subsequent_mask
-
 from deepspeech.utils.log import Log
 
 logger = Log(__name__).getlog()
 
+
 class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
     def __init__(
             self,
diff --git a/deepspeech/modules/embedding.py b/deepspeech/modules/embedding.py
index e154e434..64d594c2 100644
--- a/deepspeech/modules/embedding.py
+++ b/deepspeech/modules/embedding.py
@@ -23,12 +23,14 @@ from deepspeech.utils.log import Log
 logger = Log(__name__).getlog()
 
 __all__ = [
-    "PositionalEncodingInterface", "NoPositionalEncoding", "PositionalEncoding", "RelPositionalEncoding"
+    "PositionalEncodingInterface", "NoPositionalEncoding", "PositionalEncoding",
+    "RelPositionalEncoding"
 ]
 
-class PositionalEncodingInterface:
 
-    def forward(self, x:paddle.Tensor, offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
+class PositionalEncodingInterface:
+    def forward(self, x: paddle.Tensor,
+                offset: int=0) -> Tuple[paddle.Tensor, paddle.Tensor]:
         """Compute positional encoding.
         Args:
             x (paddle.Tensor): Input tensor (batch, time, `*`).
@@ -37,8 +39,8 @@ class PositionalEncodingInterface:
             paddle.Tensor: Positional embedding tensor (1, time, `*`).
         """
         raise NotImplementedError("forward method is not implemented")
-    
-    def position_encoding(self, offset:int, size:int) -> paddle.Tensor:
+
+    def position_encoding(self, offset: int, size: int) -> paddle.Tensor:
         """ For getting encoding in a streaming fashion
         Args:
             offset (int): start offset
diff --git a/deepspeech/modules/encoder.py b/deepspeech/modules/encoder.py
index 79411771..435b6894 100644
--- a/deepspeech/modules/encoder.py
+++ b/deepspeech/modules/encoder.py
@@ -32,7 +32,6 @@ from deepspeech.modules.encoder_layer import TransformerEncoderLayer
 from deepspeech.modules.mask import add_optional_chunk_mask
 from deepspeech.modules.mask import make_non_pad_mask
 from deepspeech.modules.positionwise_feed_forward import PositionwiseFeedForward
-from deepspeech.modules.subsampling import Conv2dSubsampling
 from deepspeech.modules.subsampling import Conv2dSubsampling4
 from deepspeech.modules.subsampling import Conv2dSubsampling6
 from deepspeech.modules.subsampling import Conv2dSubsampling8
@@ -394,13 +393,8 @@ class TransformerEncoder(BaseEncoder):
         if self.global_cmvn is not None:
             xs = self.global_cmvn(xs)
 
-        if isinstance(self.embed, Conv2dSubsampling):
-            #TODO(Hui Zhang): self.embed(xs, masks, offset=0), stride_slice not support bool tensor
-            xs, pos_emb, masks = self.embed(
-                xs, masks.astype(xs.dtype), offset=0)
-        else:
-            xs, pos_emb, masks = self.embed(
-                xs, masks.astype(xs.dtype), offset=0)
+        #TODO(Hui Zhang): call self.embed(xs, masks, offset=0) directly; stride_slice does not support bool tensors yet
+        xs, pos_emb, masks = self.embed(xs, masks.astype(xs.dtype), offset=0)
         #TODO(Hui Zhang): remove mask.astype, stride_slice not support bool tensor
         masks = masks.astype(paddle.bool)
 
diff --git a/deepspeech/transform/__init__.py b/deepspeech/transform/__init__.py
index e69de29b..185a92b8 100644
--- a/deepspeech/transform/__init__.py
+++ b/deepspeech/transform/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/deepspeech/transform/cmvn.py b/deepspeech/transform/cmvn.py
index 09488229..5d318590 100644
--- a/deepspeech/transform/cmvn.py
+++ b/deepspeech/transform/cmvn.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import io
 
 import h5py
@@ -9,16 +22,15 @@ class CMVN():
     "Apply Global/Spk CMVN/iverserCMVN."
 
     def __init__(
-        self,
-        stats,
-        norm_means=True,
-        norm_vars=False,
-        filetype="mat",
-        utt2spk=None,
-        spk2utt=None,
-        reverse=False,
-        std_floor=1.0e-20,
-    ):
+            self,
+            stats,
+            norm_means=True,
+            norm_vars=False,
+            filetype="mat",
+            utt2spk=None,
+            spk2utt=None,
+            reverse=False,
+            std_floor=1.0e-20, ):
         self.stats_file = stats
         self.norm_means = norm_means
         self.norm_vars = norm_vars
@@ -84,17 +96,14 @@ class CMVN():
             self.scale[spk] = 1 / std
 
     def __repr__(self):
-        return (
-            "{name}(stats_file={stats_file}, "
-            "norm_means={norm_means}, norm_vars={norm_vars}, "
-            "reverse={reverse})".format(
-                name=self.__class__.__name__,
-                stats_file=self.stats_file,
-                norm_means=self.norm_means,
-                norm_vars=self.norm_vars,
-                reverse=self.reverse,
-            )
-        )
+        return ("{name}(stats_file={stats_file}, "
+                "norm_means={norm_means}, norm_vars={norm_vars}, "
+                "reverse={reverse})".format(
+                    name=self.__class__.__name__,
+                    stats_file=self.stats_file,
+                    norm_means=self.norm_means,
+                    norm_vars=self.norm_vars,
+                    reverse=self.reverse, ))
 
     def __call__(self, x, uttid=None):
         if self.utt2spk is not None:
@@ -121,6 +130,7 @@ class CMVN():
 
 class UtteranceCMVN():
     "Apply Utterance CMVN"
+
     def __init__(self, norm_means=True, norm_vars=False, std_floor=1.0e-20):
         self.norm_means = norm_means
         self.norm_vars = norm_vars
@@ -130,20 +140,19 @@ class UtteranceCMVN():
         return "{name}(norm_means={norm_means}, norm_vars={norm_vars})".format(
             name=self.__class__.__name__,
             norm_means=self.norm_means,
-            norm_vars=self.norm_vars,
-        )
+            norm_vars=self.norm_vars, )
 
     def __call__(self, x, uttid=None):
         # x: [Time, Dim]
-        square_sums = (x ** 2).sum(axis=0)
+        square_sums = (x**2).sum(axis=0)
         mean = x.mean(axis=0)
 
         if self.norm_means:
             x = np.subtract(x, mean)
 
         if self.norm_vars:
-            var = square_sums / x.shape[0] - mean ** 2
+            var = square_sums / x.shape[0] - mean**2
             std = np.maximum(np.sqrt(var), self.std_floor)
             x = np.divide(x, std)
 
-        return x
\ No newline at end of file
+        return x
diff --git a/deepspeech/utils/cli_readers.py b/deepspeech/utils/cli_readers.py
index d744c0d3..72aa2bdb 100644
--- a/deepspeech/utils/cli_readers.py
+++ b/deepspeech/utils/cli_readers.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 import io
 import logging
 import sys
@@ -10,11 +23,10 @@ from deepspeech.io.reader import SoundHDF5File
 
 
 def file_reader_helper(
-    rspecifier: str,
-    filetype: str = "mat",
-    return_shape: bool = False,
-    segments: str = None,
-):
+        rspecifier: str,
+        filetype: str="mat",
+        return_shape: bool=False,
+        segments: str=None, ):
     """Read uttid and array in kaldi style
 
     This function might be a bit confusing as "ark" is used
@@ -44,7 +56,8 @@ def file_reader_helper(
 
     """
     if filetype == "mat":
-        return KaldiReader(rspecifier, return_shape=return_shape, segments=segments)
+        return KaldiReader(
+            rspecifier, return_shape=return_shape, segments=segments)
     elif filetype == "hdf5":
         return HDF5Reader(rspecifier, return_shape=return_shape)
     elif filetype == "sound.hdf5":
@@ -62,7 +75,8 @@ class KaldiReader:
         self.segments = segments
 
     def __iter__(self):
-        with kaldiio.ReadHelper(self.rspecifier, segments=self.segments) as reader:
+        with kaldiio.ReadHelper(
+                self.rspecifier, segments=self.segments) as reader:
             for key, array in reader:
                 if self.return_shape:
                     array = array.shape
@@ -72,9 +86,8 @@ class KaldiReader:
 class HDF5Reader:
     def __init__(self, rspecifier, return_shape=False):
         if ":" not in rspecifier:
-            raise ValueError(
-                'Give "rspecifier" such as "ark:some.ark: {}"'.format(self.rspecifier)
-            )
+            raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'.
+                             format(self.rspecifier))
         self.rspecifier = rspecifier
         self.ark_or_scp, self.filepath = self.rspecifier.split(":", 1)
         if self.ark_or_scp not in ["ark", "scp"]:
@@ -93,9 +106,7 @@ class HDF5Reader:
                         raise RuntimeError(
                             "scp file for hdf5 should be like: "
                             '"uttid filepath.h5:key": {}({})'.format(
-                                line, self.filepath
-                            )
-                        )
+                                line, self.filepath))
                     path, h5_key = value.split(":", 1)
 
                     hdf5_file = hdf5_dict.get(path)
@@ -110,9 +121,8 @@ class HDF5Reader:
                     try:
                         data = hdf5_file[h5_key]
                     except Exception:
-                        logging.error(
-                            "Error when loading {} with key={}".format(path, h5_key)
-                        )
+                        logging.error("Error when loading {} with key={}".
+                                      format(path, h5_key))
                         raise
 
                     if self.return_shape:
@@ -144,9 +154,8 @@ class HDF5Reader:
 class SoundHDF5Reader:
     def __init__(self, rspecifier, return_shape=False):
         if ":" not in rspecifier:
-            raise ValueError(
-                'Give "rspecifier" such as "ark:some.ark: {}"'.format(rspecifier)
-            )
+            raise ValueError('Give "rspecifier" such as "ark:some.ark: {}"'.
+                             format(rspecifier))
         self.ark_or_scp, self.filepath = rspecifier.split(":", 1)
         if self.ark_or_scp not in ["ark", "scp"]:
             raise ValueError(f"Must be scp or ark: {self.ark_or_scp}")
@@ -163,9 +172,7 @@ class SoundHDF5Reader:
                         raise RuntimeError(
                             "scp file for hdf5 should be like: "
                             '"uttid filepath.h5:key": {}({})'.format(
-                                line, self.filepath
-                            )
-                        )
+                                line, self.filepath))
                     path, h5_key = value.split(":", 1)
 
                     hdf5_file = hdf5_dict.get(path)
@@ -180,9 +187,8 @@ class SoundHDF5Reader:
                     try:
                         data = hdf5_file[h5_key]
                     except Exception:
-                        logging.error(
-                            "Error when loading {} with key={}".format(path, h5_key)
-                        )
+                        logging.error("Error when loading {} with key={}".
+                                      format(path, h5_key))
                         raise
 
                     # Change Tuple[ndarray, int] -> Tuple[int, ndarray]
@@ -214,14 +220,12 @@ class SoundHDF5Reader:
 class SoundReader:
     def __init__(self, rspecifier, return_shape=False):
         if ":" not in rspecifier:
-            raise ValueError(
-                'Give "rspecifier" such as "scp:some.scp: {}"'.format(rspecifier)
-            )
+            raise ValueError('Give "rspecifier" such as "scp:some.scp: {}"'.
+                             format(rspecifier))
         self.ark_or_scp, self.filepath = rspecifier.split(":", 1)
         if self.ark_or_scp != "scp":
-            raise ValueError(
-                'Only supporting "scp" for sound file: {}'.format(self.ark_or_scp)
-            )
+            raise ValueError('Only supporting "scp" for sound file: {}'.format(
+                self.ark_or_scp))
         self.return_shape = return_shape
 
     def __iter__(self):
diff --git a/deepspeech/utils/cli_utils.py b/deepspeech/utils/cli_utils.py
index c4a4cd15..f8e1d60b 100644
--- a/deepspeech/utils/cli_utils.py
+++ b/deepspeech/utils/cli_utils.py
@@ -1,6 +1,19 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import sys
 from collections.abc import Sequence
 from distutils.util import strtobool as dist_strtobool
-import sys
 
 import numpy
 
@@ -36,10 +49,9 @@ def get_commandline_args():
 
     # Escape the extra characters for shell
     argv = [
-        arg.replace("'", "'\\''")
-        if all(char not in arg for char in extra_chars)
-        else "'" + arg.replace("'", "'\\''") + "'"
-        for arg in sys.argv
+        arg.replace("'", "'\\''") if all(char not in arg
+                                         for char in extra_chars) else
+        "'" + arg.replace("'", "'\\''") + "'" for arg in sys.argv
     ]
 
     return sys.executable + " " + " ".join(argv)
@@ -47,19 +59,12 @@ def get_commandline_args():
 
 def is_scipy_wav_style(value):
     # If Tuple[int, numpy.ndarray] or not
-    return (
-        isinstance(value, Sequence)
-        and len(value) == 2
-        and isinstance(value[0], int)
-        and isinstance(value[1], numpy.ndarray)
-    )
+    return (isinstance(value, Sequence) and len(value) == 2 and
+            isinstance(value[0], int) and isinstance(value[1], numpy.ndarray))
 
 
 def assert_scipy_wav_style(value):
     assert is_scipy_wav_style(
-        value
-    ), "Must be Tuple[int, numpy.ndarray], but got {}".format(
-        type(value)
-        if not isinstance(value, Sequence)
-        else "{}[{}]".format(type(value), ", ".join(str(type(v)) for v in value))
-    )
+        value), "Must be Tuple[int, numpy.ndarray], but got {}".format(
+            type(value) if not isinstance(value, Sequence) else "{}[{}]".format(
+                type(value), ", ".join(str(type(v)) for v in value)))
diff --git a/deepspeech/utils/cli_writers.py b/deepspeech/utils/cli_writers.py
index 41e667d3..e0737193 100644
--- a/deepspeech/utils/cli_writers.py
+++ b/deepspeech/utils/cli_writers.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from pathlib import Path
 from typing import Dict
 
@@ -6,18 +19,17 @@ import kaldiio
 import numpy
 import soundfile
 
-from deepspeech.utils.cli_utils import assert_scipy_wav_style
 from deepspeech.io.reader import SoundHDF5File
+from deepspeech.utils.cli_utils import assert_scipy_wav_style
 
 
 def file_writer_helper(
-    wspecifier: str,
-    filetype: str = "mat",
-    write_num_frames: str = None,
-    compress: bool = False,
-    compression_method: int = 2,
-    pcm_format: str = "wav",
-):
+        wspecifier: str,
+        filetype: str="mat",
+        write_num_frames: str=None,
+        compress: bool=False,
+        compression_method: int=2,
+        pcm_format: str="wav", ):
     """Write matrices in kaldi style
 
     Args:
@@ -61,20 +73,20 @@ def file_writer_helper(
             wspecifier,
             write_num_frames=write_num_frames,
             compress=compress,
-            compression_method=compression_method,
-        )
+            compression_method=compression_method, )
     elif filetype == "hdf5":
         return HDF5Writer(
-            wspecifier, write_num_frames=write_num_frames, compress=compress
-        )
+            wspecifier, write_num_frames=write_num_frames, compress=compress)
     elif filetype == "sound.hdf5":
         return SoundHDF5Writer(
-            wspecifier, write_num_frames=write_num_frames, pcm_format=pcm_format
-        )
+            wspecifier,
+            write_num_frames=write_num_frames,
+            pcm_format=pcm_format)
     elif filetype == "sound":
         return SoundWriter(
-            wspecifier, write_num_frames=write_num_frames, pcm_format=pcm_format
-        )
+            wspecifier,
+            write_num_frames=write_num_frames,
+            pcm_format=pcm_format)
     else:
         raise NotImplementedError(f"filetype={filetype}")
 
@@ -116,29 +128,27 @@ def get_num_frames_writer(write_num_frames: str):
     """
     if write_num_frames is not None:
         if ":" not in write_num_frames:
-            raise ValueError(
-                'Must include ":", write_num_frames={}'.format(write_num_frames)
-            )
+            raise ValueError('Must include ":", write_num_frames={}'.format(
+                write_num_frames))
 
         nframes_type, nframes_file = write_num_frames.split(":", 1)
         if nframes_type != "ark,t":
-            raise ValueError(
-                "Only supporting text mode. "
-                "e.g. --write-num-frames=ark,t:foo.txt :"
-                "{}".format(nframes_type)
-            )
+            raise ValueError("Only supporting text mode. "
+                             "e.g. --write-num-frames=ark,t:foo.txt :"
+                             "{}".format(nframes_type))
 
     return open(nframes_file, "w", encoding="utf-8")
 
 
 class KaldiWriter(BaseWriter):
-    def __init__(
-        self, wspecifier, write_num_frames=None, compress=False, compression_method=2
-    ):
+    def __init__(self,
+                 wspecifier,
+                 write_num_frames=None,
+                 compress=False,
+                 compression_method=2):
         if compress:
             self.writer = kaldiio.WriteHelper(
-                wspecifier, compression_method=compression_method
-            )
+                wspecifier, compression_method=compression_method)
         else:
             self.writer = kaldiio.WriteHelper(wspecifier)
         self.writer_scp = None
@@ -220,7 +230,8 @@ class SoundHDF5Writer(BaseWriter):
         self.pcm_format = pcm_format
         spec_dict = parse_wspecifier(wspecifier)
         self.filename = spec_dict["ark"]
-        self.writer = SoundHDF5File(spec_dict["ark"], "w", format=self.pcm_format)
+        self.writer = SoundHDF5File(
+            spec_dict["ark"], "w", format=self.pcm_format)
         if "scp" in spec_dict:
             self.writer_scp = open(spec_dict["scp"], "w", encoding="utf-8")
         else:
diff --git a/requirements.txt b/requirements.txt
index a7310a02..d654ef3d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,6 +23,7 @@ praatio~=4.1
 pre-commit
 pybind11
 pypinyin
+python-dateutil
 pyworld
 resampy==0.2.2
 sacrebleu
diff --git a/utils/apply-cmvn.py b/utils/apply-cmvn.py
index 2b6631c2..f80053fb 100755
--- a/utils/apply-cmvn.py
+++ b/utils/apply-cmvn.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 import argparse
-from distutils.util import strtobool
 import logging
+from distutils.util import strtobool
 
 import kaldiio
 import numpy
@@ -16,86 +16,81 @@ from deepspeech.utils.cli_writers import file_writer_helper
 def get_parser():
     parser = argparse.ArgumentParser(
         description="apply mean-variance normalization to files",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
 
-    parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option")
+    parser.add_argument(
+        "--verbose", "-V", default=0, type=int, help="Verbose option")
     parser.add_argument(
         "--in-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "sound.hdf5", "sound"],
         help="Specify the file format for the rspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
         "--stats-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "npy"],
         help="Specify the file format for the rspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
         "--out-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5"],
         help="Specify the file format for the wspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
 
     parser.add_argument(
         "--norm-means",
         type=strtobool,
         default=True,
-        help="Do variance normalization or not.",
-    )
+        help="Do mean normalization or not.", )
     parser.add_argument(
         "--norm-vars",
         type=strtobool,
         default=False,
-        help="Do variance normalization or not.",
-    )
+        help="Do variance normalization or not.", )
     parser.add_argument(
-        "--reverse", type=strtobool, default=False, help="Do reverse mode or not"
-    )
+        "--reverse",
+        type=strtobool,
+        default=False,
+        help="Do reverse mode or not")
     parser.add_argument(
         "--spk2utt",
         type=str,
         help="A text file of speaker to utterance-list map. "
         "(Don't give rspecifier format, such as "
-        '"ark:spk2utt")',
-    )
+        '"ark:spk2utt")', )
     parser.add_argument(
         "--utt2spk",
         type=str,
         help="A text file of utterance to speaker map. "
         "(Don't give rspecifier format, such as "
-        '"ark:utt2spk")',
-    )
+        '"ark:utt2spk")', )
     parser.add_argument(
-        "--write-num-frames", type=str, help="Specify wspecifer for utt2num_frames"
-    )
+        "--write-num-frames",
+        type=str,
+        help="Specify wspecifier for utt2num_frames")
     parser.add_argument(
-        "--compress", type=strtobool, default=False, help="Save in compressed format"
-    )
+        "--compress",
+        type=strtobool,
+        default=False,
+        help="Save in compressed format")
     parser.add_argument(
         "--compression-method",
         type=int,
         default=2,
-        help="Specify the method(if mat) or " "gzip-level(if hdf5)",
-    )
+        help="Specify the method(if mat) or "
+        "gzip-level(if hdf5)", )
     parser.add_argument(
         "stats_rspecifier_or_rxfilename",
-        help="Input stats. e.g. ark:stats.ark or stats.mat",
-    )
+        help="Input stats. e.g. ark:stats.ark or stats.mat", )
     parser.add_argument(
-        "rspecifier", type=str, help="Read specifier id. e.g. ark:some.ark"
-    )
+        "rspecifier", type=str, help="Read specifier id. e.g. ark:some.ark")
     parser.add_argument(
-        "wspecifier", type=str, help="Write specifier id. e.g. ark:some.ark"
-    )
+        "wspecifier", type=str, help="Write specifier id. e.g. ark:some.ark")
     return parser
 
 
@@ -118,8 +113,8 @@ def main():
             stats_filetype = args.stats_filetype
 
         stats_dict = dict(
-            file_reader_helper(args.stats_rspecifier_or_rxfilename, stats_filetype)
-        )
+            file_reader_helper(args.stats_rspecifier_or_rxfilename,
+                               stats_filetype))
     else:
         is_rspcifier = False
         if args.stats_filetype == "mat":
@@ -134,16 +129,14 @@ def main():
         norm_vars=args.norm_vars,
         utt2spk=args.utt2spk,
         spk2utt=args.spk2utt,
-        reverse=args.reverse,
-    )
+        reverse=args.reverse, )
 
     with file_writer_helper(
-        args.wspecifier,
-        filetype=args.out_filetype,
-        write_num_frames=args.write_num_frames,
-        compress=args.compress,
-        compression_method=args.compression_method,
-    ) as writer:
+            args.wspecifier,
+            filetype=args.out_filetype,
+            write_num_frames=args.write_num_frames,
+            compress=args.compress,
+            compression_method=args.compression_method, ) as writer:
         for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
             if is_scipy_wav_style(mat):
                 # If data is sound file, then got as Tuple[int, ndarray]
diff --git a/utils/caculate_rtf.py b/utils/caculate_rtf.py
index 6be8dffd..fcc155ed 100755
--- a/utils/caculate_rtf.py
+++ b/utils/caculate_rtf.py
@@ -1,24 +1,23 @@
 #!/usr/bin/env python3
 # encoding: utf-8
-
 # Copyright 2021 Kyoto University (Hirofumi Inaguma)
 #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
-
 import argparse
 import codecs
-from dateutil import parser
 import glob
 import os
 
+from dateutil import parser
+
 
 def get_parser():
-    parser = argparse.ArgumentParser(description="calculate real time factor (RTF)")
+    parser = argparse.ArgumentParser(
+        description="calculate real time factor (RTF)")
     parser.add_argument(
         "--log-dir",
         type=str,
         default=None,
-        help="path to logging directory",
-    )
+        help="path to logging directory", )
     return parser
 
 
@@ -37,23 +36,21 @@ def main():
         with codecs.open(x, "r", "utf-8") as f:
             for line in f:
                 x = line.strip()
-                if "INFO: input lengths" in x:
-                    audio_durations += [int(x.split("input lengths: ")[1])]
-                    start_times += [parser.parse(x.split("(")[0])]
-                elif "INFO: prediction" in x:
-                    end_times += [parser.parse(x.split("(")[0])]
-        assert len(audio_durations) == len(end_times), (
-            len(audio_durations),
-            len(end_times),
-        )
-        assert len(start_times) == len(end_times), (len(start_times), len(end_times))
+                # Example log line: 2021-10-25 08:22:04.052 | INFO | xxx:recog_v2:188 - feat: (1570, 83)
+                if "feat:" in x:
+                    dur = int(x.split("(")[1].split(',')[0])
+                    audio_durations += [dur]
+                    start_times += [parser.parse(x.split("|")[0])]
+                elif "total log probability:" in x:
+                    end_times += [parser.parse(x.split("|")[0])]
+        assert len(audio_durations) == len(end_times), (len(audio_durations),
+                                                        len(end_times), )
+        assert len(start_times) == len(end_times), (len(start_times),
+                                                    len(end_times))
+
         audio_sec += sum(audio_durations) / 100  # [sec]
-        decode_sec += sum(
-            [
-                (end - start).total_seconds()
-                for start, end in zip(start_times, end_times)
-            ]
-        )
+        decode_sec += sum([(end - start).total_seconds()
+                           for start, end in zip(start_times, end_times)])
         n_utt += len(audio_durations)
 
     print("Total audio duration: %.3f [sec]" % audio_sec)
diff --git a/utils/compute-cmvn-stats.py b/utils/compute-cmvn-stats.py
index d239d21d..706d8cd5 100755
--- a/utils/compute-cmvn-stats.py
+++ b/utils/compute-cmvn-stats.py
@@ -19,44 +19,42 @@ def get_parser():
         "If wspecifier provided: per-utterance by default, "
         "or per-speaker if"
         "spk2utt option provided; if wxfilename: global",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
     parser.add_argument(
         "--spk2utt",
         type=str,
         help="A text file of speaker to utterance-list map. "
         "(Don't give rspecifier format, such as "
-        '"ark:utt2spk")',
-    )
-    parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option")
+        '"ark:utt2spk")', )
+    parser.add_argument(
+        "--verbose", "-V", default=0, type=int, help="Verbose option")
     parser.add_argument(
         "--in-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "sound.hdf5", "sound"],
         help="Specify the file format for the rspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
         "--out-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "npy"],
         help="Specify the file format for the wspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
         "--preprocess-conf",
         type=str,
         default=None,
-        help="The configuration file for the pre-processing",
-    )
+        help="The configuration file for the pre-processing", )
     parser.add_argument(
-        "rspecifier", type=str, help="Read specifier for feats. e.g. ark:some.ark"
-    )
+        "rspecifier",
+        type=str,
+        help="Read specifier for feats. e.g. ark:some.ark")
     parser.add_argument(
-        "wspecifier_or_wxfilename", type=str, help="Write specifier. e.g. ark:some.ark"
-    )
+        "wspecifier_or_wxfilename",
+        type=str,
+        help="Write specifier. e.g. ark:some.ark")
     return parser
 
 
@@ -92,10 +90,8 @@ def main():
                 return x
 
         if args.out_filetype == "npy":
-            logging.warning(
-                "--out-filetype npy is allowed only for "
-                "Global CMVN mode, changing to hdf5"
-            )
+            logging.warning("--out-filetype npy is allowed only for "
+                            "Global CMVN mode, changing to hdf5")
             args.out_filetype = "hdf5"
 
     else:
@@ -107,10 +103,8 @@ def main():
             return None
 
         if args.out_filetype == "hdf5":
-            logging.warning(
-                "--out-filetype hdf5 is not allowed for "
-                "Global CMVN mode, changing to npy"
-            )
+            logging.warning("--out-filetype hdf5 is not allowed for "
+                            "Global CMVN mode, changing to npy")
             args.out_filetype = "npy"
 
     if args.preprocess_conf is not None:
@@ -126,8 +120,7 @@ def main():
 
     idx = 0
     for idx, (utt, matrix) in enumerate(
-        file_reader_helper(args.rspecifier, args.in_filetype), 1
-    ):
+            file_reader_helper(args.rspecifier, args.in_filetype), 1):
         if is_scipy_wav_style(matrix):
             # If data is sound file, then got as Tuple[int, ndarray]
             rate, matrix = matrix
@@ -146,7 +139,7 @@ def main():
 
         counts[spk] += matrix.shape[0]
         sum_feats[spk] += matrix.sum(axis=0)
-        square_sum_feats[spk] += (matrix ** 2).sum(axis=0)
+        square_sum_feats[spk] += (matrix**2).sum(axis=0)
     logging.info("Processed {} utterances".format(idx))
     assert idx > 0, idx
 
@@ -171,8 +164,8 @@ def main():
     # Per utterance or speaker CMVN
     if is_wspecifier:
         with file_writer_helper(
-            args.wspecifier_or_wxfilename, filetype=args.out_filetype
-        ) as writer:
+                args.wspecifier_or_wxfilename,
+                filetype=args.out_filetype) as writer:
             for spk, mat in cmvn_stats.items():
                 writer[spk] = mat
 
@@ -186,8 +179,7 @@ def main():
             kaldiio.save_mat(args.wspecifier_or_wxfilename, matrix)
         else:
             raise RuntimeError(
-                "Not supporting: --out-filetype {}".format(args.out_filetype)
-            )
+                "Not supporting: --out-filetype {}".format(args.out_filetype))
 
 
 if __name__ == "__main__":
diff --git a/utils/copy-feats.py b/utils/copy-feats.py
index 13c6b543..7d1b8589 100755
--- a/utils/copy-feats.py
+++ b/utils/copy-feats.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 import argparse
-from distutils.util import strtobool
 import logging
+from distutils.util import strtobool
 
 from deepspeech.transform.transformation import Transformation
 from deepspeech.utils.cli_readers import file_reader_helper
@@ -13,50 +13,50 @@ from deepspeech.utils.cli_writers import file_writer_helper
 def get_parser():
     parser = argparse.ArgumentParser(
         description="copy feature with preprocessing",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
 
-    parser.add_argument("--verbose", "-V", default=0, type=int, help="Verbose option")
+    parser.add_argument(
+        "--verbose", "-V", default=0, type=int, help="Verbose option")
     parser.add_argument(
         "--in-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "sound.hdf5", "sound"],
         help="Specify the file format for the rspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
         "--out-filetype",
         type=str,
         default="mat",
         choices=["mat", "hdf5", "sound.hdf5", "sound"],
         help="Specify the file format for the wspecifier. "
-        '"mat" is the matrix format in kaldi',
-    )
+        '"mat" is the matrix format in kaldi', )
     parser.add_argument(
-        "--write-num-frames", type=str, help="Specify wspecifer for utt2num_frames"
-    )
+        "--write-num-frames",
+        type=str,
+        help="Specify wspecifier for utt2num_frames")
     parser.add_argument(
-        "--compress", type=strtobool, default=False, help="Save in compressed format"
-    )
+        "--compress",
+        type=strtobool,
+        default=False,
+        help="Save in compressed format")
     parser.add_argument(
         "--compression-method",
         type=int,
         default=2,
-        help="Specify the method(if mat) or " "gzip-level(if hdf5)",
-    )
+        help="Specify the method(if mat) or "
+        "gzip-level(if hdf5)", )
     parser.add_argument(
         "--preprocess-conf",
         type=str,
         default=None,
-        help="The configuration file for the pre-processing",
-    )
+        help="The configuration file for the pre-processing", )
     parser.add_argument(
-        "rspecifier", type=str, help="Read specifier for feats. e.g. ark:some.ark"
-    )
+        "rspecifier",
+        type=str,
+        help="Read specifier for feats. e.g. ark:some.ark")
     parser.add_argument(
-        "wspecifier", type=str, help="Write specifier. e.g. ark:some.ark"
-    )
+        "wspecifier", type=str, help="Write specifier. e.g. ark:some.ark")
     return parser
 
 
@@ -79,12 +79,11 @@ def main():
         preprocessing = None
 
     with file_writer_helper(
-        args.wspecifier,
-        filetype=args.out_filetype,
-        write_num_frames=args.write_num_frames,
-        compress=args.compress,
-        compression_method=args.compression_method,
-    ) as writer:
+            args.wspecifier,
+            filetype=args.out_filetype,
+            write_num_frames=args.write_num_frames,
+            compress=args.compress,
+            compression_method=args.compression_method, ) as writer:
         for utt, mat in file_reader_helper(args.rspecifier, args.in_filetype):
             if is_scipy_wav_style(mat):
                 # If data is sound file, then got as Tuple[int, ndarray]
diff --git a/utils/data2json.py b/utils/data2json.sh
similarity index 100%
rename from utils/data2json.py
rename to utils/data2json.sh
diff --git a/utils/merge_scp2json.py b/utils/merge_scp2json.py
index 02d912a5..b724a7dd 100755
--- a/utils/merge_scp2json.py
+++ b/utils/merge_scp2json.py
@@ -1,14 +1,12 @@
 #!/usr/bin/env python3
 # encoding: utf-8
-
-
 import argparse
 import codecs
-from distutils.util import strtobool
-from io import open
 import json
 import logging
 import sys
+from distutils.util import strtobool
+from io import open
 
 from deepspeech.utils.cli_utils import get_commandline_args
 
@@ -47,45 +45,41 @@ def get_parser():
         "--input-scps feat:data/feats2.scp shape:data/utt2feat2_shape:shape "
         "--output-scps text:data/text shape:data/utt2text_shape:shape "
         "--scps utt2spk:data/utt2spk".format(sys.argv[0]),
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
     parser.add_argument(
         "--input-scps",
         type=str,
         nargs="*",
         action="append",
         default=[],
-        help="Json files for the inputs",
-    )
+        help="Json files for the inputs", )
     parser.add_argument(
         "--output-scps",
         type=str,
         nargs="*",
         action="append",
         default=[],
-        help="Json files for the outputs",
-    )
+        help="Json files for the outputs", )
     parser.add_argument(
         "--scps",
         type=str,
         nargs="+",
         default=[],
-        help="The json files except for the input and outputs",
-    )
-    parser.add_argument("--verbose", "-V", default=1, type=int, help="Verbose option")
+        help="The json files except for the input and outputs", )
+    parser.add_argument(
+        "--verbose", "-V", default=1, type=int, help="Verbose option")
     parser.add_argument(
         "--allow-one-column",
         type=strtobool,
         default=False,
         help="Allow one column in input scp files. "
-        "In this case, the value will be empty string.",
-    )
+        "In this case, the value will be empty string.", )
     parser.add_argument(
         "--out",
         "-O",
         type=str,
-        help="The output filename. " "If omitted, then output to sys.stdout",
-    )
+        help="The output filename. "
+        "If omitted, then output to sys.stdout", )
     return parser
 
 
@@ -128,37 +122,33 @@ if __name__ == "__main__":
                         # e.g. type_func_str = "int" -> type_func = int
                         type_func = eval(type_func_str)
                     except Exception:
-                        raise RuntimeError("Unknown type: {}".format(type_func_str))
+                        raise RuntimeError(
+                            "Unknown type: {}".format(type_func_str))
 
                     if not callable(type_func):
-                        raise RuntimeError("Unknown type: {}".format(type_func_str))
+                        raise RuntimeError(
+                            "Unknown type: {}".format(type_func_str))
 
                 else:
                     raise RuntimeError(
                         "Format <key>:<filepath> "
                         "or <key>:<filepath>:<type>  "
                         "e.g. feat:data/feat.scp "
-                        "or shape:data/feat.scp:shape: {}".format(key_scp)
-                    )
+                        "or shape:data/feat.scp:shape: {}".format(key_scp))
 
                 for item in lis:
                     if key == item[0]:
-                        raise RuntimeError(
-                            'The key "{}" is duplicated: {} {}'.format(
-                                key, item[3], key_scp
-                            )
-                        )
+                        raise RuntimeError('The key "{}" is duplicated: {} {}'.
+                                           format(key, item[3], key_scp))
 
                 lis.append((key, scp, type_func, key_scp, type_func_str))
             lis_list.append(lis)
 
     # Open  scp files
-    input_fscps = [
-        [open(i[1], "r", encoding="utf-8") for i in il] for il in input_infos
-    ]
-    output_fscps = [
-        [open(i[1], "r", encoding="utf-8") for i in il] for il in output_infos
-    ]
+    input_fscps = [[open(i[1], "r", encoding="utf-8") for i in il]
+                   for il in input_infos]
+    output_fscps = [[open(i[1], "r", encoding="utf-8") for i in il]
+                    for il in output_infos]
     fscps = [[open(i[1], "r", encoding="utf-8") for i in il] for il in infos]
 
     # Note(kamo): What is done here?
@@ -200,12 +190,10 @@ if __name__ == "__main__":
                     if line == "" or first == "":
                         if line != first:
                             concat = sum(input_infos + output_infos + infos, [])
-                            raise RuntimeError(
-                                "The number of lines mismatch "
-                                'between: "{}" and "{}"'.format(
-                                    concat[0][1], concat[count][1]
-                                )
-                            )
+                            raise RuntimeError("The number of lines mismatch "
+                                               'between: "{}" and "{}"'.format(
+                                                   concat[0][1],
+                                                   concat[count][1]))
 
                     elif line.split()[0] != first.split()[0]:
                         concat = sum(input_infos + output_infos + infos, [])
@@ -216,9 +204,7 @@ if __name__ == "__main__":
                                 concat[0][1],
                                 concat[count][1],
                                 first.rstrip(),
-                                line.rstrip(),
-                            )
-                        )
+                                line.rstrip(), ))
                     count += 1
 
         # The end of file
@@ -237,7 +223,8 @@ if __name__ == "__main__":
         ]:
 
             lis = []
-            for idx, (line_list, info_list) in enumerate(zip(_lines, _infos), 1):
+            for idx, (line_list, info_list) in enumerate(
+                    zip(_lines, _infos), 1):
                 if inout == "input":
                     d = {"name": "input{}".format(idx)}
                 elif inout == "output":
@@ -254,9 +241,7 @@ if __name__ == "__main__":
                             raise RuntimeError(
                                 "Format error {}th line in {}: "
                                 ' Expecting "<key> <value>":\n>>> {}'.format(
-                                    nutt, info[1], line
-                                )
-                            )
+                                    nutt, info[1], line))
                         uttid = sps[0]
                         value = ""
                     else:
@@ -274,9 +259,7 @@ if __name__ == "__main__":
                             logging.error(
                                 '"{}" is an invalid function '
                                 "for the {} th line in {}: \n>>> {}".format(
-                                    info[4], nutt, info[1], line
-                                )
-                            )
+                                    info[4], nutt, info[1], line))
                             raise
 
                     d[key] = value
@@ -289,8 +272,11 @@ if __name__ == "__main__":
                 entry.update(lis[0])
 
         entry = json.dumps(
-            entry, indent=4, ensure_ascii=False, sort_keys=True, separators=(",", ": ")
-        )
+            entry,
+            indent=4,
+            ensure_ascii=False,
+            sort_keys=True,
+            separators=(",", ": "))
         # Add indent
         indent = "    " * 2
         entry = ("\n" + indent).join(entry.split("\n"))
diff --git a/utils/text2token.py b/utils/text2token.py
index 56c39138..4b25612e 100755
--- a/utils/text2token.py
+++ b/utils/text2token.py
@@ -1,9 +1,6 @@
 #!/usr/bin/env python3
-
 # Copyright 2017 Johns Hopkins University (Shinji Watanabe)
 #  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
-
-
 import argparse
 import codecs
 import re
@@ -27,28 +24,26 @@ def exist_or_not(i, match_pos):
 def get_parser():
     parser = argparse.ArgumentParser(
         description="convert raw text to tokenized text",
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
-    )
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter, )
     parser.add_argument(
         "--nchar",
         "-n",
         default=1,
         type=int,
         help="number of characters to split, i.e., \
-                        aabb -> a a b b with -n 1 and aa bb with -n 2",
-    )
+                        aabb -> a a b b with -n 1 and aa bb with -n 2", )
     parser.add_argument(
-        "--skip-ncols", "-s", default=0, type=int, help="skip first n columns"
-    )
-    parser.add_argument("--space", default="<space>", type=str, help="space symbol")
+        "--skip-ncols", "-s", default=0, type=int, help="skip first n columns")
+    parser.add_argument(
+        "--space", default="<space>", type=str, help="space symbol")
     parser.add_argument(
         "--non-lang-syms",
         "-l",
         default=None,
         type=str,
-        help="list of non-linguistic symobles, e.g., <NOISE> etc.",
-    )
-    parser.add_argument("text", type=str, default=False, nargs="?", help="input text")
+        help="list of non-linguistic symbols, e.g., <NOISE> etc.", )
+    parser.add_argument(
+        "text", type=str, default=False, nargs="?", help="input text")
     parser.add_argument(
         "--trans_type",
         "-t",
@@ -60,8 +55,7 @@ def get_parser():
                         read from SI1279.WRD file -> "bricks are an alternative"
                         Else if trans_type is phn,
                         read from SI1279.PHN file -> "sil b r ih sil k s aa r er n aa l
-                        sil t er n ih sil t ih v sil" """,
-    )
+                        sil t er n ih sil t ih v sil" """, )
     return parser
 
 
@@ -78,17 +72,17 @@ def main():
     if args.text:
         f = codecs.open(args.text, encoding="utf-8")
     else:
-        f = codecs.getreader("utf-8")(sys.stdin if is_python2 else sys.stdin.buffer)
+        f = codecs.getreader("utf-8")(sys.stdin
+                                      if is_python2 else sys.stdin.buffer)
 
-    sys.stdout = codecs.getwriter("utf-8")(
-        sys.stdout if is_python2 else sys.stdout.buffer
-    )
+    sys.stdout = codecs.getwriter("utf-8")(sys.stdout
+                                           if is_python2 else sys.stdout.buffer)
     line = f.readline()
     n = args.nchar
     while line:
         x = line.split()
-        print(" ".join(x[: args.skip_ncols]), end=" ")
-        a = " ".join(x[args.skip_ncols :])
+        print(" ".join(x[:args.skip_ncols]), end=" ")
+        a = " ".join(x[args.skip_ncols:])
 
         # get all matched positions
         match_pos = []
@@ -118,7 +112,7 @@ def main():
                         i += 1
                 a = chars
 
-            a = [a[j : j + n] for j in range(0, len(a), n)]
+            a = [a[j:j + n] for j in range(0, len(a), n)]
 
         a_flat = []
         for z in a: