From 0a5624fe614c8316e85572c6f180c1214ef7fd10 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 24 Feb 2022 10:58:31 +0000
Subject: [PATCH 1/2] update ctc loss comparison notebook

---
 docs/topic/ctc/ctc_loss_compare.ipynb | 150 +++++++++++++-------------
 1 file changed, 73 insertions(+), 77 deletions(-)

diff --git a/docs/topic/ctc/ctc_loss_compare.ipynb b/docs/topic/ctc/ctc_loss_compare.ipynb
index 95b2af508..c313710c2 100644
--- a/docs/topic/ctc/ctc_loss_compare.ipynb
+++ b/docs/topic/ctc/ctc_loss_compare.ipynb
@@ -30,12 +30,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Cloning into 'warp-ctc'...\n",
-      "remote: Enumerating objects: 829, done.\u001b[K\n",
-      "remote: Total 829 (delta 0), reused 0 (delta 0), pack-reused 829\u001b[K\n",
-      "Receiving objects: 100% (829/829), 388.85 KiB | 140.00 KiB/s, done.\n",
-      "Resolving deltas: 100% (419/419), done.\n",
-      "Checking connectivity... done.\n"
+      "fatal: destination path 'warp-ctc' already exists and is not an empty directory.\r\n"
      ]
     }
    ],
@@ -99,30 +94,6 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "-- The C compiler identification is GNU 5.4.0\n",
-      "-- The CXX compiler identification is GNU 5.4.0\n",
-      "-- Check for working C compiler: /usr/bin/cc\n",
-      "-- Check for working C compiler: /usr/bin/cc -- works\n",
-      "-- Detecting C compiler ABI info\n",
-      "-- Detecting C compiler ABI info - done\n",
-      "-- Detecting C compile features\n",
-      "-- Detecting C compile features - done\n",
-      "-- Check for working CXX compiler: /usr/bin/c++\n",
-      "-- Check for working CXX compiler: /usr/bin/c++ -- works\n",
-      "-- Detecting CXX compiler ABI info\n",
-      "-- Detecting CXX compiler ABI info - done\n",
-      "-- Detecting CXX compile features\n",
-      "-- Detecting CXX compile features - done\n",
-      "-- Looking for pthread.h\n",
-      "-- Looking for pthread.h - found\n",
-      "-- Performing Test CMAKE_HAVE_LIBC_PTHREAD\n",
-      "-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed\n",
-      "-- Looking for pthread_create in pthreads\n",
-      "-- Looking for pthread_create in pthreads - not found\n",
-      "-- Looking for pthread_create in pthread\n",
-      "-- Looking for pthread_create in pthread - found\n",
-      "-- Found Threads: TRUE  \n",
-      "-- Found CUDA: /usr/local/cuda (found suitable version \"10.2\", minimum required is \"6.5\") \n",
       "-- cuda found TRUE\n",
       "-- Building shared library with GPU support\n",
       "-- Configuring done\n",
@@ -145,20 +116,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[ 11%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_reduce.cu.o\u001b[0m\n",
-      "[ 22%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_ctc_entrypoint.cu.o\u001b[0m\n",
-      "\u001b[35m\u001b[1mScanning dependencies of target warpctc\u001b[0m\n",
-      "[ 33%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n",
+      "[ 11%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n",
       "[ 33%] Built target warpctc\n",
-      "[ 44%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/test_gpu.dir/tests/test_gpu_generated_test_gpu.cu.o\u001b[0m\n",
-      "\u001b[35m\u001b[1mScanning dependencies of target test_cpu\u001b[0m\n",
-      "[ 55%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/test_cpu.cpp.o\u001b[0m\n",
-      "[ 66%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/random.cpp.o\u001b[0m\n",
-      "[ 77%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n",
+      "[ 44%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n",
+      "[ 55%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n",
       "[ 77%] Built target test_cpu\n",
-      "\u001b[35m\u001b[1mScanning dependencies of target test_gpu\u001b[0m\n",
-      "[ 88%] \u001b[32mBuilding CXX object CMakeFiles/test_gpu.dir/tests/random.cpp.o\u001b[0m\n",
-      "[100%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n",
       "[100%] Built target test_gpu\n"
      ]
     }
@@ -169,7 +131,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 9,
    "id": "31761a31",
    "metadata": {},
    "outputs": [
@@ -187,7 +149,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 10,
    "id": "f53316f6",
    "metadata": {},
    "outputs": [
@@ -205,7 +167,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 11,
    "id": "084f1e49",
    "metadata": {},
    "outputs": [
@@ -216,29 +178,20 @@
       "running install\n",
       "running bdist_egg\n",
       "running egg_info\n",
-      "creating warpctc_pytorch.egg-info\n",
       "writing warpctc_pytorch.egg-info/PKG-INFO\n",
       "writing dependency_links to warpctc_pytorch.egg-info/dependency_links.txt\n",
       "writing top-level names to warpctc_pytorch.egg-info/top_level.txt\n",
       "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n",
-      "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n",
       "installing library code to build/bdist.linux-x86_64/egg\n",
       "running install_lib\n",
       "running build_py\n",
-      "creating build\n",
-      "creating build/lib.linux-x86_64-3.9\n",
-      "creating build/lib.linux-x86_64-3.9/warpctc_pytorch\n",
-      "copying warpctc_pytorch/__init__.py -> build/lib.linux-x86_64-3.9/warpctc_pytorch\n",
       "running build_ext\n",
       "building 'warpctc_pytorch._warp_ctc' extension\n",
-      "creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9\n",
-      "creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src\n",
       "Emitting ninja build file /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/build.ninja...\n",
       "Compiling objects...\n",
       "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n",
-      "[1/1] c++ -MMD -MF /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o.d -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -I/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/TH -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/THC -I/usr/local/cuda/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/include/python3.9 -c -c /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/src/binding.cpp -o /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -std=c++14 -fPIC -DWARPCTC_ENABLE_GPU -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE=\"_gcc\"' '-DPYBIND11_STDLIB=\"_libstdcpp\"' '-DPYBIND11_BUILD_ABI=\"_cxxabi1011\"' -DTORCH_EXTENSION_NAME=_warp_ctc -D_GLIBCXX_USE_CXX11_ABI=0\n",
+      "ninja: no work to do.\n",
       "g++ -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -shared -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -L/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/lib -L/usr/local/cuda/lib64 -lwarpctc -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.9/warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n",
-      "creating build/bdist.linux-x86_64\n",
       "creating build/bdist.linux-x86_64/egg\n",
       "creating build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
       "copying build/lib.linux-x86_64-3.9/warpctc_pytorch/__init__.py -> build/bdist.linux-x86_64/egg/warpctc_pytorch\n",
@@ -254,7 +207,6 @@
       "writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt\n",
       "zip_safe flag not set; analyzing archive contents...\n",
       "warpctc_pytorch.__pycache__._warp_ctc.cpython-39: module references __file__\n",
-      "creating dist\n",
       "creating 'dist/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n",
       "removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n",
       "Processing warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n",
@@ -275,7 +227,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 12,
    "id": "ee4ca9e3",
    "metadata": {},
    "outputs": [
@@ -293,7 +245,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 13,
    "id": "59255ed8",
    "metadata": {},
    "outputs": [
@@ -311,21 +263,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 22,
    "id": "1dae09b9",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
     "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
     "import warpctc_pytorch as wp\n",
     "import paddle.nn as pn\n",
     "import paddle"
@@ -333,7 +278,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 15,
    "id": "83d0762e",
    "metadata": {},
    "outputs": [
@@ -343,7 +288,7 @@
        "'1.10.0+cu102'"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -354,17 +299,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 16,
    "id": "62501e2c",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "'2.2.0'"
+       "'2.2.1'"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -375,7 +320,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 17,
    "id": "9e8e0f40",
    "metadata": {},
    "outputs": [
@@ -392,6 +337,7 @@
     }
    ],
    "source": [
+    "# warpctc_pytorch CTCLoss\n",
     "probs = torch.FloatTensor([[\n",
     "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
     "    ]]).transpose(0, 1).contiguous()\n",
@@ -412,7 +358,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 18,
    "id": "2cd46569",
    "metadata": {},
    "outputs": [
@@ -428,6 +374,7 @@
     }
    ],
    "source": [
+    "# pytorch CTCLoss\n",
     "probs = torch.FloatTensor([[\n",
     "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
     "    ]]).transpose(0, 1).contiguous()\n",
@@ -449,7 +396,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 27,
    "id": "85c3461a",
    "metadata": {},
    "outputs": [
@@ -467,6 +414,7 @@
     }
    ],
    "source": [
+    "# Paddle CTCLoss\n",
     "paddle.set_device('cpu')\n",
     "probs = paddle.to_tensor([[\n",
     "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1],\n",
@@ -490,7 +438,55 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d390cd91",
+   "id": "8cdf76c2",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "2c305eaf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([2, 1, 5])\n",
+      "2.4628584384918213\n",
+      "[[[ 0.17703117 -0.7081247   0.17703117  0.17703117  0.17703117]]\n",
+      "\n",
+      " [[ 0.17703117  0.17703117 -0.7081247   0.17703117  0.17703117]]]\n"
+     ]
+    }
+   ],
+   "source": [
+    "# warpctc_pytorch CTCLoss, log_softmax idempotent\n",
+    "probs = torch.FloatTensor([[\n",
+    "        [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n",
+    "    ]]).transpose(0, 1).contiguous()\n",
+    "print(probs.size())\n",
+    "labels = torch.IntTensor([1, 2])\n",
+    "label_sizes = torch.IntTensor([2])\n",
+    "probs_sizes = torch.IntTensor([2])\n",
+    "probs.requires_grad_(True)\n",
+    "bs = probs.size(1)\n",
+    "\n",
+    "ctc_loss = wp.CTCLoss(size_average=False, length_average=False)\n",
+    "\n",
+    "log_probs = torch.log_softmax(probs, axis=-1)\n",
+    "cost = ctc_loss(log_probs, labels, probs_sizes, label_sizes)\n",
+    "cost = cost.sum() / bs\n",
+    "print(cost.item())\n",
+    "cost.backward()\n",
+    "print(probs.grad.numpy())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "443336f0",
    "metadata": {},
    "outputs": [],
    "source": []

From 6b1fe701008de6d344576eb4e56b66250102380b Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 24 Feb 2022 11:14:30 +0000
Subject: [PATCH 2/2] format code,test=doc

---
 .pre-commit-config.yaml                       |  3 +-
 dataset/voxceleb/voxceleb1.py                 |  4 +++
 examples/ami/sd0/local/ami_prepare.py         | 14 ++++-----
 .../sv0/local/make_voxceleb_kaldi_trial.py    | 31 +++++++++++--------
 paddlespeech/__init__.py                      | 11 -------
 paddlespeech/cli/asr/infer.py                 |  3 +-
 paddlespeech/s2t/io/utility.py                |  2 +-
 paddlespeech/t2s/datasets/dataset.py          |  2 +-
 utils/DER.py                                  |  3 +-
 9 files changed, 36 insertions(+), 37 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 60f0b92f6..7fb01708a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -50,12 +50,13 @@ repos:
         entry: bash .pre-commit-hooks/clang-format.hook -i
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$
+        exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
     -   id: copyright_checker
         name: copyright_checker
         entry: python .pre-commit-hooks/copyright-check.hook
         language: system
         files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
-        exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$
+        exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$
 -   repo: https://github.com/asottile/reorder_python_imports
     rev: v2.4.0
     hooks:
diff --git a/dataset/voxceleb/voxceleb1.py b/dataset/voxceleb/voxceleb1.py
index ce7447516..e50c91bc1 100644
--- a/dataset/voxceleb/voxceleb1.py
+++ b/dataset/voxceleb/voxceleb1.py
@@ -80,6 +80,7 @@ parser.add_argument(
 
 args = parser.parse_args()
 
+
 def create_manifest(data_dir, manifest_path_prefix):
     print("Creating manifest %s ..." % manifest_path_prefix)
     json_lines = []
@@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix):
         print(f"{total_text / total_sec} text/sec", file=f)
         print(f"{total_sec / total_num} sec/utt", file=f)
 
+
 def prepare_dataset(base_url, data_list, target_dir, manifest_path,
                     target_data):
     if not os.path.exists(target_dir):
@@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path,
     # create the manifest file
     create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path)
 
+
 def main():
     if args.target_dir.startswith('~'):
         args.target_dir = os.path.expanduser(args.target_dir)
@@ -184,5 +187,6 @@ def main():
 
     print("Manifest prepare done!")
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/ami/sd0/local/ami_prepare.py b/examples/ami/sd0/local/ami_prepare.py
index b7bb8e67e..d03810a77 100644
--- a/examples/ami/sd0/local/ami_prepare.py
+++ b/examples/ami/sd0/local/ami_prepare.py
@@ -22,19 +22,17 @@ Authors
  * qingenz123@126.com (Qingen ZHAO) 2022
 
 """
-
-import os
-import logging
 import argparse
-import xml.etree.ElementTree as et
 import glob
 import json
-from ami_splits import get_AMI_split
+import logging
+import os
+import xml.etree.ElementTree as et
 from distutils.util import strtobool
 
-from dataio import (
-    load_pkl,
-    save_pkl, )
+from ami_splits import get_AMI_split
+from dataio import load_pkl
+from dataio import save_pkl
 
 logger = logging.getLogger(__name__)
 SAMPLERATE = 16000
diff --git a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py
index c92ede1ab..4e9639dc7 100644
--- a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py
+++ b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py
@@ -12,28 +12,30 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
 """
 Make VoxCeleb1 trial of kaldi format
 this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt 
 to kaldi trial format
 """
-
 import argparse
 import codecs
 import os
 
 parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument("--voxceleb_trial",
-                    default="voxceleb1_test_v2",
-                    type=str,
-                    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt")
-parser.add_argument("--trial",
-                    default="data/test/trial",
-                    type=str,
-                    help="Kaldi format trial file")
+parser.add_argument(
+    "--voxceleb_trial",
+    default="voxceleb1_test_v2",
+    type=str,
+    help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt"
+)
+parser.add_argument(
+    "--trial",
+    default="data/test/trial",
+    type=str,
+    help="Kaldi format trial file")
 args = parser.parse_args()
 
+
 def main(voxceleb_trial, trial):
     """
         VoxCeleb provide several trial file, which format is different with kaldi format.
@@ -58,7 +60,9 @@ def main(voxceleb_trial, trial):
     """
     print("Start convert the voxceleb trial to kaldi format")
     if not os.path.exists(voxceleb_trial):
-        raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial))
+        raise RuntimeError(
+            "{} does not exist. Pleas input the correct file path".format(
+                voxceleb_trial))
 
     trial_dirname = os.path.dirname(trial)
     if not os.path.exists(trial_dirname):
@@ -66,9 +70,9 @@ def main(voxceleb_trial, trial):
 
     with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \
          codecs.open(trial, 'w', encoding='utf-8') as w:
-         for line in f:
+        for line in f:
             target_or_nontarget, path1, path2 = line.strip().split()
-             
+
             utt_id1 = "-".join(path1.split("/"))
             utt_id2 = "-".join(path2.split("/"))
             target = "nontarget"
@@ -77,5 +81,6 @@ def main(voxceleb_trial, trial):
             w.write("{} {} {}\n".format(utt_id1, utt_id2, target))
     print("Convert the voxceleb trial to kaldi format successfully")
 
+
 if __name__ == "__main__":
     main(args.voxceleb_trial, args.trial)
diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py
index 42537b159..185a92b8d 100644
--- a/paddlespeech/__init__.py
+++ b/paddlespeech/__init__.py
@@ -11,14 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
-
-
-
-
-
-
-
-
-
diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py
index 7f648b4c3..1fb4be434 100644
--- a/paddlespeech/cli/asr/infer.py
+++ b/paddlespeech/cli/asr/infer.py
@@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor):
     def _check(self, audio_file: str, sample_rate: int, force_yes: bool):
         self.sample_rate = sample_rate
         if self.sample_rate != 16000 and self.sample_rate != 8000:
-            logger.error("invalid sample rate, please input --sr 8000 or --sr 16000")
+            logger.error(
+                "invalid sample rate, please input --sr 8000 or --sr 16000")
             return False
 
         if isinstance(audio_file, (str, os.PathLike)):
diff --git a/paddlespeech/s2t/io/utility.py b/paddlespeech/s2t/io/utility.py
index ce5e77230..c08b5535a 100644
--- a/paddlespeech/s2t/io/utility.py
+++ b/paddlespeech/s2t/io/utility.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from typing import List
 from io import BytesIO
+from typing import List
 
 import numpy as np
 
diff --git a/paddlespeech/t2s/datasets/dataset.py b/paddlespeech/t2s/datasets/dataset.py
index f81c2877c..2d6c03cb1 100644
--- a/paddlespeech/t2s/datasets/dataset.py
+++ b/paddlespeech/t2s/datasets/dataset.py
@@ -258,4 +258,4 @@ class ChainDataset(Dataset):
                 return dataset[i]
             i -= len(dataset)
 
-        raise IndexError("dataset index out of range")
\ No newline at end of file
+        raise IndexError("dataset index out of range")
diff --git a/utils/DER.py b/utils/DER.py
index 5b62094df..d6ab695d8 100755
--- a/utils/DER.py
+++ b/utils/DER.py
@@ -23,10 +23,11 @@ Credits
  This code is adapted from https://github.com/nryant/dscore
 """
 import argparse
-from distutils.util import strtobool
 import os
 import re
 import subprocess
+from distutils.util import strtobool
+
 import numpy as np
 
 FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)")