From 0a5624fe614c8316e85572c6f180c1214ef7fd10 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 24 Feb 2022 10:58:31 +0000 Subject: [PATCH 1/8] update ctc loss compare --- docs/topic/ctc/ctc_loss_compare.ipynb | 150 +++++++++++++------------- 1 file changed, 73 insertions(+), 77 deletions(-) diff --git a/docs/topic/ctc/ctc_loss_compare.ipynb b/docs/topic/ctc/ctc_loss_compare.ipynb index 95b2af508..c313710c2 100644 --- a/docs/topic/ctc/ctc_loss_compare.ipynb +++ b/docs/topic/ctc/ctc_loss_compare.ipynb @@ -30,12 +30,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Cloning into 'warp-ctc'...\n", - "remote: Enumerating objects: 829, done.\u001b[K\n", - "remote: Total 829 (delta 0), reused 0 (delta 0), pack-reused 829\u001b[K\n", - "Receiving objects: 100% (829/829), 388.85 KiB | 140.00 KiB/s, done.\n", - "Resolving deltas: 100% (419/419), done.\n", - "Checking connectivity... done.\n" + "fatal: destination path 'warp-ctc' already exists and is not an empty directory.\r\n" ] } ], @@ -99,30 +94,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "-- The C compiler identification is GNU 5.4.0\n", - "-- The CXX compiler identification is GNU 5.4.0\n", - "-- Check for working C compiler: /usr/bin/cc\n", - "-- Check for working C compiler: /usr/bin/cc -- works\n", - "-- Detecting C compiler ABI info\n", - "-- Detecting C compiler ABI info - done\n", - "-- Detecting C compile features\n", - "-- Detecting C compile features - done\n", - "-- Check for working CXX compiler: /usr/bin/c++\n", - "-- Check for working CXX compiler: /usr/bin/c++ -- works\n", - "-- Detecting CXX compiler ABI info\n", - "-- Detecting CXX compiler ABI info - done\n", - "-- Detecting CXX compile features\n", - "-- Detecting CXX compile features - done\n", - "-- Looking for pthread.h\n", - "-- Looking for pthread.h - found\n", - "-- Performing Test CMAKE_HAVE_LIBC_PTHREAD\n", - "-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed\n", - "-- Looking for pthread_create in pthreads\n", - "-- Looking for pthread_create in pthreads - not found\n", - "-- Looking for pthread_create in pthread\n", - "-- Looking for pthread_create in pthread - found\n", - "-- Found Threads: TRUE \n", - "-- Found CUDA: /usr/local/cuda (found suitable version \"10.2\", minimum required is \"6.5\") \n", "-- cuda found TRUE\n", "-- Building shared library with GPU support\n", "-- Configuring done\n", @@ -145,20 +116,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 11%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_reduce.cu.o\u001b[0m\n", - "[ 22%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_ctc_entrypoint.cu.o\u001b[0m\n", - "\u001b[35m\u001b[1mScanning dependencies of target warpctc\u001b[0m\n", - "[ 33%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n", + "[ 11%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n", "[ 33%] Built target warpctc\n", - "[ 44%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/test_gpu.dir/tests/test_gpu_generated_test_gpu.cu.o\u001b[0m\n", - "\u001b[35m\u001b[1mScanning dependencies of target test_cpu\u001b[0m\n", - "[ 55%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/test_cpu.cpp.o\u001b[0m\n", - "[ 66%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/random.cpp.o\u001b[0m\n", - "[ 77%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n", + "[ 44%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n", + "[ 55%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n", "[ 77%] Built target test_cpu\n", - "\u001b[35m\u001b[1mScanning dependencies of target test_gpu\u001b[0m\n", - "[ 88%] \u001b[32mBuilding CXX object CMakeFiles/test_gpu.dir/tests/random.cpp.o\u001b[0m\n", - "[100%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n", "[100%] Built target test_gpu\n" ] } @@ -169,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "31761a31", "metadata": {}, "outputs": [ @@ -187,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "f53316f6", "metadata": {}, "outputs": [ @@ -205,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "084f1e49", "metadata": {}, "outputs": [ @@ -216,29 +178,20 @@ "running install\n", "running bdist_egg\n", "running egg_info\n", - "creating warpctc_pytorch.egg-info\n", "writing warpctc_pytorch.egg-info/PKG-INFO\n", "writing dependency_links to warpctc_pytorch.egg-info/dependency_links.txt\n", "writing top-level names to warpctc_pytorch.egg-info/top_level.txt\n", "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n", - "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n", "installing library code to build/bdist.linux-x86_64/egg\n", "running install_lib\n", "running build_py\n", - "creating build\n", - "creating build/lib.linux-x86_64-3.9\n", - "creating build/lib.linux-x86_64-3.9/warpctc_pytorch\n", - "copying warpctc_pytorch/__init__.py -> build/lib.linux-x86_64-3.9/warpctc_pytorch\n", "running build_ext\n", "building 'warpctc_pytorch._warp_ctc' extension\n", - "creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9\n", - "creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src\n", "Emitting ninja build file /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/build.ninja...\n", "Compiling objects...\n", "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "[1/1] c++ -MMD -MF /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o.d -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -I/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/TH -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/THC -I/usr/local/cuda/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/include/python3.9 -c -c /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/src/binding.cpp -o /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -std=c++14 -fPIC -DWARPCTC_ENABLE_GPU -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE=\"_gcc\"' '-DPYBIND11_STDLIB=\"_libstdcpp\"' '-DPYBIND11_BUILD_ABI=\"_cxxabi1011\"' -DTORCH_EXTENSION_NAME=_warp_ctc -D_GLIBCXX_USE_CXX11_ABI=0\n", + "ninja: no work to do.\n", "g++ -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -shared -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -L/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/lib -L/usr/local/cuda/lib64 -lwarpctc -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.9/warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n", - "creating build/bdist.linux-x86_64\n", "creating build/bdist.linux-x86_64/egg\n", "creating build/bdist.linux-x86_64/egg/warpctc_pytorch\n", "copying build/lib.linux-x86_64-3.9/warpctc_pytorch/__init__.py -> build/bdist.linux-x86_64/egg/warpctc_pytorch\n", @@ -254,7 +207,6 @@ "writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt\n", "zip_safe flag not set; analyzing archive contents...\n", "warpctc_pytorch.__pycache__._warp_ctc.cpython-39: module references __file__\n", - "creating dist\n", "creating 'dist/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n", "removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n", "Processing warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n", @@ -275,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "ee4ca9e3", "metadata": {}, "outputs": [ @@ -293,7 +245,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "59255ed8", "metadata": {}, "outputs": [ @@ -311,21 +263,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, "id": "1dae09b9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n" - ] - } - ], + "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", + "import torch.nn.functional as F\n", "import warpctc_pytorch as wp\n", "import paddle.nn as pn\n", "import paddle" @@ -333,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "83d0762e", "metadata": {}, "outputs": [ @@ -343,7 +288,7 @@ "'1.10.0+cu102'" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -354,17 +299,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "62501e2c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'2.2.0'" + "'2.2.1'" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -375,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "9e8e0f40", "metadata": {}, "outputs": [ @@ -392,6 +337,7 @@ } ], "source": [ + "# warpctc_pytorch CTCLoss\n", "probs = torch.FloatTensor([[\n", " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n", " ]]).transpose(0, 1).contiguous()\n", @@ -412,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "2cd46569", "metadata": {}, "outputs": [ @@ -428,6 +374,7 @@ } ], "source": [ + "# pytorch CTCLoss\n", "probs = torch.FloatTensor([[\n", " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n", " ]]).transpose(0, 1).contiguous()\n", @@ -449,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "id": "85c3461a", "metadata": {}, "outputs": [ @@ -467,6 +414,7 @@ } ], "source": [ + "# Paddle CTCLoss\n", "paddle.set_device('cpu')\n", "probs = paddle.to_tensor([[\n", " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1],\n", @@ -490,7 +438,55 @@ { "cell_type": "code", "execution_count": null, - "id": "d390cd91", + "id": "8cdf76c2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "2c305eaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([2, 1, 5])\n", + "2.4628584384918213\n", + "[[[ 0.17703117 -0.7081247 0.17703117 0.17703117 0.17703117]]\n", + "\n", + " [[ 0.17703117 0.17703117 -0.7081247 0.17703117 0.17703117]]]\n" + ] + } + ], + "source": [ + "# warpctc_pytorch CTCLoss, log_softmax idempotent\n", + "probs = torch.FloatTensor([[\n", + " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n", + " ]]).transpose(0, 1).contiguous()\n", + "print(probs.size())\n", + "labels = torch.IntTensor([1, 2])\n", + "label_sizes = torch.IntTensor([2])\n", + "probs_sizes = torch.IntTensor([2])\n", + "probs.requires_grad_(True)\n", + "bs = probs.size(1)\n", + "\n", + "ctc_loss = wp.CTCLoss(size_average=False, length_average=False)\n", + "\n", + "log_probs = torch.log_softmax(probs, axis=-1)\n", + "cost = ctc_loss(log_probs, labels, probs_sizes, label_sizes)\n", + "cost = cost.sum() / bs\n", + "print(cost.item())\n", + "cost.backward()\n", + "print(probs.grad.numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "443336f0", "metadata": {}, "outputs": [], "source": [] From 6b1fe701008de6d344576eb4e56b66250102380b Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 24 Feb 2022 11:14:30 +0000 Subject: [PATCH 2/8] format code,test=doc --- .pre-commit-config.yaml | 3 +- dataset/voxceleb/voxceleb1.py | 4 +++ examples/ami/sd0/local/ami_prepare.py | 14 ++++----- .../sv0/local/make_voxceleb_kaldi_trial.py | 31 +++++++++++-------- paddlespeech/__init__.py | 11 ------- paddlespeech/cli/asr/infer.py | 3 +- paddlespeech/s2t/io/utility.py | 2 +- paddlespeech/t2s/datasets/dataset.py | 2 +- utils/DER.py | 3 +- 9 files changed, 36 insertions(+), 37 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 60f0b92f6..7fb01708a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -50,12 +50,13 @@ repos: entry: bash .pre-commit-hooks/clang-format.hook -i language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|cuh|proto)$ + exclude: (?=speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$ - id: copyright_checker name: copyright_checker entry: python .pre-commit-hooks/copyright-check.hook language: system files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$ - exclude: (?=third_party|pypinyin).*(\.cpp|\.h|\.py)$ + exclude: (?=third_party|pypinyin|speechx/speechx/kaldi).*(\.cpp|\.cc|\.h|\.py)$ - repo: https://github.com/asottile/reorder_python_imports rev: v2.4.0 hooks: diff --git a/dataset/voxceleb/voxceleb1.py b/dataset/voxceleb/voxceleb1.py index ce7447516..e50c91bc1 100644 --- a/dataset/voxceleb/voxceleb1.py +++ b/dataset/voxceleb/voxceleb1.py @@ -80,6 +80,7 @@ parser.add_argument( args = parser.parse_args() + def create_manifest(data_dir, manifest_path_prefix): print("Creating manifest %s ..." % manifest_path_prefix) json_lines = [] @@ -128,6 +129,7 @@ def create_manifest(data_dir, manifest_path_prefix): print(f"{total_text / total_sec} text/sec", file=f) print(f"{total_sec / total_num} sec/utt", file=f) + def prepare_dataset(base_url, data_list, target_dir, manifest_path, target_data): if not os.path.exists(target_dir): @@ -164,6 +166,7 @@ def prepare_dataset(base_url, data_list, target_dir, manifest_path, # create the manifest file create_manifest(data_dir=target_dir, manifest_path_prefix=manifest_path) + def main(): if args.target_dir.startswith('~'): args.target_dir = os.path.expanduser(args.target_dir) @@ -184,5 +187,6 @@ def main(): print("Manifest prepare done!") + if __name__ == '__main__': main() diff --git a/examples/ami/sd0/local/ami_prepare.py b/examples/ami/sd0/local/ami_prepare.py index b7bb8e67e..d03810a77 100644 --- a/examples/ami/sd0/local/ami_prepare.py +++ b/examples/ami/sd0/local/ami_prepare.py @@ -22,19 +22,17 @@ Authors * qingenz123@126.com (Qingen ZHAO) 2022 """ - -import os -import logging import argparse -import xml.etree.ElementTree as et import glob import json -from ami_splits import get_AMI_split +import logging +import os +import xml.etree.ElementTree as et from distutils.util import strtobool -from dataio import ( - load_pkl, - save_pkl, ) +from ami_splits import get_AMI_split +from dataio import load_pkl +from dataio import save_pkl logger = logging.getLogger(__name__) SAMPLERATE = 16000 diff --git a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py index c92ede1ab..4e9639dc7 100644 --- a/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py +++ b/examples/voxceleb/sv0/local/make_voxceleb_kaldi_trial.py @@ -12,28 +12,30 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - """ Make VoxCeleb1 trial of kaldi format this script creat the test trial from kaldi trial voxceleb1_test_v2.txt or official trial veri_test2.txt to kaldi trial format """ - import argparse import codecs import os parser = argparse.ArgumentParser(description=__doc__) -parser.add_argument("--voxceleb_trial", - default="voxceleb1_test_v2", - type=str, - help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt") -parser.add_argument("--trial", - default="data/test/trial", - type=str, - help="Kaldi format trial file") +parser.add_argument( + "--voxceleb_trial", + default="voxceleb1_test_v2", + type=str, + help="VoxCeleb trial file. Default we use the kaldi trial voxceleb1_test_v2.txt" +) +parser.add_argument( + "--trial", + default="data/test/trial", + type=str, + help="Kaldi format trial file") args = parser.parse_args() + def main(voxceleb_trial, trial): """ VoxCeleb provide several trial file, which format is different with kaldi format. @@ -58,7 +60,9 @@ def main(voxceleb_trial, trial): """ print("Start convert the voxceleb trial to kaldi format") if not os.path.exists(voxceleb_trial): - raise RuntimeError("{} does not exist. Pleas input the correct file path".format(voxceleb_trial)) + raise RuntimeError( + "{} does not exist. Pleas input the correct file path".format( + voxceleb_trial)) trial_dirname = os.path.dirname(trial) if not os.path.exists(trial_dirname): @@ -66,9 +70,9 @@ def main(voxceleb_trial, trial): with codecs.open(voxceleb_trial, 'r', encoding='utf-8') as f, \ codecs.open(trial, 'w', encoding='utf-8') as w: - for line in f: + for line in f: target_or_nontarget, path1, path2 = line.strip().split() - + utt_id1 = "-".join(path1.split("/")) utt_id2 = "-".join(path2.split("/")) target = "nontarget" @@ -77,5 +81,6 @@ def main(voxceleb_trial, trial): w.write("{} {} {}\n".format(utt_id1, utt_id2, target)) print("Convert the voxceleb trial to kaldi format successfully") + if __name__ == "__main__": main(args.voxceleb_trial, args.trial) diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py index 42537b159..185a92b8d 100644 --- a/paddlespeech/__init__.py +++ b/paddlespeech/__init__.py @@ -11,14 +11,3 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - - - - - - - - - - - diff --git a/paddlespeech/cli/asr/infer.py b/paddlespeech/cli/asr/infer.py index 7f648b4c3..1fb4be434 100644 --- a/paddlespeech/cli/asr/infer.py +++ b/paddlespeech/cli/asr/infer.py @@ -413,7 +413,8 @@ class ASRExecutor(BaseExecutor): def _check(self, audio_file: str, sample_rate: int, force_yes: bool): self.sample_rate = sample_rate if self.sample_rate != 16000 and self.sample_rate != 8000: - logger.error("invalid sample rate, please input --sr 8000 or --sr 16000") + logger.error( + "invalid sample rate, please input --sr 8000 or --sr 16000") return False if isinstance(audio_file, (str, os.PathLike)): diff --git a/paddlespeech/s2t/io/utility.py b/paddlespeech/s2t/io/utility.py index ce5e77230..c08b5535a 100644 --- a/paddlespeech/s2t/io/utility.py +++ b/paddlespeech/s2t/io/utility.py @@ -11,8 +11,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List from io import BytesIO +from typing import List import numpy as np diff --git a/paddlespeech/t2s/datasets/dataset.py b/paddlespeech/t2s/datasets/dataset.py index f81c2877c..2d6c03cb1 100644 --- a/paddlespeech/t2s/datasets/dataset.py +++ b/paddlespeech/t2s/datasets/dataset.py @@ -258,4 +258,4 @@ class ChainDataset(Dataset): return dataset[i] i -= len(dataset) - raise IndexError("dataset index out of range") \ No newline at end of file + raise IndexError("dataset index out of range") diff --git a/utils/DER.py b/utils/DER.py index 5b62094df..d6ab695d8 100755 --- a/utils/DER.py +++ b/utils/DER.py @@ -23,10 +23,11 @@ Credits This code is adapted from https://github.com/nryant/dscore """ import argparse -from distutils.util import strtobool import os import re import subprocess +from distutils.util import strtobool + import numpy as np FILE_IDS = re.compile(r"(?<=Speaker Diarization for).+(?=\*\*\*)") From cbcbddf93e3ad49148f33ff74a1dadc856e843ae Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 24 Feb 2022 11:38:54 +0000 Subject: [PATCH 3/8] format, test=doct --- setup.py | 80 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 37 deletions(-) diff --git a/setup.py b/setup.py index 9bb11d0dd..31dfa0bcf 100644 --- a/setup.py +++ b/setup.py @@ -29,44 +29,50 @@ HERE = Path(os.path.abspath(os.path.dirname(__file__))) VERSION = '0.1.1' +base = [ + "editdistance", + "g2p_en", + "g2pM", + "h5py", + "inflect", + "jieba", + "jsonlines", + "kaldiio", + "librosa", + "loguru", + "matplotlib", + "nara_wpe", + "pandas", + "paddleaudio", + "paddlenlp", + "paddlespeech_feat", + "praatio==5.0.0", + "pypinyin", + "python-dateutil", + "pyworld", + "resampy==0.2.2", + "sacrebleu", + "scipy", + "sentencepiece~=0.1.96", + "soundfile~=0.10", + "textgrid", + "timer", + "tqdm", + "typeguard", + "visualdl", + "webrtcvad", + "yacs~=0.1.8", +] + +server = [ + "fastapi", + "uvicorn", + "pattern_singleton", +] + requirements = { - "install": [ - "editdistance", - "g2p_en", - "g2pM", - "h5py", - "inflect", - "jieba", - "jsonlines", - "kaldiio", - "librosa", - "loguru", - "matplotlib", - "nara_wpe", - "pandas", - "paddleaudio", - "paddlenlp", - "paddlespeech_feat", - "praatio==5.0.0", - "pypinyin", - "python-dateutil", - "pyworld", - "resampy==0.2.2", - "sacrebleu", - "scipy", - "sentencepiece~=0.1.96", - "soundfile~=0.10", - "textgrid", - "timer", - "tqdm", - "typeguard", - "visualdl", - "webrtcvad", - "yacs~=0.1.8", - # fastapi server - "fastapi", - "uvicorn", - ], + "install": + base + server, "develop": [ "ConfigArgParse", "coverage", From 920b2c808cd489aeae590c71dbfbcb1f2373665f Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 24 Feb 2022 19:58:16 +0800 Subject: [PATCH 4/8] paras required, test=doc --- paddlespeech/server/bin/paddlespeech_client.py | 12 +++++++----- paddlespeech/server/bin/paddlespeech_server.py | 3 ++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 3730d6070..889df8d52 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -48,8 +48,9 @@ class TTSClientExecutor(BaseExecutor): self.parser.add_argument( '--input', type=str, - default="你好,欢迎使用语音合成服务", - help='A sentence to be synthesized.') + default=None, + help='Text to be synthesized.', + required=True) self.parser.add_argument( '--spk_id', type=int, default=0, help='Speaker id') self.parser.add_argument( @@ -181,8 +182,9 @@ class ASRClientExecutor(BaseExecutor): self.parser.add_argument( '--input', type=str, - default="./paddlespeech/server/tests/16_audio.wav", - help='Audio file to be recognized') + default=None, + help='Audio file to be recognized', + required=True) self.parser.add_argument( '--sample_rate', type=int, default=16000, help='audio sample rate') self.parser.add_argument( @@ -241,4 +243,4 @@ class ASRClientExecutor(BaseExecutor): print(r.json()) print("time cost %f s." % (time_end - time_start)) except: - print("Failed to speech recognition.") \ No newline at end of file + print("Failed to speech recognition.") diff --git a/paddlespeech/server/bin/paddlespeech_server.py b/paddlespeech/server/bin/paddlespeech_server.py index ad62d3f6c..aff77d544 100644 --- a/paddlespeech/server/bin/paddlespeech_server.py +++ b/paddlespeech/server/bin/paddlespeech_server.py @@ -41,7 +41,8 @@ class ServerExecutor(BaseExecutor): "--config_file", action="store", help="yaml file of the app", - default="./conf/application.yaml") + default=None, + required=True) self.parser.add_argument( "--log_file", From 434708cff4edb8ca157c6fb7e39aa1926fb25223 Mon Sep 17 00:00:00 2001 From: lym0302 Date: Thu, 24 Feb 2022 20:14:07 +0800 Subject: [PATCH 5/8] set device cpu, test=doc --- demos/speech_server/conf/asr/asr.yaml | 2 +- demos/speech_server/conf/asr/asr_pd.yaml | 2 +- demos/speech_server/conf/tts/tts.yaml | 2 +- demos/speech_server/conf/tts/tts_pd.yaml | 4 ++-- paddlespeech/server/conf/asr/asr.yaml | 2 +- paddlespeech/server/conf/asr/asr_pd.yaml | 2 +- paddlespeech/server/conf/tts/tts.yaml | 2 +- paddlespeech/server/conf/tts/tts_pd.yaml | 4 ++-- paddlespeech/server/engine/asr/python/asr_engine.py | 5 ++++- paddlespeech/server/engine/tts/python/tts_engine.py | 5 ++++- 10 files changed, 18 insertions(+), 12 deletions(-) diff --git a/demos/speech_server/conf/asr/asr.yaml b/demos/speech_server/conf/asr/asr.yaml index b1ef558dc..1a805142a 100644 --- a/demos/speech_server/conf/asr/asr.yaml +++ b/demos/speech_server/conf/asr/asr.yaml @@ -5,4 +5,4 @@ cfg_path: # [optional] ckpt_path: # [optional] decode_method: 'attention_rescoring' force_yes: True -device: 'gpu:3' # set 'gpu:id' or 'cpu' +device: 'cpu' # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/asr/asr_pd.yaml b/demos/speech_server/conf/asr/asr_pd.yaml index 21bf71772..6cddb4503 100644 --- a/demos/speech_server/conf/asr/asr_pd.yaml +++ b/demos/speech_server/conf/asr/asr_pd.yaml @@ -15,7 +15,7 @@ decode_method: force_yes: True am_predictor_conf: - device: 'gpu:3' # set 'gpu:id' or 'cpu' + device: 'cpu' # set 'gpu:id' or 'cpu' enable_mkldnn: True switch_ir_optim: True diff --git a/demos/speech_server/conf/tts/tts.yaml b/demos/speech_server/conf/tts/tts.yaml index 8d45aec52..19e8874e3 100644 --- a/demos/speech_server/conf/tts/tts.yaml +++ b/demos/speech_server/conf/tts/tts.yaml @@ -29,4 +29,4 @@ voc_stat: # OTHERS # ################################################################## lang: 'zh' -device: 'gpu:3' # set 'gpu:id' or 'cpu' +device: 'cpu' # set 'gpu:id' or 'cpu' diff --git a/demos/speech_server/conf/tts/tts_pd.yaml b/demos/speech_server/conf/tts/tts_pd.yaml index ecfa3a3ba..97df52613 100644 --- a/demos/speech_server/conf/tts/tts_pd.yaml +++ b/demos/speech_server/conf/tts/tts_pd.yaml @@ -15,7 +15,7 @@ speaker_dict: spk_id: 0 am_predictor_conf: - device: 'gpu:3' # set 'gpu:id' or 'cpu' + device: 'cpu' # set 'gpu:id' or 'cpu' enable_mkldnn: False switch_ir_optim: False @@ -30,7 +30,7 @@ voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) voc_sample_rate: 24000 voc_predictor_conf: - device: 'gpu:3' # set 'gpu:id' or 'cpu' + device: 'cpu' # set 'gpu:id' or 'cpu' enable_mkldnn: False switch_ir_optim: False diff --git a/paddlespeech/server/conf/asr/asr.yaml b/paddlespeech/server/conf/asr/asr.yaml index b1ef558dc..1a805142a 100644 --- a/paddlespeech/server/conf/asr/asr.yaml +++ b/paddlespeech/server/conf/asr/asr.yaml @@ -5,4 +5,4 @@ cfg_path: # [optional] ckpt_path: # [optional] decode_method: 'attention_rescoring' force_yes: True -device: 'gpu:3' # set 'gpu:id' or 'cpu' +device: 'cpu' # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/asr/asr_pd.yaml b/paddlespeech/server/conf/asr/asr_pd.yaml index 21bf71772..6cddb4503 100644 --- a/paddlespeech/server/conf/asr/asr_pd.yaml +++ b/paddlespeech/server/conf/asr/asr_pd.yaml @@ -15,7 +15,7 @@ decode_method: force_yes: True am_predictor_conf: - device: 'gpu:3' # set 'gpu:id' or 'cpu' + device: 'cpu' # set 'gpu:id' or 'cpu' enable_mkldnn: True switch_ir_optim: True diff --git a/paddlespeech/server/conf/tts/tts.yaml b/paddlespeech/server/conf/tts/tts.yaml index 8d45aec52..19e8874e3 100644 --- a/paddlespeech/server/conf/tts/tts.yaml +++ b/paddlespeech/server/conf/tts/tts.yaml @@ -29,4 +29,4 @@ voc_stat: # OTHERS # ################################################################## lang: 'zh' -device: 'gpu:3' # set 'gpu:id' or 'cpu' +device: 'cpu' # set 'gpu:id' or 'cpu' diff --git a/paddlespeech/server/conf/tts/tts_pd.yaml b/paddlespeech/server/conf/tts/tts_pd.yaml index cd4b8583c..019c7ed6a 100644 --- a/paddlespeech/server/conf/tts/tts_pd.yaml +++ b/paddlespeech/server/conf/tts/tts_pd.yaml @@ -15,7 +15,7 @@ speaker_dict: spk_id: 0 am_predictor_conf: - device: 'gpu:3' # set 'gpu:id' or 'cpu' + device: 'cpu' # set 'gpu:id' or 'cpu' enable_mkldnn: False switch_ir_optim: False @@ -30,7 +30,7 @@ voc_params: # the pdiparams file of your vocoder static model (XX.pdipparams) voc_sample_rate: 24000 #must match the model voc_predictor_conf: - device: 'gpu:3' # set 'gpu:id' or 'cpu' + device: 'cpu' # set 'gpu:id' or 'cpu' enable_mkldnn: False switch_ir_optim: False diff --git a/paddlespeech/server/engine/asr/python/asr_engine.py b/paddlespeech/server/engine/asr/python/asr_engine.py index 60040051c..9fac487d7 100644 --- a/paddlespeech/server/engine/asr/python/asr_engine.py +++ b/paddlespeech/server/engine/asr/python/asr_engine.py @@ -53,7 +53,10 @@ class ASREngine(BaseEngine): self.executor = ASRServerExecutor() self.config = get_config(config_file) - paddle.set_device(self.config.device) + if self.config.device is None: + paddle.set_device(paddle.get_device()) + else: + paddle.set_device(self.config.device) self.executor._init_from_path( self.config.model, self.config.lang, self.config.sample_rate, self.config.cfg_path, self.config.decode_method, diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py b/paddlespeech/server/engine/tts/python/tts_engine.py index e11cfb1d1..508a1f35e 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -54,7 +54,10 @@ class TTSEngine(BaseEngine): try: self.config = get_config(config_file) - paddle.set_device(self.config.device) + if self.config.device is None: + paddle.set_device(paddle.get_device()) + else: + paddle.set_device(self.config.device) self.executor._init_from_path( am=self.config.am, From 162361d878030308d0cfea1f7e8b88067fcb794c Mon Sep 17 00:00:00 2001 From: lym0302 Date: Fri, 25 Feb 2022 14:15:27 +0800 Subject: [PATCH 6/8] format code, test=doc --- paddlespeech/server/bin/main.py | 2 +- .../server/bin/paddlespeech_client.py | 8 ++-- .../engine/asr/paddleinference/asr_engine.py | 38 ++++++++----------- paddlespeech/server/engine/base_engine.py | 2 - paddlespeech/server/engine/engine_factory.py | 1 - paddlespeech/server/engine/engine_pool.py | 6 ++- .../engine/tts/paddleinference/tts_engine.py | 16 +++++--- .../server/engine/tts/python/tts_engine.py | 19 +++++++--- paddlespeech/server/restful/asr_api.py | 3 +- paddlespeech/server/restful/request.py | 1 - paddlespeech/server/restful/response.py | 3 -- paddlespeech/server/restful/tts_api.py | 2 +- paddlespeech/server/tests/asr/http_client.py | 22 +++++------ paddlespeech/server/tests/tts/test_client.py | 3 +- paddlespeech/server/util.py | 2 +- 15 files changed, 65 insertions(+), 63 deletions(-) diff --git a/paddlespeech/server/bin/main.py b/paddlespeech/server/bin/main.py index dda0bbd7f..360d295ef 100644 --- a/paddlespeech/server/bin/main.py +++ b/paddlespeech/server/bin/main.py @@ -12,8 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import argparse + import uvicorn -import yaml from fastapi import FastAPI from paddlespeech.server.engine.engine_pool import init_engine_pool diff --git a/paddlespeech/server/bin/paddlespeech_client.py b/paddlespeech/server/bin/paddlespeech_client.py index 889df8d52..853d272fb 100644 --- a/paddlespeech/server/bin/paddlespeech_client.py +++ b/paddlespeech/server/bin/paddlespeech_client.py @@ -124,7 +124,7 @@ class TTSClientExecutor(BaseExecutor): logger.info("RTF: %f " % (time_consume / duration)) return True - except: + except BaseException: logger.error("Failed to synthesized audio.") return False @@ -164,7 +164,7 @@ class TTSClientExecutor(BaseExecutor): print("Audio duration: %f s." % (duration)) print("Response time: %f s." % (time_consume)) print("RTF: %f " % (time_consume / duration)) - except: + except BaseException: print("Failed to synthesized audio.") @@ -211,7 +211,7 @@ class ASRClientExecutor(BaseExecutor): logger.info(r.json()) logger.info("time cost %f s." % (time_end - time_start)) return True - except: + except BaseException: logger.error("Failed to speech recognition.") return False @@ -242,5 +242,5 @@ class ASRClientExecutor(BaseExecutor): time_end = time.time() print(r.json()) print("time cost %f s." % (time_end - time_start)) - except: + except BaseException: print("Failed to speech recognition.") diff --git a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py index 6d0723229..5d4c4fa6a 100644 --- a/paddlespeech/server/engine/asr/paddleinference/asr_engine.py +++ b/paddlespeech/server/engine/asr/paddleinference/asr_engine.py @@ -13,31 +13,24 @@ # limitations under the License. import io import os -from typing import List from typing import Optional -from typing import Union -import librosa import paddle -import soundfile from yacs.config import CfgNode -from paddlespeech.cli.utils import MODEL_HOME -from paddlespeech.s2t.modules.ctc import CTCDecoder from paddlespeech.cli.asr.infer import ASRExecutor from paddlespeech.cli.log import logger +from paddlespeech.cli.utils import MODEL_HOME from paddlespeech.s2t.frontend.featurizer.text_featurizer import TextFeaturizer -from paddlespeech.s2t.transform.transformation import Transformation -from paddlespeech.s2t.utils.dynamic_import import dynamic_import +from paddlespeech.s2t.modules.ctc import CTCDecoder from paddlespeech.s2t.utils.utility import UpdateConfig +from paddlespeech.server.engine.base_engine import BaseEngine from paddlespeech.server.utils.config import get_config from paddlespeech.server.utils.paddle_predictor import init_predictor from paddlespeech.server.utils.paddle_predictor import run_model -from paddlespeech.server.engine.base_engine import BaseEngine __all__ = ['ASREngine'] - pretrained_models = { "deepspeech2offline_aishell-zh-16k": { 'url': @@ -143,7 +136,6 @@ class ASRServerExecutor(ASRExecutor): batch_average=True, # sum / batch_size grad_norm_type=self.config.get('ctc_grad_norm_type', None)) - @paddle.no_grad() def infer(self, model_type: str): """ @@ -161,9 +153,8 @@ class ASRServerExecutor(ASRExecutor): cfg.beam_size, cfg.cutoff_prob, cfg.cutoff_top_n, cfg.num_proc_bsearch) - output_data = run_model( - self.am_predictor, - [audio.numpy(), audio_len.numpy()]) + output_data = run_model(self.am_predictor, + [audio.numpy(), audio_len.numpy()]) probs = output_data[0] eouts_len = output_data[1] @@ -208,14 +199,14 @@ class ASREngine(BaseEngine): paddle.set_device(paddle.get_device()) self.executor._init_from_path( - model_type=self.config.model_type, - am_model=self.config.am_model, - am_params=self.config.am_params, - lang=self.config.lang, - sample_rate=self.config.sample_rate, - cfg_path=self.config.cfg_path, - decode_method=self.config.decode_method, - am_predictor_conf=self.config.am_predictor_conf) + model_type=self.config.model_type, + am_model=self.config.am_model, + am_params=self.config.am_params, + lang=self.config.lang, + sample_rate=self.config.sample_rate, + cfg_path=self.config.cfg_path, + decode_method=self.config.decode_method, + am_predictor_conf=self.config.am_predictor_conf) logger.info("Initialize ASR server engine successfully.") return True @@ -230,7 +221,8 @@ class ASREngine(BaseEngine): io.BytesIO(audio_data), self.config.sample_rate, self.config.force_yes): logger.info("start running asr engine") - self.executor.preprocess(self.config.model_type, io.BytesIO(audio_data)) + self.executor.preprocess(self.config.model_type, + io.BytesIO(audio_data)) self.executor.infer(self.config.model_type) self.output = self.executor.postprocess() # Retrieve result of asr. logger.info("end inferring asr engine") diff --git a/paddlespeech/server/engine/base_engine.py b/paddlespeech/server/engine/base_engine.py index 0cc202094..0f020d1c7 100644 --- a/paddlespeech/server/engine/base_engine.py +++ b/paddlespeech/server/engine/base_engine.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. import os -from typing import Any -from typing import List from typing import Union from pattern_singleton import Singleton diff --git a/paddlespeech/server/engine/engine_factory.py b/paddlespeech/server/engine/engine_factory.py index 05f135681..546541edf 100644 --- a/paddlespeech/server/engine/engine_factory.py +++ b/paddlespeech/server/engine/engine_factory.py @@ -13,7 +13,6 @@ # limitations under the License. from typing import Text - __all__ = ['EngineFactory'] diff --git a/paddlespeech/server/engine/engine_pool.py b/paddlespeech/server/engine/engine_pool.py index 0198bd80a..f6a4d2aab 100644 --- a/paddlespeech/server/engine/engine_pool.py +++ b/paddlespeech/server/engine/engine_pool.py @@ -29,8 +29,10 @@ def init_engine_pool(config) -> bool: """ global ENGINE_POOL for engine in config.engine_backend: - ENGINE_POOL[engine] = EngineFactory.get_engine(engine_name=engine, engine_type=config.engine_type[engine]) - if not ENGINE_POOL[engine].init(config_file=config.engine_backend[engine]): + ENGINE_POOL[engine] = EngineFactory.get_engine( + engine_name=engine, engine_type=config.engine_type[engine]) + if not ENGINE_POOL[engine].init( + config_file=config.engine_backend[engine]): return False return True diff --git a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py index ecd2b0b64..a9dc5f4ea 100644 --- a/paddlespeech/server/engine/tts/paddleinference/tts_engine.py +++ b/paddlespeech/server/engine/tts/paddleinference/tts_engine.py @@ -360,8 +360,8 @@ class TTSEngine(BaseEngine): am_predictor_conf=self.config.am_predictor_conf, voc_predictor_conf=self.config.voc_predictor_conf, ) - except: - logger.info("Initialize TTS server engine Failed.") + except BaseException: + logger.error("Initialize TTS server engine Failed.") return False logger.info("Initialize TTS server engine successfully.") @@ -405,11 +405,13 @@ class TTSEngine(BaseEngine): # transform speed try: # windows not support soxbindings wav_speed = change_speed(wav_vol, speed, target_fs) - except: + except ServerBaseException: raise ServerBaseException( ErrorCode.SERVER_INTERNAL_ERR, "Transform speed failed. Can not install soxbindings on your system. \ You need to set speed value 1.0.") + except BaseException: + logger.error("Transform speed failed.") # wav to base64 buf = io.BytesIO() @@ -462,9 +464,11 @@ class TTSEngine(BaseEngine): try: self.executor.infer( text=sentence, lang=lang, am=self.config.am, spk_id=spk_id) - except: + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts infer failed.") + except BaseException: + logger.error("tts infer failed.") try: target_sample_rate, wav_base64 = self.postprocess( @@ -474,8 +478,10 @@ class TTSEngine(BaseEngine): volume=volume, speed=speed, audio_path=save_path) - except: + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts postprocess failed.") + except BaseException: + logger.error("tts postprocess failed.") return lang, target_sample_rate, wav_base64 diff --git a/paddlespeech/server/engine/tts/python/tts_engine.py b/paddlespeech/server/engine/tts/python/tts_engine.py index 508a1f35e..20b4e0fe9 100644 --- a/paddlespeech/server/engine/tts/python/tts_engine.py +++ b/paddlespeech/server/engine/tts/python/tts_engine.py @@ -72,8 +72,8 @@ class TTSEngine(BaseEngine): voc_ckpt=self.config.voc_ckpt, voc_stat=self.config.voc_stat, lang=self.config.lang) - except: - logger.info("Initialize TTS server engine Failed.") + except BaseException: + logger.error("Initialize TTS server engine Failed.") return False logger.info("Initialize TTS server engine successfully.") @@ -117,10 +117,13 @@ class TTSEngine(BaseEngine): # transform speed try: # windows not support soxbindings wav_speed = change_speed(wav_vol, speed, target_fs) - except: + except ServerBaseException: raise ServerBaseException( ErrorCode.SERVER_INTERNAL_ERR, - "Can not install soxbindings on your system.") + "Transform speed failed. Can not install soxbindings on your system. \ + You need to set speed value 1.0.") + except BaseException: + logger.error("Transform speed failed.") # wav to base64 buf = io.BytesIO() @@ -173,9 +176,11 @@ class TTSEngine(BaseEngine): try: self.executor.infer( text=sentence, lang=lang, am=self.config.am, spk_id=spk_id) - except: + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts infer failed.") + except BaseException: + logger.error("tts infer failed.") try: target_sample_rate, wav_base64 = self.postprocess( @@ -185,8 +190,10 @@ class TTSEngine(BaseEngine): volume=volume, speed=speed, audio_path=save_path) - except: + except ServerBaseException: raise ServerBaseException(ErrorCode.SERVER_INTERNAL_ERR, "tts postprocess failed.") + except BaseException: + logger.error("tts postprocess failed.") return lang, target_sample_rate, wav_base64 diff --git a/paddlespeech/server/restful/asr_api.py b/paddlespeech/server/restful/asr_api.py index 4806c0421..cf46735dc 100644 --- a/paddlespeech/server/restful/asr_api.py +++ b/paddlespeech/server/restful/asr_api.py @@ -14,6 +14,7 @@ import base64 import traceback from typing import Union + from fastapi import APIRouter from paddlespeech.server.engine.engine_pool import get_engine_pool @@ -83,7 +84,7 @@ def asr(request_body: ASRRequest): except ServerBaseException as e: response = failed_response(e.error_code, e.msg) - except: + except BaseException: response = failed_response(ErrorCode.SERVER_UNKOWN_ERR) traceback.print_exc() diff --git a/paddlespeech/server/restful/request.py b/paddlespeech/server/restful/request.py index 2be5f0e54..289088019 100644 --- a/paddlespeech/server/restful/request.py +++ b/paddlespeech/server/restful/request.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List from typing import Optional from pydantic import BaseModel diff --git a/paddlespeech/server/restful/response.py b/paddlespeech/server/restful/response.py index ab5e395ba..4e18ee0d7 100644 --- a/paddlespeech/server/restful/response.py +++ b/paddlespeech/server/restful/response.py @@ -11,9 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from typing import List -from typing import Optional - from pydantic import BaseModel __all__ = ['ASRResponse', 'TTSResponse'] diff --git a/paddlespeech/server/restful/tts_api.py b/paddlespeech/server/restful/tts_api.py index 111051473..c7e91300d 100644 --- a/paddlespeech/server/restful/tts_api.py +++ b/paddlespeech/server/restful/tts_api.py @@ -114,7 +114,7 @@ def tts(request_body: TTSRequest): } except ServerBaseException as e: response = failed_response(e.error_code, e.msg) - except: + except BaseException: response = failed_response(ErrorCode.SERVER_UNKOWN_ERR) traceback.print_exc() diff --git a/paddlespeech/server/tests/asr/http_client.py b/paddlespeech/server/tests/asr/http_client.py index 14adb5741..49f2adf7c 100644 --- a/paddlespeech/server/tests/asr/http_client.py +++ b/paddlespeech/server/tests/asr/http_client.py @@ -10,11 +10,11 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the -import requests +import base64 import json import time -import base64 -import io + +import requests def readwav2base64(wav_file): @@ -34,23 +34,23 @@ def main(): url = "http://127.0.0.1:8090/paddlespeech/asr" # start Timestamp - time_start=time.time() + time_start = time.time() test_audio_dir = "./16_audio.wav" audio = readwav2base64(test_audio_dir) data = { - "audio": audio, - "audio_format": "wav", - "sample_rate": 16000, - "lang": "zh_cn", - } + "audio": audio, + "audio_format": "wav", + "sample_rate": 16000, + "lang": "zh_cn", + } r = requests.post(url=url, data=json.dumps(data)) # ending Timestamp - time_end=time.time() - print('time cost',time_end - time_start, 's') + time_end = time.time() + print('time cost', time_end - time_start, 's') print(r.json()) diff --git a/paddlespeech/server/tests/tts/test_client.py b/paddlespeech/server/tests/tts/test_client.py index 65f4ccfec..e42c9bcfa 100644 --- a/paddlespeech/server/tests/tts/test_client.py +++ b/paddlespeech/server/tests/tts/test_client.py @@ -25,6 +25,7 @@ import soundfile from paddlespeech.server.utils.audio_process import wav2pcm + # Request and response def tts_client(args): """ Request and response @@ -99,5 +100,5 @@ if __name__ == "__main__": print("Inference time: %f" % (time_consume)) print("The duration of synthesized audio: %f" % (duration)) print("The RTF is: %f" % (rtf)) - except: + except BaseException: print("Failed to synthesized audio.") diff --git a/paddlespeech/server/util.py b/paddlespeech/server/util.py index 48c4b8cbd..1f1b0be1b 100644 --- a/paddlespeech/server/util.py +++ b/paddlespeech/server/util.py @@ -219,7 +219,7 @@ class ConfigCache: try: cfg = yaml.load(file, Loader=yaml.FullLoader) self._data.update(cfg) - except: + except BaseException: self.flush() @property From 2ecab4e08f8fe36c3518be59cd6c9b06d99f0d4b Mon Sep 17 00:00:00 2001 From: TianYuan Date: Fri, 25 Feb 2022 14:45:50 +0800 Subject: [PATCH 7/8] Update setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 31dfa0bcf..86ac964d9 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ base = [ "jieba", "jsonlines", "kaldiio", - "librosa", + "librosa==0.8.1", "loguru", "matplotlib", "nara_wpe", From e8fea28384595e016ed7e081a91292d5fe57d3d5 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 25 Feb 2022 06:58:02 +0000 Subject: [PATCH 8/8] fix setup --- requirements.txt | 48 ------------------------------------------------ setup.py | 2 +- 2 files changed, 1 insertion(+), 49 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 760821662..000000000 --- a/requirements.txt +++ /dev/null @@ -1,48 +0,0 @@ -ConfigArgParse -coverage -editdistance -g2p_en -g2pM -gpustat -h5py -inflect -jieba -jsonlines -kaldiio -librosa -loguru -matplotlib -nara_wpe -nltk -paddleaudio -paddlenlp -paddlespeech_ctcdecoders -paddlespeech_feat -pandas -phkit -Pillow -praatio==5.0.0 -pre-commit -pybind11 -pypi-kenlm -pypinyin -python-dateutil -pyworld -resampy==0.2.2 -sacrebleu -scipy -sentencepiece~=0.1.96 -snakeviz -soundfile~=0.10 -sox -soxbindings -textgrid -timer -tqdm -typeguard -unidecode -visualdl -webrtcvad -yacs~=0.1.8 -yq -zhon diff --git a/setup.py b/setup.py index 86ac964d9..3f3632b37 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ from setuptools.command.install import install HERE = Path(os.path.abspath(os.path.dirname(__file__))) -VERSION = '0.1.1' +VERSION = '0.1.2' base = [ "editdistance",