From 0a5624fe614c8316e85572c6f180c1214ef7fd10 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 24 Feb 2022 10:58:31 +0000 Subject: [PATCH] update ctc loss compare --- docs/topic/ctc/ctc_loss_compare.ipynb | 150 +++++++++++++------------- 1 file changed, 73 insertions(+), 77 deletions(-) diff --git a/docs/topic/ctc/ctc_loss_compare.ipynb b/docs/topic/ctc/ctc_loss_compare.ipynb index 95b2af508..c313710c2 100644 --- a/docs/topic/ctc/ctc_loss_compare.ipynb +++ b/docs/topic/ctc/ctc_loss_compare.ipynb @@ -30,12 +30,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Cloning into 'warp-ctc'...\n", - "remote: Enumerating objects: 829, done.\u001b[K\n", - "remote: Total 829 (delta 0), reused 0 (delta 0), pack-reused 829\u001b[K\n", - "Receiving objects: 100% (829/829), 388.85 KiB | 140.00 KiB/s, done.\n", - "Resolving deltas: 100% (419/419), done.\n", - "Checking connectivity... done.\n" + "fatal: destination path 'warp-ctc' already exists and is not an empty directory.\r\n" ] } ], @@ -99,30 +94,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "-- The C compiler identification is GNU 5.4.0\n", - "-- The CXX compiler identification is GNU 5.4.0\n", - "-- Check for working C compiler: /usr/bin/cc\n", - "-- Check for working C compiler: /usr/bin/cc -- works\n", - "-- Detecting C compiler ABI info\n", - "-- Detecting C compiler ABI info - done\n", - "-- Detecting C compile features\n", - "-- Detecting C compile features - done\n", - "-- Check for working CXX compiler: /usr/bin/c++\n", - "-- Check for working CXX compiler: /usr/bin/c++ -- works\n", - "-- Detecting CXX compiler ABI info\n", - "-- Detecting CXX compiler ABI info - done\n", - "-- Detecting CXX compile features\n", - "-- Detecting CXX compile features - done\n", - "-- Looking for pthread.h\n", - "-- Looking for pthread.h - found\n", - "-- Performing Test CMAKE_HAVE_LIBC_PTHREAD\n", - "-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed\n", - "-- Looking for pthread_create in pthreads\n", - "-- Looking for pthread_create in pthreads - not found\n", - "-- Looking for pthread_create in pthread\n", - "-- Looking for pthread_create in pthread - found\n", - "-- Found Threads: TRUE \n", - "-- Found CUDA: /usr/local/cuda (found suitable version \"10.2\", minimum required is \"6.5\") \n", "-- cuda found TRUE\n", "-- Building shared library with GPU support\n", "-- Configuring done\n", @@ -145,20 +116,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 11%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_reduce.cu.o\u001b[0m\n", - "[ 22%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/warpctc.dir/src/warpctc_generated_ctc_entrypoint.cu.o\u001b[0m\n", - "\u001b[35m\u001b[1mScanning dependencies of target warpctc\u001b[0m\n", - "[ 33%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n", + "[ 11%] \u001b[32m\u001b[1mLinking CXX shared library libwarpctc.so\u001b[0m\n", "[ 33%] Built target warpctc\n", - "[ 44%] \u001b[34m\u001b[1mBuilding NVCC (Device) object CMakeFiles/test_gpu.dir/tests/test_gpu_generated_test_gpu.cu.o\u001b[0m\n", - "\u001b[35m\u001b[1mScanning dependencies of target test_cpu\u001b[0m\n", - "[ 55%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/test_cpu.cpp.o\u001b[0m\n", - "[ 66%] \u001b[32mBuilding CXX object CMakeFiles/test_cpu.dir/tests/random.cpp.o\u001b[0m\n", - "[ 77%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n", + "[ 44%] \u001b[32m\u001b[1mLinking CXX executable test_cpu\u001b[0m\n", + "[ 55%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n", "[ 77%] Built target test_cpu\n", - "\u001b[35m\u001b[1mScanning dependencies of target test_gpu\u001b[0m\n", - "[ 88%] \u001b[32mBuilding CXX object CMakeFiles/test_gpu.dir/tests/random.cpp.o\u001b[0m\n", - "[100%] \u001b[32m\u001b[1mLinking CXX executable test_gpu\u001b[0m\n", "[100%] Built target test_gpu\n" ] } @@ -169,7 +131,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "31761a31", "metadata": {}, "outputs": [ @@ -187,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "f53316f6", "metadata": {}, "outputs": [ @@ -205,7 +167,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "084f1e49", "metadata": {}, "outputs": [ @@ -216,29 +178,20 @@ "running install\n", "running bdist_egg\n", "running egg_info\n", - "creating warpctc_pytorch.egg-info\n", "writing warpctc_pytorch.egg-info/PKG-INFO\n", "writing dependency_links to warpctc_pytorch.egg-info/dependency_links.txt\n", "writing top-level names to warpctc_pytorch.egg-info/top_level.txt\n", "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n", - "writing manifest file 'warpctc_pytorch.egg-info/SOURCES.txt'\n", "installing library code to build/bdist.linux-x86_64/egg\n", "running install_lib\n", "running build_py\n", - "creating build\n", - "creating build/lib.linux-x86_64-3.9\n", - "creating build/lib.linux-x86_64-3.9/warpctc_pytorch\n", - "copying warpctc_pytorch/__init__.py -> build/lib.linux-x86_64-3.9/warpctc_pytorch\n", "running build_ext\n", "building 'warpctc_pytorch._warp_ctc' extension\n", - "creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9\n", - "creating /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src\n", "Emitting ninja build file /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/build.ninja...\n", "Compiling objects...\n", "Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)\n", - "[1/1] c++ -MMD -MF /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o.d -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -Wno-unused-result -Wsign-compare -DNDEBUG -O2 -Wall -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -O2 -isystem /workspace/zhanghui/DeepSpeech-2.x/tools/venv/include -fPIC -I/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/torch/csrc/api/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/TH -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/include/THC -I/usr/local/cuda/include -I/workspace/zhanghui/DeepSpeech-2.x/tools/venv/include/python3.9 -c -c /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/src/binding.cpp -o /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -std=c++14 -fPIC -DWARPCTC_ENABLE_GPU -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE=\"_gcc\"' '-DPYBIND11_STDLIB=\"_libstdcpp\"' '-DPYBIND11_BUILD_ABI=\"_cxxabi1011\"' -DTORCH_EXTENSION_NAME=_warp_ctc -D_GLIBCXX_USE_CXX11_ABI=0\n", + "ninja: no work to do.\n", "g++ -pthread -B /workspace/zhanghui/DeepSpeech-2.x/tools/venv/compiler_compat -Wl,--sysroot=/ -shared -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -Wl,-rpath-link,/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib /workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/pytorch_binding/build/temp.linux-x86_64-3.9/src/binding.o -L/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build -L/workspace/zhanghui/DeepSpeech-2.x/tools/venv/lib/python3.9/site-packages/torch/lib -L/usr/local/cuda/lib64 -lwarpctc -lc10 -ltorch -ltorch_cpu -ltorch_python -lcudart -lc10_cuda -ltorch_cuda -o build/lib.linux-x86_64-3.9/warpctc_pytorch/_warp_ctc.cpython-39-x86_64-linux-gnu.so -Wl,-rpath,/workspace/zhanghui/DeepSpeech-2.x/docs/topic/ctc/warp-ctc/build\n", - "creating build/bdist.linux-x86_64\n", "creating build/bdist.linux-x86_64/egg\n", "creating build/bdist.linux-x86_64/egg/warpctc_pytorch\n", "copying build/lib.linux-x86_64-3.9/warpctc_pytorch/__init__.py -> build/bdist.linux-x86_64/egg/warpctc_pytorch\n", @@ -254,7 +207,6 @@ "writing build/bdist.linux-x86_64/egg/EGG-INFO/native_libs.txt\n", "zip_safe flag not set; analyzing archive contents...\n", "warpctc_pytorch.__pycache__._warp_ctc.cpython-39: module references __file__\n", - "creating dist\n", "creating 'dist/warpctc_pytorch-0.1-py3.9-linux-x86_64.egg' and adding 'build/bdist.linux-x86_64/egg' to it\n", "removing 'build/bdist.linux-x86_64/egg' (and everything under it)\n", "Processing warpctc_pytorch-0.1-py3.9-linux-x86_64.egg\n", @@ -275,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "ee4ca9e3", "metadata": {}, "outputs": [ @@ -293,7 +245,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "59255ed8", "metadata": {}, "outputs": [ @@ -311,21 +263,14 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 22, "id": "1dae09b9", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n" - ] - } - ], + "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", + "import torch.nn.functional as F\n", "import warpctc_pytorch as wp\n", "import paddle.nn as pn\n", "import paddle" @@ -333,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "id": "83d0762e", "metadata": {}, "outputs": [ @@ -343,7 +288,7 @@ "'1.10.0+cu102'" ] }, - "execution_count": 16, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -354,17 +299,17 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "id": "62501e2c", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'2.2.0'" + "'2.2.1'" ] }, - "execution_count": 17, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -375,7 +320,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "id": "9e8e0f40", "metadata": {}, "outputs": [ @@ -392,6 +337,7 @@ } ], "source": [ + "# warpctc_pytorch CTCLoss\n", "probs = torch.FloatTensor([[\n", " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n", " ]]).transpose(0, 1).contiguous()\n", @@ -412,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "id": "2cd46569", "metadata": {}, "outputs": [ @@ -428,6 +374,7 @@ } ], "source": [ + "# pytorch CTCLoss\n", "probs = torch.FloatTensor([[\n", " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n", " ]]).transpose(0, 1).contiguous()\n", @@ -449,7 +396,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 27, "id": "85c3461a", "metadata": {}, "outputs": [ @@ -467,6 +414,7 @@ } ], "source": [ + "# Paddle CTCLoss\n", "paddle.set_device('cpu')\n", "probs = paddle.to_tensor([[\n", " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1],\n", @@ -490,7 +438,55 @@ { "cell_type": "code", "execution_count": null, - "id": "d390cd91", + "id": "8cdf76c2", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "2c305eaf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch.Size([2, 1, 5])\n", + "2.4628584384918213\n", + "[[[ 0.17703117 -0.7081247 0.17703117 0.17703117 0.17703117]]\n", + "\n", + " [[ 0.17703117 0.17703117 -0.7081247 0.17703117 0.17703117]]]\n" + ] + } + ], + "source": [ + "# warpctc_pytorch CTCLoss, log_softmax idempotent\n", + "probs = torch.FloatTensor([[\n", + " [0.1, 0.6, 0.1, 0.1, 0.1], [0.1, 0.1, 0.6, 0.1, 0.1]\n", + " ]]).transpose(0, 1).contiguous()\n", + "print(probs.size())\n", + "labels = torch.IntTensor([1, 2])\n", + "label_sizes = torch.IntTensor([2])\n", + "probs_sizes = torch.IntTensor([2])\n", + "probs.requires_grad_(True)\n", + "bs = probs.size(1)\n", + "\n", + "ctc_loss = wp.CTCLoss(size_average=False, length_average=False)\n", + "\n", + "log_probs = torch.log_softmax(probs, axis=-1)\n", + "cost = ctc_loss(log_probs, labels, probs_sizes, label_sizes)\n", + "cost = cost.sum() / bs\n", + "print(cost.item())\n", + "cost.backward()\n", + "print(probs.grad.numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "443336f0", "metadata": {}, "outputs": [], "source": []