diff --git a/docs/topic/ctc/ctc_loss_speed_compare.ipynb b/docs/topic/ctc/ctc_loss_speed_compare.ipynb new file mode 100644 index 00000000..eb7a030c --- /dev/null +++ b/docs/topic/ctc/ctc_loss_speed_compare.ipynb @@ -0,0 +1,369 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a1e738e0", + "metadata": {}, + "source": [ + "## 获取测试的 logit 数据" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "29d3368b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "hlens.npy\n", + "logits.npy\n", + "ys_lens.npy\n", + "ys_pad.npy\n" + ] + } + ], + "source": [ + "!mkdir -p ./test_data\n", + "!test -f ./test_data/ctc_loss_compare_data.tgz || wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/ctc_loss_compare_data.tgz\n", + "!tar xzvf test_data/ctc_loss_compare_data.tgz -C ./test_data\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "240caf1d", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import numpy as np\n", + "import time\n", + "\n", + "data_dir=\"./test_data\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "91bad949", + "metadata": {}, + "outputs": [], + "source": [ + "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n", + "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n", + "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n", + "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))" + ] + }, + { + "cell_type": "markdown", + "id": "4cef2f15", + "metadata": {}, + "source": [ + "## 使用 torch 的 ctc loss" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "90612004", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'1.10.1+cu102'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import torch\n", + "torch.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "00799f97", + "metadata": {}, + "outputs": [], + "source": [ + "def torch_ctc_loss(use_cpu):\n", + " if use_cpu:\n", + " device = torch.device(\"cpu\")\n", + " else:\n", + " device = torch.device(\"cuda\")\n", + "\n", + " reduction_type = \"sum\" \n", + "\n", + " ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)\n", + "\n", + " ys_hat = torch.tensor(logits_np, device = device)\n", + " ys_pad = torch.tensor(ys_pad_np, device = device)\n", + " hlens = torch.tensor(hlens_np, device = device)\n", + " ys_lens = torch.tensor(ys_lens_np, device = device)\n", + "\n", + " ys_hat = ys_hat.transpose(0, 1)\n", + " \n", + " # 开始计算时间\n", + " start_time = time.time()\n", + " ys_hat = ys_hat.log_softmax(2)\n", + " loss = ctc_loss(ys_hat, ys_pad, hlens, ys_lens)\n", + " end_time = time.time()\n", + " \n", + " loss = loss / ys_hat.size(1)\n", + " return end_time - start_time, loss.item()" + ] + }, + { + "cell_type": "markdown", + "id": "ba47b5a4", + "metadata": {}, + "source": [ + "## 使用 paddle 的 ctc loss" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6882a06e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2.2.2'" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import paddle\n", + "paddle.__version__" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "3cfa3b7c", + "metadata": {}, + "outputs": [], + "source": [ + "def paddle_ctc_loss(use_cpu): \n", + " import paddle.nn as pn\n", + " if use_cpu:\n", + " device = \"cpu\"\n", + " else:\n", + " device = \"gpu\"\n", + "\n", + " paddle.set_device(device)\n", + "\n", + " logits = paddle.to_tensor(logits_np)\n", + " ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n", + " hlens = paddle.to_tensor(hlens_np, dtype='int64')\n", + " ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n", + "\n", + " logits = logits.transpose([1,0,2])\n", + "\n", + " ctc_loss = pn.CTCLoss(reduction='sum')\n", + " # 开始计算时间\n", + " start_time = time.time()\n", + " pn_loss = ctc_loss(logits, ys_pad, hlens, ys_lens)\n", + " end_time = time.time()\n", + " \n", + " pn_loss = pn_loss / logits.shape[1]\n", + " return end_time - start_time, pn_loss.item()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "40413ef9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "CPU, iteration 10\n", + "torch_ctc_loss 159.17137145996094\n", + "paddle_ctc_loss 159.16574096679688\n", + "paddle average time 1.718252992630005\n", + "torch average time 0.17536230087280275\n", + "paddle time / torch time (cpu) 9.798303193320452\n", + "\n", + "GPU, iteration 10\n", + "torch_ctc_loss 159.172119140625\n", + "paddle_ctc_loss 159.17205810546875\n", + "paddle average time 0.018606925010681154\n", + "torch average time 0.0026710033416748047\n", + "paddle time / torch time (gpu) 6.966267963938231\n" + ] + } + ], + "source": [ + "# 使用 CPU\n", + "\n", + "iteration = 10\n", + "use_cpu = True\n", + "torch_total_time = 0\n", + "paddle_total_time = 0\n", + "for _ in range(iteration):\n", + " cost_time, torch_loss = torch_ctc_loss(use_cpu)\n", + " torch_total_time += cost_time\n", + "for _ in range(iteration):\n", + " cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n", + " paddle_total_time += cost_time\n", + "print (\"CPU, iteration\", iteration)\n", + "print (\"torch_ctc_loss\", torch_loss)\n", + "print (\"paddle_ctc_loss\", paddle_loss)\n", + "print (\"paddle average time\", paddle_total_time / iteration)\n", + "print (\"torch average time\", torch_total_time / iteration)\n", + "print (\"paddle time / torch time (cpu)\" , paddle_total_time/ torch_total_time)\n", + "\n", + "print (\"\")\n", + "\n", + "# 使用 GPU\n", + "\n", + "use_cpu = False\n", + "torch_total_time = 0\n", + "paddle_total_time = 0\n", + "for _ in range(iteration):\n", + " cost_time, torch_loss = torch_ctc_loss(use_cpu)\n", + " torch_total_time += cost_time\n", + "for _ in range(iteration):\n", + " cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n", + " paddle_total_time += cost_time\n", + "print (\"GPU, iteration\", iteration)\n", + "print (\"torch_ctc_loss\", torch_loss)\n", + "print (\"paddle_ctc_loss\", paddle_loss)\n", + "print (\"paddle average time\", paddle_total_time / iteration)\n", + "print (\"torch average time\", torch_total_time / iteration)\n", + "print (\"paddle time / torch time (gpu)\" , paddle_total_time/ torch_total_time)" + ] + }, + { + "cell_type": "markdown", + "id": "7cdf8697", + "metadata": {}, + "source": [ + "## 其他: 使用 PaddleSpeech 中的 ctcloss 查一下loss值" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "73fad81d", + "metadata": {}, + "outputs": [], + "source": [ + "logits_np = np.load(os.path.join(data_dir, \"logits.npy\"))\n", + "ys_pad_np = np.load(os.path.join(data_dir, \"ys_pad.npy\"))\n", + "hlens_np = np.load(os.path.join(data_dir, \"hlens.npy\"))\n", + "ys_lens_np = np.load(os.path.join(data_dir, \"ys_lens.npy\"))" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "2b41e45d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2022-02-25 11:34:34.143 | INFO | paddlespeech.s2t.modules.loss:__init__:41 - CTCLoss Loss reduction: sum, div-bs: True\n", + "2022-02-25 11:34:34.143 | INFO | paddlespeech.s2t.modules.loss:__init__:42 - CTCLoss Grad Norm Type: instance\n", + "2022-02-25 11:34:34.144 | INFO | paddlespeech.s2t.modules.loss:__init__:73 - CTCLoss() kwargs:{'norm_by_times': True}, not support: {'norm_by_batchsize': False, 'norm_by_total_logits_len': False}\n", + "loss 159.17205810546875\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/dygraph/math_op_patch.py:253: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32\n", + " format(lhs_dtype, rhs_dtype, lhs_dtype))\n" + ] + } + ], + "source": [ + "use_cpu = False\n", + "\n", + "from paddlespeech.s2t.modules.loss import CTCLoss\n", + "\n", + "if use_cpu:\n", + " device = \"cpu\"\n", + "else:\n", + " device = \"gpu\"\n", + "\n", + "paddle.set_device(device)\n", + "\n", + "blank_id=0\n", + "reduction_type='sum'\n", + "batch_average= True\n", + "grad_norm_type='instance'\n", + "\n", + "criterion = CTCLoss(\n", + " blank=blank_id,\n", + " reduction=reduction_type,\n", + " batch_average=batch_average,\n", + " grad_norm_type=grad_norm_type)\n", + "\n", + "logits = paddle.to_tensor(logits_np)\n", + "ys_pad = paddle.to_tensor(ys_pad_np,dtype='int32')\n", + "hlens = paddle.to_tensor(hlens_np, dtype='int64')\n", + "ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n", + "\n", + "pn_ctc_loss = criterion(logits, ys_pad, hlens, ys_lens)\n", + "print(\"loss\", pn_ctc_loss.item())\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "de525d38", + "metadata": {}, + "source": [ + "## 结论\n", + "在 CPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 9.8 倍 \n", + "在 GPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 6.87 倍\n", + "\n", + "## 其他结论\n", + "torch 的 ctc loss 在 CPU 和 GPU 下 都没有完全对齐。其中CPU的前向对齐精度大约为 1e-2。 GPU 的前向对齐精度大约为 1e-4 。" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.10" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}