You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
370 lines
10 KiB
370 lines
10 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "a1e738e0",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 获取测试的 logit 数据"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"id": "29d3368b",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"hlens.npy\n",
|
|
"logits.npy\n",
|
|
"ys_lens.npy\n",
|
|
"ys_pad.npy\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"!mkdir -p ./test_data\n",
|
|
"!test -f ./test_data/ctc_loss_compare_data.tgz || wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/ctc_loss_compare_data.tgz\n",
|
|
"!tar xzvf test_data/ctc_loss_compare_data.tgz -C ./test_data\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"id": "240caf1d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"import numpy as np\n",
|
|
"import time\n",
|
|
"\n",
|
|
"data_dir=\"./test_data\"\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"id": "91bad949",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Read the four reference arrays from data_dir, in a fixed order.\n",
"logits_np, ys_pad_np, hlens_np, ys_lens_np = (\n",
"    np.load(os.path.join(data_dir, file_name))\n",
"    for file_name in (\"logits.npy\", \"ys_pad.npy\", \"hlens.npy\", \"ys_lens.npy\")\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "4cef2f15",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 使用 torch 的 ctc loss"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"id": "90612004",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'1.10.1+cu102'"
|
|
]
|
|
},
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import torch\n",
|
|
"torch.__version__"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"id": "00799f97",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def torch_ctc_loss(use_cpu):\n",
"    \"\"\"Time one forward pass of torch.nn.CTCLoss on the reference arrays.\n",
"\n",
"    Reads the module-level arrays logits_np, ys_pad_np, hlens_np and\n",
"    ys_lens_np.\n",
"\n",
"    Args:\n",
"        use_cpu: run on the CPU when truthy, otherwise on the CUDA device.\n",
"\n",
"    Returns:\n",
"        Tuple (elapsed_seconds, loss): wall-clock time spent in log_softmax\n",
"        plus the CTC loss call, and the summed loss divided by\n",
"        ys_hat.size(1), as a Python float.\n",
"    \"\"\"\n",
"    device = torch.device(\"cpu\" if use_cpu else \"cuda\")\n",
"    on_gpu = device.type == \"cuda\"\n",
"\n",
"    reduction_type = \"sum\"\n",
"    ctc_loss = torch.nn.CTCLoss(reduction=reduction_type)\n",
"\n",
"    ys_hat = torch.tensor(logits_np, device=device)\n",
"    ys_pad = torch.tensor(ys_pad_np, device=device)\n",
"    hlens = torch.tensor(hlens_np, device=device)\n",
"    ys_lens = torch.tensor(ys_lens_np, device=device)\n",
"\n",
"    # torch.nn.CTCLoss documents its first input as (T, N, C); this swap\n",
"    # assumes logits_np is stored batch-first -- TODO confirm against the data.\n",
"    ys_hat = ys_hat.transpose(0, 1)\n",
"\n",
"    # CUDA work is queued asynchronously, so without synchronisation the\n",
"    # clock would stop before the kernels have actually finished. Drain the\n",
"    # device before starting the timer and again before stopping it.\n",
"    if on_gpu:\n",
"        torch.cuda.synchronize(device)\n",
"    start_time = time.time()\n",
"    ys_hat = ys_hat.log_softmax(2)\n",
"    loss = ctc_loss(ys_hat, ys_pad, hlens, ys_lens)\n",
"    if on_gpu:\n",
"        torch.cuda.synchronize(device)\n",
"    end_time = time.time()\n",
"\n",
"    # Normalise the summed loss by the size of axis 1 (after the swap above).\n",
"    loss = loss / ys_hat.size(1)\n",
"    return end_time - start_time, loss.item()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "ba47b5a4",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 使用 paddle 的 ctc loss"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"id": "6882a06e",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"'2.2.2'"
|
|
]
|
|
},
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import paddle\n",
|
|
"paddle.__version__"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"id": "3cfa3b7c",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"def paddle_ctc_loss(use_cpu):\n",
"    \"\"\"Time one forward pass of paddle.nn.CTCLoss on the reference arrays.\n",
"\n",
"    Reads the module-level arrays logits_np, ys_pad_np, hlens_np and\n",
"    ys_lens_np, and switches paddle's global device as a side effect.\n",
"\n",
"    Args:\n",
"        use_cpu: run on \"cpu\" when truthy, otherwise on \"gpu\".\n",
"\n",
"    Returns:\n",
"        Tuple (elapsed_seconds, loss): wall-clock time of the CTC loss call,\n",
"        and the summed loss divided by logits.shape[1], as a Python float.\n",
"    \"\"\"\n",
"    import paddle.nn as pn\n",
"\n",
"    device = \"cpu\" if use_cpu else \"gpu\"\n",
"    paddle.set_device(device)\n",
"\n",
"    logits = paddle.to_tensor(logits_np)\n",
"    ys_pad = paddle.to_tensor(ys_pad_np, dtype='int32')\n",
"    hlens = paddle.to_tensor(hlens_np, dtype='int64')\n",
"    ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n",
"\n",
"    # Swap the first two axes; presumably batch-first -> time-first, as the\n",
"    # loss expects -- TODO confirm against the data layout.\n",
"    logits = logits.transpose([1, 0, 2])\n",
"\n",
"    # Raw logits are passed in: no explicit log_softmax is applied here.\n",
"    ctc_loss = pn.CTCLoss(reduction='sum')\n",
"\n",
"    # GPU work may still be in flight when the Python call returns, so wait\n",
"    # for the device before starting the clock and again before stopping it.\n",
"    if not use_cpu:\n",
"        from paddle.device.cuda import synchronize as gpu_synchronize\n",
"        gpu_synchronize()\n",
"    start_time = time.time()\n",
"    pn_loss = ctc_loss(logits, ys_pad, hlens, ys_lens)\n",
"    if not use_cpu:\n",
"        gpu_synchronize()\n",
"    end_time = time.time()\n",
"\n",
"    # Normalise the summed loss by the size of axis 1 (after the swap above).\n",
"    pn_loss = pn_loss / logits.shape[1]\n",
"    return end_time - start_time, pn_loss.item()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"id": "40413ef9",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"CPU, iteration 10\n",
|
|
"torch_ctc_loss 159.17137145996094\n",
|
|
"paddle_ctc_loss 159.16574096679688\n",
|
|
"paddle average time 1.718252992630005\n",
|
|
"torch average time 0.17536230087280275\n",
|
|
"paddle time / torch time (cpu) 9.798303193320452\n",
|
|
"\n",
|
|
"GPU, iteration 10\n",
|
|
"torch_ctc_loss 159.172119140625\n",
|
|
"paddle_ctc_loss 159.17205810546875\n",
|
|
"paddle average time 0.018606925010681154\n",
|
|
"torch average time 0.0026710033416748047\n",
|
|
"paddle time / torch time (gpu) 6.966267963938231\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Benchmark both frameworks, first on the CPU and then on the GPU.\n",
"# The same measurement runs for each device instead of being written out twice.\n",
"\n",
"iteration = 10\n",
"\n",
"for use_cpu in (True, False):\n",
"    device_name = \"cpu\" if use_cpu else \"gpu\"\n",
"    if not use_cpu:\n",
"        # Blank line separating the CPU report from the GPU report.\n",
"        print (\"\")\n",
"\n",
"    # One untimed call per framework, so that one-time start-up work on the\n",
"    # first call (presumably e.g. device initialisation) stays out of the averages.\n",
"    torch_ctc_loss(use_cpu)\n",
"    paddle_ctc_loss(use_cpu)\n",
"\n",
"    torch_total_time = 0\n",
"    paddle_total_time = 0\n",
"    for _ in range(iteration):\n",
"        cost_time, torch_loss = torch_ctc_loss(use_cpu)\n",
"        torch_total_time += cost_time\n",
"    for _ in range(iteration):\n",
"        cost_time, paddle_loss = paddle_ctc_loss(use_cpu)\n",
"        paddle_total_time += cost_time\n",
"\n",
"    print (device_name.upper() + \", iteration\", iteration)\n",
"    print (\"torch_ctc_loss\", torch_loss)\n",
"    print (\"paddle_ctc_loss\", paddle_loss)\n",
"    print (\"paddle average time\", paddle_total_time / iteration)\n",
"    print (\"torch average time\", torch_total_time / iteration)\n",
"    print (\"paddle time / torch time (\" + device_name + \")\" , paddle_total_time/ torch_total_time)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "7cdf8697",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 其他: 使用 PaddleSpeech 中的 ctcloss 查一下loss值"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"id": "73fad81d",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Re-read the same four reference arrays from data_dir before the cross-check.\n",
"logits_np, ys_pad_np, hlens_np, ys_lens_np = (\n",
"    np.load(os.path.join(data_dir, file_name))\n",
"    for file_name in (\"logits.npy\", \"ys_pad.npy\", \"hlens.npy\", \"ys_lens.npy\")\n",
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "2b41e45d",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"2022-02-25 11:34:34.143 | INFO | paddlespeech.s2t.modules.loss:__init__:41 - CTCLoss Loss reduction: sum, div-bs: True\n",
|
|
"2022-02-25 11:34:34.143 | INFO | paddlespeech.s2t.modules.loss:__init__:42 - CTCLoss Grad Norm Type: instance\n",
|
|
"2022-02-25 11:34:34.144 | INFO | paddlespeech.s2t.modules.loss:__init__:73 - CTCLoss() kwargs:{'norm_by_times': True}, not support: {'norm_by_batchsize': False, 'norm_by_total_logits_len': False}\n",
|
|
"loss 159.17205810546875\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"/root/miniconda3/lib/python3.7/site-packages/paddle/fluid/dygraph/math_op_patch.py:253: UserWarning: The dtype of left and right variables are not the same, left dtype is paddle.float32, but right dtype is paddle.int32, the right dtype will convert to paddle.float32\n",
|
|
" format(lhs_dtype, rhs_dtype, lhs_dtype))\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from paddlespeech.s2t.modules.loss import CTCLoss\n",
"\n",
"# Device used for this cross-check; False selects the GPU.\n",
"use_cpu = False\n",
"device = \"cpu\" if use_cpu else \"gpu\"\n",
"paddle.set_device(device)\n",
"\n",
"# Settings handed to the PaddleSpeech CTCLoss wrapper.\n",
"blank_id = 0\n",
"reduction_type = 'sum'\n",
"batch_average = True\n",
"grad_norm_type = 'instance'\n",
"criterion = CTCLoss(\n",
"    blank=blank_id,\n",
"    reduction=reduction_type,\n",
"    batch_average=batch_average,\n",
"    grad_norm_type=grad_norm_type,\n",
")\n",
"\n",
"# NOTE(review): unlike paddle_ctc_loss, the logits are passed without swapping\n",
"# the first two axes -- presumably the wrapper handles the layout; confirm.\n",
"logits = paddle.to_tensor(logits_np)\n",
"ys_pad = paddle.to_tensor(ys_pad_np, dtype='int32')\n",
"hlens = paddle.to_tensor(hlens_np, dtype='int64')\n",
"ys_lens = paddle.to_tensor(ys_lens_np, dtype='int64')\n",
"\n",
"pn_ctc_loss = criterion(logits, ys_pad, hlens, ys_lens)\n",
"print(\"loss\", pn_ctc_loss.item())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"id": "de525d38",
|
|
"metadata": {},
|
|
"source": [
|
|
"## 结论\n",
|
|
"在 CPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 9.8 倍 \n",
|
|
"在 GPU 环境下: torch 的 CTC loss 的计算速度是 paddle 的 6.97 倍\n",
|
|
"\n",
|
|
"## 其他结论\n",
|
|
"paddle 与 torch 的 ctc loss 在 CPU 和 GPU 下都没有完全对齐。其中 CPU 的前向对齐精度大约为 1e-2,GPU 的前向对齐精度大约为 1e-4。"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.7.10"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|