remove notebook

3 years ago · 10cd656095
parent 4e9bc9ed5e
commit 10cd656095
17 changed files with 0 additions and 22811 deletions
--- a/.notebook/Linear_test.ipynb
+++ b/.notebook/Linear_test.ipynb
@ -1,605 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "academic-surname",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle\n",
-    "from paddle import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "fundamental-treasure",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/workspace/DeepSpeech-2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
-      "  and should_run_async(code)\n"
-     ]
-    }
-   ],
-   "source": [
-    "L = nn.Linear(256, 2048)\n",
-    "L2 = nn.Linear(2048, 256)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "consolidated-elephant",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "import torch\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "moderate-noise",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "float64\n",
-      "Tensor(shape=[2, 51, 256], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[[-1.54171216, -2.61531472, -1.79881978, ..., -0.31395876,  0.56513089, -0.44516513],\n",
-      "         [-0.79492962,  1.91157901,  0.66567147, ...,  0.54825783, -1.01471853, -0.84924090],\n",
-      "         [-1.22556651, -0.36225814,  0.65063190, ...,  0.65726501,  0.05563191,  0.09009409],\n",
-      "         ...,\n",
-      "         [ 0.38615900, -0.77905393,  0.99732304, ..., -1.38463700, -3.32365036, -1.31089687],\n",
-      "         [ 0.05579993,  0.06885809, -1.66662002, ..., -0.23346378, -3.29372883,  1.30561364],\n",
-      "         [ 1.90676069,  1.95093191, -0.28849599, ..., -0.06860496,  0.95347673,  1.00475824]],\n",
-      "\n",
-      "        [[-0.91453546,  0.55298805, -1.06146812, ..., -0.86378336,  1.00454640,  1.26062179],\n",
-      "         [ 0.10223761,  0.81301165,  2.36865163, ...,  0.16821407,  0.29240361,  1.05408621],\n",
-      "         [-1.33196676,  1.94433689,  0.01934209, ...,  0.48036841,  0.51585966,  1.22893548],\n",
-      "         ...,\n",
-      "         [-0.19558455, -0.47075930,  0.90796155, ..., -1.28598249, -0.24321797,  0.17734711],\n",
-      "         [ 0.89819717, -1.39516675,  0.17138045, ...,  2.39761519,  1.76364994, -0.52177650],\n",
-      "         [ 0.94122332, -0.18581429,  1.36099780, ...,  0.67647684, -0.04699665,  1.51205540]]])\n",
-      "tensor([[[-1.5417, -2.6153, -1.7988,  ..., -0.3140,  0.5651, -0.4452],\n",
-      "         [-0.7949,  1.9116,  0.6657,  ...,  0.5483, -1.0147, -0.8492],\n",
-      "         [-1.2256, -0.3623,  0.6506,  ...,  0.6573,  0.0556,  0.0901],\n",
-      "         ...,\n",
-      "         [ 0.3862, -0.7791,  0.9973,  ..., -1.3846, -3.3237, -1.3109],\n",
-      "         [ 0.0558,  0.0689, -1.6666,  ..., -0.2335, -3.2937,  1.3056],\n",
-      "         [ 1.9068,  1.9509, -0.2885,  ..., -0.0686,  0.9535,  1.0048]],\n",
-      "\n",
-      "        [[-0.9145,  0.5530, -1.0615,  ..., -0.8638,  1.0045,  1.2606],\n",
-      "         [ 0.1022,  0.8130,  2.3687,  ...,  0.1682,  0.2924,  1.0541],\n",
-      "         [-1.3320,  1.9443,  0.0193,  ...,  0.4804,  0.5159,  1.2289],\n",
-      "         ...,\n",
-      "         [-0.1956, -0.4708,  0.9080,  ..., -1.2860, -0.2432,  0.1773],\n",
-      "         [ 0.8982, -1.3952,  0.1714,  ...,  2.3976,  1.7636, -0.5218],\n",
-      "         [ 0.9412, -0.1858,  1.3610,  ...,  0.6765, -0.0470,  1.5121]]])\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/workspace/DeepSpeech-2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
-      "  and should_run_async(code)\n"
-     ]
-    }
-   ],
-   "source": [
-    "x = np.random.randn(2, 51, 256)\n",
-    "print(x.dtype)\n",
-    "px = paddle.to_tensor(x, dtype='float32')\n",
-    "tx = torch.tensor(x, dtype=torch.float32)\n",
-    "print(px)\n",
-    "print(tx)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "cooked-progressive",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "mechanical-prisoner",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data = np.load('enc_0_ff_out.npz', allow_pickle=True)\n",
-    "t_norm_ff = data['norm_ff']\n",
-    "t_ff_out = data['ff_out']\n",
-    "t_ff_l_x = data['ff_l_x']\n",
-    "t_ff_l_a_x = data['ff_l_a_x']\n",
-    "t_ff_l_a_l_x = data['ff_l_a_l_x']\n",
-    "t_ps = data['ps']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "indie-marriage",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "assured-zambia",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n",
-      "True\n",
-      "True\n",
-      "True\n"
-     ]
-    }
-   ],
-   "source": [
-    "L.set_state_dict({'weight': t_ps[0].T, 'bias': t_ps[1]})\n",
-    "L2.set_state_dict({'weight': t_ps[2].T, 'bias': t_ps[3]})\n",
-    "\n",
-    "ps = []\n",
-    "for n, p in L.named_parameters():\n",
-    "   ps.append(p)\n",
-    "\n",
-    "for n, p in L2.state_dict().items():\n",
-    "    ps.append(p)\n",
-    "    \n",
-    "for p, tp in zip(ps, t_ps):\n",
-    "    print(np.allclose(p.numpy(), tp.T))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "committed-jacob",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "extreme-traffic",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "optimum-milwaukee",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "viral-indian",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n",
-      "True\n",
-      "True\n",
-      "True\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Torch counterparts of the Paddle layers, initialised from the same reference parameters.\n",
-    "# `t_ps` comes from the cell that loads enc_0_ff_out.npz; that cell must run first.\n",
-    "TL = torch.nn.Linear(256, 2048)\n",
-    "TL2 = torch.nn.Linear(2048, 256)\n",
-    "TL.load_state_dict({'weight': torch.tensor(t_ps[0]), 'bias': torch.tensor(t_ps[1])})\n",
-    "TL2.load_state_dict({'weight': torch.tensor(t_ps[2]), 'bias': torch.tensor(t_ps[3])})\n",
-    "\n",
-    "# Read every parameter back as a NumPy array, layer by layer, in state_dict order.\n",
-    "ps = []\n",
-    "for layer in (TL, TL2):\n",
-    "    for tensor in layer.state_dict().values():\n",
-    "        ps.append(tensor.data.numpy())\n",
-    "\n",
-    "# Each loaded parameter must match the array it was created from.\n",
-    "for loaded, expected in zip(ps, t_ps):\n",
-    "    print(np.allclose(loaded, expected))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "skilled-vietnamese",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[[ 0.67277956  0.08313607 -0.62761104 ... -0.17480263  0.42718208\n",
-      "   -0.5787626 ]\n",
-      "  [ 0.91516656  0.5393416   1.7159258  ...  0.06144593  0.06486575\n",
-      "   -0.03350811]\n",
-      "  [ 0.438351    0.6227843   0.24096036 ...  1.0912522  -0.90929437\n",
-      "   -1.012989  ]\n",
-      "  ...\n",
-      "  [ 0.68631977  0.14240924  0.10763275 ... -0.11513516  0.48065388\n",
-      "    0.04070369]\n",
-      "  [-0.9525228   0.23197874  0.31264272 ...  0.5312439   0.18773697\n",
-      "   -0.8450228 ]\n",
-      "  [ 0.42024016 -0.04561988  0.54541194 ... -0.41933843 -0.00436018\n",
-      "   -0.06663495]]\n",
-      "\n",
-      " [[-0.11638781 -0.33566502 -0.20887226 ...  0.17423287 -0.9195841\n",
-      "   -0.8161046 ]\n",
-      "  [-0.3469874   0.88269687 -0.11887559 ... -0.15566081  0.16357468\n",
-      "   -0.20766167]\n",
-      "  [-0.3847657   0.3984318  -0.06963477 ... -0.00360622  1.2360432\n",
-      "   -0.26811332]\n",
-      "  ...\n",
-      "  [ 0.08230796 -0.46158582  0.54582864 ...  0.15747628 -0.44790155\n",
-      "    0.06020184]\n",
-      "  [-0.8095085   0.43163058 -0.42837143 ...  0.8627463   0.90656304\n",
-      "    0.15847842]\n",
-      "  [-1.485811   -0.18216592 -0.8882585  ...  0.32596245  0.7822631\n",
-      "   -0.6460344 ]]]\n",
-      "[[[ 0.67278004  0.08313602 -0.6276114  ... -0.17480245  0.42718196\n",
-      "   -0.5787625 ]\n",
-      "  [ 0.91516703  0.5393413   1.7159253  ...  0.06144581  0.06486579\n",
-      "   -0.03350812]\n",
-      "  [ 0.43835106  0.62278455  0.24096027 ...  1.0912521  -0.9092943\n",
-      "   -1.0129892 ]\n",
-      "  ...\n",
-      "  [ 0.6863195   0.14240888  0.10763284 ... -0.11513527  0.48065376\n",
-      "    0.04070365]\n",
-      "  [-0.9525231   0.23197863  0.31264275 ...  0.53124386  0.18773702\n",
-      "   -0.84502304]\n",
-      "  [ 0.42024007 -0.04561983  0.545412   ... -0.41933888 -0.00436005\n",
-      "   -0.066635  ]]\n",
-      "\n",
-      " [[-0.11638767 -0.33566508 -0.20887226 ...  0.17423296 -0.9195838\n",
-      "   -0.8161046 ]\n",
-      "  [-0.34698725  0.88269705 -0.11887549 ... -0.15566081  0.16357464\n",
-      "   -0.20766166]\n",
-      "  [-0.3847657   0.3984319  -0.06963488 ... -0.00360619  1.2360426\n",
-      "   -0.26811326]\n",
-      "  ...\n",
-      "  [ 0.08230786 -0.4615857   0.5458287  ...  0.15747619 -0.44790167\n",
-      "    0.06020182]\n",
-      "  [-0.8095083   0.4316307  -0.42837155 ...  0.862746    0.9065631\n",
-      "    0.15847899]\n",
-      "  [-1.485811   -0.18216613 -0.8882584  ...  0.32596254  0.7822631\n",
-      "   -0.6460344 ]]]\n",
-      "True\n",
-      "False\n"
-     ]
-    }
-   ],
-   "source": [
-    "y = L(px)\n",
-    "print(y.numpy())\n",
-    "\n",
-    "ty = TL(tx)\n",
-    "print(ty.data.numpy())\n",
-    "print(np.allclose(px.numpy(), tx.detach().numpy()))\n",
-    "print(np.allclose(y.numpy(), ty.detach().numpy()))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "incorrect-allah",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "prostate-cameroon",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "governmental-surge",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[ 0.04476918  0.554463   -0.3027508  ... -0.49600336  0.3751858\n",
-      "   0.8254095 ]\n",
-      " [ 0.95594174 -0.29528382 -1.2899452  ...  0.43718258  0.05584608\n",
-      "  -0.06974669]]\n",
-      "[[ 0.04476918  0.5544631  -0.3027507  ... -0.49600336  0.37518573\n",
-      "   0.8254096 ]\n",
-      " [ 0.95594174 -0.29528376 -1.2899454  ...  0.4371827   0.05584623\n",
-      "  -0.0697467 ]]\n",
-      "True\n",
-      "False\n",
-      "True\n"
-     ]
-    }
-   ],
-   "source": [
-    "x = np.random.randn(2, 256)\n",
-    "px = paddle.to_tensor(x, dtype='float32')\n",
-    "tx = torch.tensor(x, dtype=torch.float32)\n",
-    "y = L(px)\n",
-    "print(y.numpy())\n",
-    "ty = TL(tx)\n",
-    "print(ty.data.numpy())\n",
-    "print(np.allclose(px.numpy(), tx.detach().numpy()))\n",
-    "print(np.allclose(y.numpy(), ty.detach().numpy()))\n",
-    "print(np.allclose(y.numpy(), ty.detach().numpy(), atol=1e-5))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "confidential-jacket",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "improved-civilization",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "5e7e7c9fde8350084abf1898cf52651cfc84b17a\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(paddle.version.commit)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "d1e2d3b4",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['__builtins__',\n",
-       " '__cached__',\n",
-       " '__doc__',\n",
-       " '__file__',\n",
-       " '__loader__',\n",
-       " '__name__',\n",
-       " '__package__',\n",
-       " '__spec__',\n",
-       " 'commit',\n",
-       " 'full_version',\n",
-       " 'istaged',\n",
-       " 'major',\n",
-       " 'minor',\n",
-       " 'mkl',\n",
-       " 'patch',\n",
-       " 'rc',\n",
-       " 'show',\n",
-       " 'with_mkl']"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dir(paddle.version)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "c880c719",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2.1.0\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(paddle.version.full_version)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "f26977bf",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "commit: 5e7e7c9fde8350084abf1898cf52651cfc84b17a\n",
-      "None\n"
-     ]
-    }
-   ],
-   "source": [
-    "# show() prints its report itself and returns None; wrapping it in print() would add a\n",
-    "# stray `None` line to the output, so call it directly.\n",
-    "paddle.version.show()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "04ad47f6",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "1.6.0\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(torch.__version__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "e1e03830",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['__builtins__',\n",
-       " '__cached__',\n",
-       " '__doc__',\n",
-       " '__file__',\n",
-       " '__loader__',\n",
-       " '__name__',\n",
-       " '__package__',\n",
-       " '__spec__',\n",
-       " '__version__',\n",
-       " 'cuda',\n",
-       " 'debug',\n",
-       " 'git_version',\n",
-       " 'hip']"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dir(torch.version)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "4ad0389b",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'b31f58de6fa8bbda5353b3c77d9be4914399724d'"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "torch.version.git_version"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "7870ea10",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'10.2'"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "torch.version.cuda"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "db8ee5a7",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6321ec2a",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/.notebook/WarmupLR.ipynb
+++ b/.notebook/WarmupLR.ipynb
--- a/.notebook/audio_feature.ipynb
+++ b/.notebook/audio_feature.ipynb
--- a/.notebook/compute_cmvn_loader_test.ipynb
+++ b/.notebook/compute_cmvn_loader_test.ipynb
--- a/.notebook/dataloader.ipynb
+++ b/.notebook/dataloader.ipynb
@ -1,389 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "emerging-meter",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  def convert_to_list(value, n, name, dtype=np.int):\n",
-      "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/scipy/fftpack/__init__.py:103: DeprecationWarning: The module numpy.dual is deprecated.  Instead of using dual, use the functions directly from numpy or scipy.\n",
-      "  from numpy.dual import register_func\n",
-      "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/scipy/special/orthogonal.py:81: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,\n",
-      "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/numba/core/types/__init__.py:108: DeprecationWarning: `np.long` is a deprecated alias for `np.compat.long`. To silence this warning, use `np.compat.long` by itself. In the likely event your code does not need to work on Python 2 you can use the builtin `int` for which `np.compat.long` is itself an alias. Doing this will not modify any behaviour and is safe. When replacing `np.long`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  long_ = _make_signed(np.long)\n",
-      "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/numba/core/types/__init__.py:109: DeprecationWarning: `np.long` is a deprecated alias for `np.compat.long`. To silence this warning, use `np.compat.long` by itself. In the likely event your code does not need to work on Python 2 you can use the builtin `int` for which `np.compat.long` is itself an alias. Doing this will not modify any behaviour and is safe. When replacing `np.long`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  ulong = _make_unsigned(np.long)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import math\n",
-    "import random\n",
-    "import tarfile\n",
-    "import logging\n",
-    "import numpy as np\n",
-    "from collections import namedtuple\n",
-    "from functools import partial\n",
-    "\n",
-    "import paddle\n",
-    "from paddle.io import Dataset\n",
-    "from paddle.io import DataLoader\n",
-    "from paddle.io import BatchSampler\n",
-    "from paddle.io import DistributedBatchSampler\n",
-    "from paddle import distributed as dist\n",
-    "\n",
-    "from data_utils.utility import read_manifest\n",
-    "from data_utils.augmentor.augmentation import AugmentationPipeline\n",
-    "from data_utils.featurizer.speech_featurizer import SpeechFeaturizer\n",
-    "from data_utils.speech import SpeechSegment\n",
-    "from data_utils.normalizer import FeatureNormalizer\n",
-    "\n",
-    "\n",
-    "from data_utils.dataset import (\n",
-    "    DeepSpeech2Dataset,\n",
-    "    DeepSpeech2DistributedBatchSampler,\n",
-    "    DeepSpeech2BatchSampler,\n",
-    "    SpeechCollator,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "excessive-american",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def create_dataloader(manifest_path,\n",
-    "                      vocab_filepath,\n",
-    "                      mean_std_filepath,\n",
-    "                      augmentation_config='{}',\n",
-    "                      max_duration=float('inf'),\n",
-    "                      min_duration=0.0,\n",
-    "                      stride_ms=10.0,\n",
-    "                      window_ms=20.0,\n",
-    "                      max_freq=None,\n",
-    "                      specgram_type='linear',\n",
-    "                      use_dB_normalization=True,\n",
-    "                      random_seed=0,\n",
-    "                      keep_transcription_text=False,\n",
-    "                      is_training=False,\n",
-    "                      batch_size=1,\n",
-    "                      num_workers=0,\n",
-    "                      sortagrad=False,\n",
-    "                      shuffle_method=None,\n",
-    "                      dist=False):\n",
-    "    \"\"\"Build a paddle DataLoader over a DeepSpeech2Dataset.\n",
-    "\n",
-    "    The arguments from ``manifest_path`` through ``keep_transcription_text``\n",
-    "    are forwarded unchanged to ``DeepSpeech2Dataset``.\n",
-    "\n",
-    "    Args:\n",
-    "        is_training: Passed to the sampler as ``shuffle``, ``drop_last`` and\n",
-    "            ``sortagrad``; also tells the collate function whether each\n",
-    "            transcript is a sequence of ids (True) or a raw string (False).\n",
-    "        batch_size: Batch size given to the batch sampler.\n",
-    "        num_workers: Worker count given to the DataLoader.\n",
-    "        sortagrad: Accepted for interface compatibility only.\n",
-    "            NOTE(review): this value is never forwarded; the samplers receive\n",
-    "            ``sortagrad=is_training``. Confirm whether that is intended\n",
-    "            before wiring it through.\n",
-    "        shuffle_method: Forwarded to the batch sampler.\n",
-    "        dist: If truthy, use DeepSpeech2DistributedBatchSampler, otherwise\n",
-    "            DeepSpeech2BatchSampler. Inside this function the name shadows\n",
-    "            the ``dist`` module imported at the top of the notebook.\n",
-    "\n",
-    "    Returns:\n",
-    "        A DataLoader whose batches are the tuple\n",
-    "        ``(padded_audios, texts, audio_lens, text_lens)``.\n",
-    "    \"\"\"\n",
-    "    dataset = DeepSpeech2Dataset(\n",
-    "        manifest_path,\n",
-    "        vocab_filepath,\n",
-    "        mean_std_filepath,\n",
-    "        augmentation_config=augmentation_config,\n",
-    "        max_duration=max_duration,\n",
-    "        min_duration=min_duration,\n",
-    "        stride_ms=stride_ms,\n",
-    "        window_ms=window_ms,\n",
-    "        max_freq=max_freq,\n",
-    "        specgram_type=specgram_type,\n",
-    "        use_dB_normalization=use_dB_normalization,\n",
-    "        random_seed=random_seed,\n",
-    "        keep_transcription_text=keep_transcription_text)\n",
-    "\n",
-    "    if dist:\n",
-    "        batch_sampler = DeepSpeech2DistributedBatchSampler(\n",
-    "            dataset,\n",
-    "            batch_size,\n",
-    "            num_replicas=None,\n",
-    "            rank=None,\n",
-    "            shuffle=is_training,\n",
-    "            drop_last=is_training,\n",
-    "            sortagrad=is_training,\n",
-    "            shuffle_method=shuffle_method)\n",
-    "    else:\n",
-    "        batch_sampler = DeepSpeech2BatchSampler(\n",
-    "            dataset,\n",
-    "            shuffle=is_training,\n",
-    "            batch_size=batch_size,\n",
-    "            drop_last=is_training,\n",
-    "            sortagrad=is_training,\n",
-    "            shuffle_method=shuffle_method)\n",
-    "\n",
-    "    def padding_batch(batch, padding_to=-1, flatten=False, is_training=True):\n",
-    "        \"\"\"\n",
-    "        Padding audio features with zeros to make them have the same shape (or\n",
-    "        a user-defined shape) within one batch.\n",
-    "\n",
-    "        If ``padding_to`` is -1, the maximum shape in the batch will be used\n",
-    "        as the target shape for padding. Otherwise, ``padding_to`` will be the\n",
-    "        target shape (only refers to the second axis).\n",
-    "\n",
-    "        If ``flatten`` is True, features will be flattened to a 1-D array.\n",
-    "\n",
-    "        If ``is_training`` is True each text is copied as ids; otherwise it is\n",
-    "        treated as a string and stored as the code point of each character.\n",
-    "        Transcripts are zero-padded to the longest one in the batch.\n",
-    "        \"\"\"\n",
-    "        # get target shape\n",
-    "        max_length = max(audio.shape[1] for audio, _ in batch)\n",
-    "        if padding_to != -1:\n",
-    "            if padding_to < max_length:\n",
-    "                raise ValueError(\"If padding_to is not -1, it should be larger \"\n",
-    "                                 \"than any instance's shape in the batch\")\n",
-    "            max_length = padding_to\n",
-    "        max_text_length = max(len(text) for _, text in batch)\n",
-    "        # padding\n",
-    "        padded_audios = []\n",
-    "        audio_lens = []\n",
-    "        texts, text_lens = [], []\n",
-    "        for audio, text in batch:\n",
-    "            padded_audio = np.zeros([audio.shape[0], max_length])\n",
-    "            padded_audio[:, :audio.shape[1]] = audio\n",
-    "            if flatten:\n",
-    "                padded_audio = padded_audio.flatten()\n",
-    "            padded_audios.append(padded_audio)\n",
-    "            audio_lens.append(audio.shape[1])\n",
-    "\n",
-    "            padded_text = np.zeros([max_text_length])\n",
-    "            if is_training:\n",
-    "                padded_text[:len(text)] = text  # ids\n",
-    "            else:\n",
-    "                padded_text[:len(text)] = [ord(t) for t in text]  # string\n",
-    "\n",
-    "            texts.append(padded_text)\n",
-    "            text_lens.append(len(text))\n",
-    "\n",
-    "        padded_audios = np.array(padded_audios).astype('float32')\n",
-    "        audio_lens = np.array(audio_lens).astype('int64')\n",
-    "        texts = np.array(texts).astype('int32')\n",
-    "        text_lens = np.array(text_lens).astype('int64')\n",
-    "        return padded_audios, texts, audio_lens, text_lens\n",
-    "\n",
-    "    # The collate function must see the same is_training flag as the sampler so\n",
-    "    # that transcripts are decoded the right way.\n",
-    "    loader = DataLoader(\n",
-    "        dataset,\n",
-    "        batch_sampler=batch_sampler,\n",
-    "        collate_fn=partial(padding_batch, is_training=is_training),\n",
-    "        num_workers=num_workers)\n",
-    "    return loader"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "naval-brave",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'num_samples': 5, 'beam_size': 500, 'num_proc_bsearch': 8, 'num_conv_layers': 2, 'num_rnn_layers': 3, 'rnn_layer_size': 2048, 'alpha': 2.5, 'beta': 0.3, 'cutoff_prob': 1.0, 'cutoff_top_n': 40, 'use_gru': False, 'use_gpu': True, 'share_rnn_weights': True, 'infer_manifest': 'examples/aishell/data/manifest.dev', 'mean_std_path': 'examples/aishell/data/mean_std.npz', 'vocab_path': 'examples/aishell/data/vocab.txt', 'lang_model_path': 'models/lm/common_crawl_00.prune01111.trie.klm', 'model_path': 'examples/aishell/checkpoints/step_final', 'decoding_method': 'ctc_beam_search', 'error_rate_type': 'wer', 'specgram_type': 'linear'}\n"
-     ]
-    }
-   ],
-   "source": [
-    "import sys\n",
-    "import argparse\n",
-    "import functools\n",
-    "from utils.utility import add_arguments, print_arguments\n",
-    "parser = argparse.ArgumentParser(description=__doc__)\n",
-    "add_arg = functools.partial(add_arguments, argparser=parser)\n",
-    "# yapf: disable\n",
-    "add_arg('num_samples',      int,    5,     \"# of samples to infer.\")\n",
-    "add_arg('beam_size',        int,    500,    \"Beam search width.\")\n",
-    "add_arg('num_proc_bsearch', int,    8,      \"# of CPUs for beam search.\")\n",
-    "add_arg('num_conv_layers',  int,    2,      \"# of convolution layers.\")\n",
-    "add_arg('num_rnn_layers',   int,    3,      \"# of recurrent layers.\")\n",
-    "add_arg('rnn_layer_size',   int,    2048,   \"# of recurrent cells per layer.\")\n",
-    "add_arg('alpha',            float,  2.5,    \"Coef of LM for beam search.\")\n",
-    "add_arg('beta',             float,  0.3,    \"Coef of WC for beam search.\")\n",
-    "add_arg('cutoff_prob',      float,  1.0,    \"Cutoff probability for pruning.\")\n",
-    "add_arg('cutoff_top_n',     int,    40,     \"Cutoff number for pruning.\")\n",
-    "add_arg('use_gru',          bool,   False,  \"Use GRUs instead of simple RNNs.\")\n",
-    "add_arg('use_gpu',          bool,   True,   \"Use GPU or not.\")\n",
-    "add_arg('share_rnn_weights',bool,   True,   \"Share input-hidden weights across \"\n",
-    "                                            \"bi-directional RNNs. Not for GRU.\")\n",
-    "add_arg('infer_manifest',   str,\n",
-    "        'examples/aishell/data/manifest.dev',\n",
-    "        \"Filepath of manifest to infer.\")\n",
-    "add_arg('mean_std_path',    str,\n",
-    "        'examples/aishell/data/mean_std.npz',\n",
-    "        \"Filepath of normalizer's mean & std.\")\n",
-    "add_arg('vocab_path',       str,\n",
-    "        'examples/aishell/data/vocab.txt',\n",
-    "        \"Filepath of vocabulary.\")\n",
-    "add_arg('lang_model_path',  str,\n",
-    "        'models/lm/common_crawl_00.prune01111.trie.klm',\n",
-    "        \"Filepath for language model.\")\n",
-    "add_arg('model_path',       str,\n",
-    "        'examples/aishell/checkpoints/step_final',\n",
-    "        \"If None, the training starts from scratch, \"\n",
-    "        \"otherwise, it resumes from the pre-trained model.\")\n",
-    "add_arg('decoding_method',  str,\n",
-    "        'ctc_beam_search',\n",
-    "        \"Decoding method. Options: ctc_beam_search, ctc_greedy\",\n",
-    "        choices = ['ctc_beam_search', 'ctc_greedy'])\n",
-    "add_arg('error_rate_type',  str,\n",
-    "        'wer',\n",
-    "        \"Error rate type for evaluation.\",\n",
-    "        choices=['wer', 'cer'])\n",
-    "add_arg('specgram_type',    str,\n",
-    "        'linear',\n",
-    "        \"Audio feature type. Options: linear, mfcc.\",\n",
-    "        choices=['linear', 'mfcc'])\n",
-    "# yapf: disable\n",
-    "args = parser.parse_args([])\n",
-    "print(vars(args))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "bearing-physics",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "batch_reader = create_dataloader(\n",
-    "            manifest_path=args.infer_manifest,\n",
-    "            vocab_filepath=args.vocab_path,\n",
-    "            mean_std_filepath=args.mean_std_path,\n",
-    "            augmentation_config='{}',\n",
-    "            #max_duration=float('inf'),\n",
-    "            max_duration=27.0,\n",
-    "            min_duration=0.0,\n",
-    "            stride_ms=10.0,\n",
-    "            window_ms=20.0,\n",
-    "            max_freq=None,\n",
-    "            specgram_type=args.specgram_type,\n",
-    "            use_dB_normalization=True,\n",
-    "            random_seed=0,\n",
-    "            keep_transcription_text=True,\n",
-    "            is_training=False,\n",
-    "            batch_size=args.num_samples,\n",
-    "            sortagrad=True,\n",
-    "            shuffle_method=None,\n",
-    "            dist=False)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "id": "classified-melissa",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "test Tensor(shape=[5, 6], dtype=int32, place=CUDAPinnedPlace, stop_gradient=True,\n",
-      "       [[22823, 26102, 20195, 37324, 0    , 0    ],\n",
-      "        [22238, 26469, 23601, 22909, 0    , 0    ],\n",
-      "        [20108, 26376, 22235, 26085, 0    , 0    ],\n",
-      "        [36824, 35201, 20445, 25345, 32654, 24863],\n",
-      "        [29042, 27748, 21463, 23456, 0    , 0    ]])\n",
-      "test raw 大时代里\n",
-      "test raw 煲汤受宠\n",
-      "audio len Tensor(shape=[5], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
-      "       [163, 167, 180, 186, 186])\n",
-      "test len Tensor(shape=[5], dtype=int64, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [4, 4, 4, 6, 4])\n",
-      "audio Tensor(shape=[5, 161, 186], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n",
-      "       [[[ 1.11669052,  0.79015088,  0.93658292, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [ 0.83549136,  0.72643483,  0.83578080, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.89155018, -0.18894747, -0.53357804, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         ...,\n",
-      "         [ 0.33386710, -0.81240511,  0.12869737, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.17537928,  0.58380985,  0.70696265, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.84175998,  1.22041416,  0.07929770, ...,  0.        ,  0.        ,  0.        ]],\n",
-      "\n",
-      "        [[-0.35964420,  0.77392709,  0.71409988, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.15990183,  0.42962283,  0.06222462, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.31166190, -0.74864638, -0.52836996, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         ...,\n",
-      "         [-0.27546275,  0.32889456,  0.12410031, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [ 0.16264282,  0.49418071, -0.15960945, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [ 0.12476666,  0.00516864,  1.16021466, ...,  0.        ,  0.        ,  0.        ]],\n",
-      "\n",
-      "        [[ 0.90202141,  1.48541915,  0.92062062, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [ 0.82661545,  1.37171340,  0.86746097, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.62287915, -0.48645937,  0.35041964, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         ...,\n",
-      "         [ 0.07376949,  0.07138316,  0.76355994, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.32306790,  0.43247896,  1.27311838, ...,  0.        ,  0.        ,  0.        ],\n",
-      "         [-0.97667056,  0.60747612,  0.79181534, ...,  0.        ,  0.        ,  0.        ]],\n",
-      "\n",
-      "        [[ 0.72022128,  0.95428467,  0.92766261, ...,  0.29105374, -0.45564806, -0.62151009],\n",
-      "         [ 0.42083180,  0.49279949,  0.82724041, ..., -0.17333922, -1.45363355, -0.61673522],\n",
-      "         [-0.76116520, -0.84750438, -0.09512503, ..., -1.01497340, -1.42781055, -0.80859023],\n",
-      "         ...,\n",
-      "         [-0.23009977,  1.06155431,  1.09065628, ...,  0.25581080,  0.53794998, -1.22650719],\n",
-      "         [-1.37693381,  0.30778193,  0.17152318, ...,  0.51650339,  0.25580606,  0.83097816],\n",
-      "         [-1.62180591,  1.30567718,  1.09928656, ..., -0.77590007,  1.27712476,  0.53189957]],\n",
-      "\n",
-      "        [[ 1.03205252, -0.51535392,  0.21077573, ...,  0.76618457,  1.27425683,  1.52250278],\n",
-      "         [ 0.82059991,  0.43990925,  0.13090958, ...,  0.86662549,  1.01687658,  1.48495352],\n",
-      "         [-0.75489789, -0.01997089, -0.65174174, ...,  0.09061214, -0.55211234, -0.01614586],\n",
-      "         ...,\n",
-      "         [ 0.50985396,  1.84555030,  0.79185146, ...,  1.13666189,  1.19898069,  1.98158395],\n",
-      "         [ 1.98721015,  2.52385354,  1.11714780, ...,  0.19416514,  1.11329341,  0.64460152],\n",
-      "         [ 2.69512844,  1.90993905,  0.50245082, ..., -0.50902629,  0.03333465, -1.24584770]]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "for idx, (audio, audio_len, text, text_len) in enumerate(batch_reader()):\n",
-    "    print('test', text)\n",
-    "    print(\"test raw\", ''.join( chr(i) for i in text[0][:int(text_len[0])] ))\n",
-    "    print(\"test raw\", ''.join( chr(i) for i in text[-1][:int(text_len[-1])] ))\n",
-    "    print('audio len', audio_len)\n",
-    "    print('test len', text_len)\n",
-    "    print('audio', audio)\n",
-    "    break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "unexpected-skating",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "minus-modern",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/.notebook/dataloader_with_tokens_tokenids.ipynb
+++ b/.notebook/dataloader_with_tokens_tokenids.ipynb
--- a/.notebook/espnet_dataloader.ipynb
+++ b/.notebook/espnet_dataloader.ipynb
--- a/.notebook/hack_api_test.ipynb
+++ b/.notebook/hack_api_test.ipynb
@ -1,290 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "breeding-haven",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'/home/ssd5/zhanghui/DeepSpeech2.x'"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%cd ..\n",
-    "%pwd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "appropriate-theta",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LICENSE       deepspeech  examples\t\t    requirements.txt  tools\r\n",
-      "README.md     docs\t  libsndfile-1.0.28\t    setup.sh\t      utils\r\n",
-      "README_cn.md  env.sh\t  libsndfile-1.0.28.tar.gz  tests\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "!ls"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "entire-bloom",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  def convert_to_list(value, n, name, dtype=np.int):\n",
-      "WARNING:root:override cat of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "WARNING:root:register user masked_fill to paddle.Tensor, remove this when fixed!\n",
-      "WARNING:root:register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
-      "WARNING:root:register user repeat to paddle.Tensor, remove this when fixed!\n",
-      "WARNING:root:register user glu to paddle.nn.functional, remove this when fixed!\n",
-      "WARNING:root:register user GLU to paddle.nn, remove this when fixed!\n",
-      "WARNING:root:register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
-      "WARNING:root:override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n"
-     ]
-    }
-   ],
-   "source": [
-    "from deepspeech.modules import loss"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "governmental-aircraft",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
-      "  and should_run_async(code)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import paddle"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "proprietary-disaster",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<function deepspeech.modules.repeat(xs: paddle.VarBase, *size: Any) -> paddle.VarBase>"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "paddle.Tensor.repeat"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "first-diagram",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<property at 0x7fb515eeeb88>"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "paddle.Tensor.size"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "intelligent-david",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<function paddle.tensor.manipulation.concat(x, axis=0, name=None)>"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "paddle.Tensor.cat"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "bronze-tenant",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "a = paddle.to_tensor([12,32, 10, 12, 123,32 ,4])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "balanced-bearing",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "7"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "a.size"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "extreme-republic",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def size(xs: paddle.Tensor, *args: int) -> paddle.Tensor:\n",
-    "    nargs = len(args)\n",
-    "    assert (nargs <= 1)\n",
-    "    s = paddle.shape(xs)\n",
-    "    if nargs == 1:\n",
-    "        return s[args[0]]\n",
-    "    else:\n",
-    "        return s\n",
-    "\n",
-    "# logger.warn(\n",
-    "#     \"override size of paddle.Tensor if exists or register, remove this when fixed!\"\n",
-    "# )\n",
-    "paddle.Tensor.size = size"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "gross-addiction",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
-       "       [7])"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "a.size(0)\n",
-    "a.size()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "adverse-dining",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
-       "       [7])"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "a.size()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "popular-potato",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/.notebook/jit_infer.ipynb
+++ b/.notebook/jit_infer.ipynb
@ -1,672 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'/home/ssd5/zhanghui/DeepSpeech2.x'"
-      ]
-     },
-     "execution_count": 1,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "%cd ..\n",
-    "%pwd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2021-03-26 02:55:23,873 - WARNING - register user softmax to paddle, remove this when fixed!\n",
-      "2021-03-26 02:55:23,875 - WARNING - register user sigmoid to paddle, remove this when fixed!\n",
-      "2021-03-26 02:55:23,875 - WARNING - register user relu to paddle, remove this when fixed!\n",
-      "2021-03-26 02:55:23,876 - WARNING - override cat of paddle if exists or register, remove this when fixed!\n",
-      "2021-03-26 02:55:23,876 - WARNING - override eq of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "2021-03-26 02:55:23,877 - WARNING - override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "2021-03-26 02:55:23,877 - WARNING - override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n",
-      "2021-03-26 02:55:23,878 - WARNING - register user view to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,878 - WARNING - register user view_as to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,879 - WARNING - register user masked_fill to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,880 - WARNING - register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,880 - WARNING - register user fill_ to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,881 - WARNING - register user repeat to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,881 - WARNING - register user softmax to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,882 - WARNING - register user sigmoid to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,882 - WARNING - register user relu to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-26 02:55:23,883 - WARNING - register user glu to paddle.nn.functional, remove this when fixed!\n",
-      "2021-03-26 02:55:23,883 - WARNING - override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n",
-      "2021-03-26 02:55:23,884 - WARNING - register user GLU to paddle.nn, remove this when fixed!\n",
-      "2021-03-26 02:55:23,884 - WARNING - register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/scipy/fftpack/__init__.py:103: DeprecationWarning: The module numpy.dual is deprecated.  Instead of using dual, use the functions directly from numpy or scipy.\n",
-      "  from numpy.dual import register_func\n",
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/scipy/special/orthogonal.py:81: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  from numpy import (exp, inf, pi, sqrt, floor, sin, cos, around, int,\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "import time\n",
-    "import argparse\n",
-    "import functools\n",
-    "import paddle\n",
-    "import numpy as np\n",
-    "\n",
-    "from deepspeech.utils.socket_server import warm_up_test\n",
-    "from deepspeech.utils.socket_server import AsrTCPServer\n",
-    "from deepspeech.utils.socket_server import AsrRequestHandler\n",
-    "\n",
-    "from deepspeech.training.cli import default_argument_parser\n",
-    "from deepspeech.exps.deepspeech2.config import get_cfg_defaults\n",
-    "\n",
-    "from deepspeech.frontend.utility import read_manifest\n",
-    "from deepspeech.utils.utility import add_arguments, print_arguments\n",
-    "\n",
-    "from deepspeech.models.ds2 import DeepSpeech2Model\n",
-    "from deepspeech.models.ds2 import DeepSpeech2InferModel\n",
-    "from deepspeech.io.dataset import ManifestDataset\n",
-    "\n",
-    "\n",
-    "\n",
-    "from deepspeech.frontend.utility import read_manifest"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "0.0.0\n",
-      "e7f28d6c0db54eb9c9a810612300b526687e56a6\n",
-      "OFF\n",
-      "OFF\n",
-      "commit: e7f28d6c0db54eb9c9a810612300b526687e56a6\n",
-      "None\n",
-      "0\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
-      "  and should_run_async(code)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "['__builtins__',\n",
-       " '__cached__',\n",
-       " '__doc__',\n",
-       " '__file__',\n",
-       " '__loader__',\n",
-       " '__name__',\n",
-       " '__package__',\n",
-       " '__spec__',\n",
-       " 'commit',\n",
-       " 'full_version',\n",
-       " 'istaged',\n",
-       " 'major',\n",
-       " 'minor',\n",
-       " 'mkl',\n",
-       " 'patch',\n",
-       " 'rc',\n",
-       " 'show',\n",
-       " 'with_mkl']"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "print(paddle.__version__)\n",
-    "print(paddle.version.commit)\n",
-    "print(paddle.version.with_mkl)\n",
-    "print(paddle.version.mkl())\n",
-    "print(paddle.version.show())\n",
-    "print(paddle.version.patch)\n",
-    "dir(paddle.version)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "data:\n",
-      "  augmentation_config: conf/augmentation.config\n",
-      "  batch_size: 64\n",
-      "  dev_manifest: data/manifest.dev\n",
-      "  keep_transcription_text: False\n",
-      "  max_duration: 27.0\n",
-      "  max_freq: None\n",
-      "  mean_std_filepath: examples/aishell/data/mean_std.npz\n",
-      "  min_duration: 0.0\n",
-      "  n_fft: None\n",
-      "  num_workers: 0\n",
-      "  random_seed: 0\n",
-      "  shuffle_method: batch_shuffle\n",
-      "  sortagrad: True\n",
-      "  specgram_type: linear\n",
-      "  stride_ms: 10.0\n",
-      "  target_dB: -20\n",
-      "  target_sample_rate: 16000\n",
-      "  test_manifest: examples/aishell/data/manifest.test\n",
-      "  train_manifest: data/manifest.train\n",
-      "  use_dB_normalization: True\n",
-      "  vocab_filepath: examples/aishell/data/vocab.txt\n",
-      "  window_ms: 20.0\n",
-      "decoding:\n",
-      "  alpha: 2.6\n",
-      "  batch_size: 128\n",
-      "  beam_size: 300\n",
-      "  beta: 5.0\n",
-      "  cutoff_prob: 0.99\n",
-      "  cutoff_top_n: 40\n",
-      "  decoding_method: ctc_beam_search\n",
-      "  error_rate_type: cer\n",
-      "  lang_model_path: data/lm/zh_giga.no_cna_cmn.prune01244.klm\n",
-      "  num_proc_bsearch: 10\n",
-      "model:\n",
-      "  num_conv_layers: 2\n",
-      "  num_rnn_layers: 3\n",
-      "  rnn_layer_size: 1024\n",
-      "  share_rnn_weights: False\n",
-      "  use_gru: True\n",
-      "training:\n",
-      "  global_grad_clip: 5.0\n",
-      "  lr: 0.0005\n",
-      "  lr_decay: 0.83\n",
-      "  n_epoch: 30\n",
-      "  weight_decay: 1e-06\n",
-      "-----------  Configuration Arguments -----------\n",
-      "checkpoint_path: examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725\n",
-      "config: examples/aishell/conf/deepspeech2.yaml\n",
-      "device: gpu\n",
-      "dump_config: None\n",
-      "export_path: None\n",
-      "host_ip: localhost\n",
-      "host_port: 8086\n",
-      "model_dir: None\n",
-      "model_file: examples/aishell/jit.model.pdmodel\n",
-      "nprocs: 1\n",
-      "opts: ['data.test_manifest', 'examples/aishell/data/manifest.test', 'data.mean_std_filepath', 'examples/aishell/data/mean_std.npz', 'data.vocab_filepath', 'examples/aishell/data/vocab.txt']\n",
-      "output: None\n",
-      "params_file: examples/aishell/jit.model.pdiparams\n",
-      "speech_save_dir: demo_cache\n",
-      "use_gpu: False\n",
-      "warmup_manifest: examples/aishell/data/manifest.test\n",
-      "------------------------------------------------\n"
-     ]
-    }
-   ],
-   "source": [
-    "parser = default_argument_parser()\n",
-    "add_arg = functools.partial(add_arguments, argparser=parser)\n",
-    "add_arg('host_ip',          str,\n",
-    "        'localhost',\n",
-    "        \"Server's IP address.\")\n",
-    "add_arg('host_port',        int,    8086,    \"Server's IP port.\")\n",
-    "add_arg('speech_save_dir',  str,\n",
-    "        'demo_cache',\n",
-    "        \"Directory to save demo audios.\")\n",
-    "add_arg('warmup_manifest',  \n",
-    "        str, \n",
-    "        \"examples/aishell/data/manifest.test\", \n",
-    "        \"Filepath of manifest to warm up.\")\n",
-    "add_arg(\n",
-    "    \"--model_file\",\n",
-    "    type=str,\n",
-    "    default=\"examples/aishell/jit.model.pdmodel\",\n",
-    "    help=\"Model filename, Specify this when your model is a combined model.\"\n",
-    ")\n",
-    "add_arg(\n",
-    "    \"--params_file\",\n",
-    "    type=str,\n",
-    "    default=\"examples/aishell/jit.model.pdiparams\",\n",
-    "    help=\n",
-    "    \"Parameter filename, Specify this when your model is a combined model.\"\n",
-    ")\n",
-    "add_arg(\n",
-    "    \"--model_dir\",\n",
-    "    type=str,\n",
-    "    default=None,\n",
-    "    help=\n",
-    "    \"Model dir, If you load a non-combined model, specify the directory of the model.\"\n",
-    ")\n",
-    "add_arg(\"--use_gpu\",type=bool,default=False, help=\"Whether use gpu.\")\n",
-    "\n",
-    "\n",
-    "args = parser.parse_args(\n",
-    "    \"--checkpoint_path examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725 --config examples/aishell/conf/deepspeech2.yaml --opts data.test_manifest examples/aishell/data/manifest.test data.mean_std_filepath examples/aishell/data/mean_std.npz  data.vocab_filepath examples/aishell/data/vocab.txt\".split()\n",
-    ")\n",
-    "\n",
-    "\n",
-    "config = get_cfg_defaults()\n",
-    "if args.config:\n",
-    "    config.merge_from_file(args.config)\n",
-    "if args.opts:\n",
-    "    config.merge_from_list(args.opts)\n",
-    "config.freeze()\n",
-    "print(config)\n",
-    "\n",
-    "args.warmup_manifest = config.data.test_manifest\n",
-    "\n",
-    "print_arguments(args)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset = ManifestDataset(\n",
-    "        config.data.test_manifest,\n",
-    "        config.data.unit_type,\n",
-    "        config.data.vocab_filepath,\n",
-    "        config.data.mean_std_filepath,\n",
-    "        augmentation_config=\"{}\",\n",
-    "        max_duration=config.data.max_duration,\n",
-    "        min_duration=config.data.min_duration,\n",
-    "        stride_ms=config.data.stride_ms,\n",
-    "        window_ms=config.data.window_ms,\n",
-    "        n_fft=config.data.n_fft,\n",
-    "        max_freq=config.data.max_freq,\n",
-    "        target_sample_rate=config.data.target_sample_rate,\n",
-    "        specgram_type=config.data.specgram_type,\n",
-    "        feat_dim=config.data.feat_dim,\n",
-    "        delta_delta=config.data.delat_delta,\n",
-    "        use_dB_normalization=config.data.use_dB_normalization,\n",
-    "        target_dB=config.data.target_dB,\n",
-    "        random_seed=config.data.random_seed,\n",
-    "        keep_transcription_text=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2021-03-26 02:55:57,930 - INFO - [checkpoint] Rank 0: loaded model from examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725.pdparams\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "layer summary:\n",
-      "encoder.conv.conv_in.conv.weight|[32, 1, 41, 11]|14432\n",
-      "encoder.conv.conv_in.bn.weight|[32]|32\n",
-      "encoder.conv.conv_in.bn.bias|[32]|32\n",
-      "encoder.conv.conv_in.bn._mean|[32]|32\n",
-      "encoder.conv.conv_in.bn._variance|[32]|32\n",
-      "encoder.conv.conv_stack.0.conv.weight|[32, 32, 21, 11]|236544\n",
-      "encoder.conv.conv_stack.0.bn.weight|[32]|32\n",
-      "encoder.conv.conv_stack.0.bn.bias|[32]|32\n",
-      "encoder.conv.conv_stack.0.bn._mean|[32]|32\n",
-      "encoder.conv.conv_stack.0.bn._variance|[32]|32\n",
-      "encoder.rnn.rnn_stacks.0.fw_fc.weight|[1312, 3072]|4030464\n",
-      "encoder.rnn.rnn_stacks.0.fw_bn.weight|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.fw_bn.bias|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.fw_bn._mean|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.fw_bn._variance|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.bw_fc.weight|[1312, 3072]|4030464\n",
-      "encoder.rnn.rnn_stacks.0.bw_bn.weight|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.bw_bn.bias|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.bw_bn._mean|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.bw_bn._variance|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.fw_cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.0.fw_cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.bw_cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.0.bw_cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.fw_rnn.cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.0.fw_rnn.cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.0.bw_rnn.cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.0.bw_rnn.cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.fw_fc.weight|[2048, 3072]|6291456\n",
-      "encoder.rnn.rnn_stacks.1.fw_bn.weight|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.fw_bn.bias|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.fw_bn._mean|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.fw_bn._variance|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.bw_fc.weight|[2048, 3072]|6291456\n",
-      "encoder.rnn.rnn_stacks.1.bw_bn.weight|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.bw_bn.bias|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.bw_bn._mean|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.bw_bn._variance|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.fw_cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.1.fw_cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.bw_cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.1.bw_cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.fw_rnn.cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.1.fw_rnn.cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.1.bw_rnn.cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.1.bw_rnn.cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.fw_fc.weight|[2048, 3072]|6291456\n",
-      "encoder.rnn.rnn_stacks.2.fw_bn.weight|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.fw_bn.bias|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.fw_bn._mean|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.fw_bn._variance|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.bw_fc.weight|[2048, 3072]|6291456\n",
-      "encoder.rnn.rnn_stacks.2.bw_bn.weight|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.bw_bn.bias|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.bw_bn._mean|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.bw_bn._variance|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.fw_cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.2.fw_cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.bw_cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.2.bw_cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.fw_rnn.cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.2.fw_rnn.cell.bias_hh|[3072]|3072\n",
-      "encoder.rnn.rnn_stacks.2.bw_rnn.cell.weight_hh|[3072, 1024]|3145728\n",
-      "encoder.rnn.rnn_stacks.2.bw_rnn.cell.bias_hh|[3072]|3072\n",
-      "decoder.ctc_lo.weight|[2048, 4300]|8806400\n",
-      "decoder.ctc_lo.bias|[4300]|4300\n",
-      "layer has 66 parameters, 80148012 elements.\n"
-     ]
-    }
-   ],
-   "source": [
-    "model = DeepSpeech2InferModel.from_pretrained(dataset, config,\n",
-    "                                             args.checkpoint_path)\n",
-    "model.eval()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "examples/aishell/jit.model.pdmodel\n",
-      "examples/aishell/jit.model.pdiparams\n",
-      "0\n",
-      "False\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "from paddle.inference import Config\n",
-    "from paddle.inference import PrecisionType\n",
-    "from paddle.inference import create_predictor\n",
-    "\n",
-    "args.use_gpu=False\n",
-    "paddle.set_device('cpu')\n",
-    "\n",
-    "def init_predictor(args):\n",
-    "    if args.model_dir is not None:\n",
-    "        config = Config(args.model_dir)\n",
-    "    else:\n",
-    "        config = Config(args.model_file, args.params_file)\n",
-    "\n",
-    "    if args.use_gpu:\n",
-    "        config.enable_use_gpu(memory_pool_init_size_mb=1000, device_id=0)\n",
-    "#         config.enable_tensorrt_engine(precision_mode=PrecisionType.Float32,\n",
-    "#                               use_calib_mode=True) # enable TensorRT inference with fp32 precision; turn on int8 offline quantization (calibration)\n",
-    "    else:\n",
-    "        # If MKL-DNN is not specified, you can set the number of BLAS threads.\n",
-    "        # The thread num should not be greater than the number of cores in the CPU.\n",
-    "        config.set_cpu_math_library_num_threads(1)\n",
-    "        config.enable_mkldnn()\n",
-    "        \n",
-    "    config.enable_memory_optim()\n",
-    "    config.switch_ir_optim(True)\n",
-    "    \n",
-    "    print(config.model_dir())\n",
-    "    print(config.prog_file())\n",
-    "    print(config.params_file())\n",
-    "    print(config.gpu_device_id())\n",
-    "    print(args.use_gpu)\n",
-    "    predictor = create_predictor(config)\n",
-    "    return predictor\n",
-    "\n",
-    "def run(predictor, audio, audio_len):\n",
-    "    # copy the audio features and their lengths to the input tensors\n",
-    "    input_names = predictor.get_input_names()\n",
-    "    for i, name in enumerate(input_names):\n",
-    "        print(\"input:\", i, name)\n",
-    "        \n",
-    "    audio_tensor = predictor.get_input_handle('audio')\n",
-    "    audio_tensor.reshape(audio.shape)\n",
-    "    audio_tensor.copy_from_cpu(audio.copy())\n",
-    "    \n",
-    "    audiolen_tensor = predictor.get_input_handle('audio_len')\n",
-    "    audiolen_tensor.reshape(audio_len.shape)\n",
-    "    audiolen_tensor.copy_from_cpu(audio_len.copy())\n",
-    "\n",
-    "    output_names = predictor.get_output_names()\n",
-    "    for i, name in enumerate(output_names):\n",
-    "        print(\"output:\", i, name)\n",
-    "\n",
-    "    # do the inference\n",
-    "    predictor.run()\n",
-    "\n",
-    "    results = []\n",
-    "    # fetch the data from each output tensor\n",
-    "    output_names = predictor.get_output_names()\n",
-    "    for i, name in enumerate(output_names):\n",
-    "        output_tensor = predictor.get_output_handle(name)\n",
-    "        output_data = output_tensor.copy_to_cpu()\n",
-    "        results.append(output_data)\n",
-    "\n",
-    "    return results\n",
-    "\n",
-    "\n",
-    "predictor = init_predictor(args)\n",
-    "\n",
-    "def file_to_transcript(filename):\n",
-    "        print(filename)\n",
-    "        feature = dataset.process_utterance(filename, \"\")\n",
-    "        audio = np.array([feature[0]]).astype('float32')  #[1, D, T]\n",
-    "        audio_len = feature[0].shape[1]\n",
-    "        audio_len = np.array([audio_len]).astype('int64')  # [1]\n",
-    "        \n",
-    "        \n",
-    "        i_probs = run(predictor, audio, audio_len)\n",
-    "        print('jit:', i_probs[0], type(i_probs[0]))\n",
-    "        \n",
-    "        audio = paddle.to_tensor(audio)\n",
-    "        audio_len = paddle.to_tensor(audio_len)\n",
-    "        print(audio.shape)\n",
-    "        print(audio_len.shape)\n",
-    "        \n",
-    "        #eouts, eouts_len = model.encoder(audio, audio_len)\n",
-    "        #probs = model.decoder.softmax(eouts)\n",
-    "        probs = model.forward(audio, audio_len)\n",
-    "        print('paddle:', probs.numpy())\n",
-    "        \n",
-    "        flag = np.allclose(i_probs[0], probs.numpy())\n",
-    "        print(flag)\n",
-    "        \n",
-    "        return probs\n",
-    "\n",
-    "#         result_transcript = model.decode(\n",
-    "#             audio,\n",
-    "#             audio_len,\n",
-    "#             vocab_list=dataset.vocab_list,\n",
-    "#             decoding_method=config.decoding.decoding_method,\n",
-    "#             lang_model_path=config.decoding.lang_model_path,\n",
-    "#             beam_alpha=config.decoding.alpha,\n",
-    "#             beam_beta=config.decoding.beta,\n",
-    "#             beam_size=config.decoding.beam_size,\n",
-    "#             cutoff_prob=config.decoding.cutoff_prob,\n",
-    "#             cutoff_top_n=config.decoding.cutoff_top_n,\n",
-    "#             num_processes=config.decoding.num_proc_bsearch)\n",
-    "#         return result_transcript[0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Warm-up Test Case %d: %s 0 /home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0124.wav\n",
-      "/home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0124.wav\n",
-      "input: 0 audio\n",
-      "input: 1 audio_len\n",
-      "output: 0 tmp_75\n",
-      "jit: [[[8.91786298e-12 4.45648032e-12 3.67572750e-09 ... 8.91767563e-12\n",
-      "   8.91573707e-12 4.64317296e-08]\n",
-      "  [1.55950222e-15 2.62794089e-14 4.50423509e-12 ... 1.55944271e-15\n",
-      "   1.55891342e-15 9.99992609e-01]\n",
-      "  [1.24638127e-17 7.61802427e-16 2.93265812e-14 ... 1.24633371e-17\n",
-      "   1.24587264e-17 1.00000000e+00]\n",
-      "  ...\n",
-      "  [4.37488240e-15 2.43676260e-12 1.98770514e-12 ... 4.37479896e-15\n",
-      "   4.37354747e-15 1.00000000e+00]\n",
-      "  [3.89334696e-13 1.66754856e-11 1.42900388e-11 ... 3.89329492e-13\n",
-      "   3.89252270e-13 1.00000000e+00]\n",
-      "  [1.00349985e-10 2.56293708e-10 2.91177582e-10 ... 1.00347876e-10\n",
-      "   1.00334095e-10 9.99998808e-01]]] <class 'numpy.ndarray'>\n",
-      "[1, 161, 522]\n",
-      "[1]\n",
-      "paddle: [[[8.91789680e-12 4.45649724e-12 3.67574149e-09 ... 8.91770945e-12\n",
-      "   8.91577090e-12 4.64319072e-08]\n",
-      "  [1.55950222e-15 2.62794089e-14 4.50423509e-12 ... 1.55944271e-15\n",
-      "   1.55891342e-15 9.99992609e-01]\n",
-      "  [1.24638599e-17 7.61805339e-16 2.93267472e-14 ... 1.24633842e-17\n",
-      "   1.24587735e-17 1.00000000e+00]\n",
-      "  ...\n",
-      "  [4.37488240e-15 2.43676737e-12 1.98770514e-12 ... 4.37479896e-15\n",
-      "   4.37354747e-15 1.00000000e+00]\n",
-      "  [3.89336187e-13 1.66755481e-11 1.42900925e-11 ... 3.89330983e-13\n",
-      "   3.89253761e-13 1.00000000e+00]\n",
-      "  [1.00349985e-10 2.56293708e-10 2.91177582e-10 ... 1.00347876e-10\n",
-      "   1.00334095e-10 9.99998808e-01]]]\n",
-      "False\n"
-     ]
-    }
-   ],
-   "source": [
-    "manifest = read_manifest(args.warmup_manifest)\n",
-    "\n",
-    "for idx, sample in enumerate(manifest[:1]):\n",
-    "    print(\"Warm-up Test Case %d: %s\", idx, sample['audio_filepath'])\n",
-    "    start_time = time.time()\n",
-    "    transcript = file_to_transcript(sample['audio_filepath'])\n",
-    "    finish_time = time.time()\n",
-    "#     print(\"Response Time: %f, Transcript: %s\" %\n",
-    "#           (finish_time - start_time, transcript))\n",
-    "    break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(1, 161, 522) (1,)\n",
-      "input: 0 audio\n",
-      "input: 1 audio_len\n",
-      "output: 0 tmp_75\n",
-      "jit: [[[8.91789680e-12 4.45649724e-12 3.67574149e-09 ... 8.91770945e-12\n",
-      "   8.91577090e-12 4.64319072e-08]\n",
-      "  [1.55950222e-15 2.62794089e-14 4.50423509e-12 ... 1.55944271e-15\n",
-      "   1.55891342e-15 9.99992609e-01]\n",
-      "  [1.24638599e-17 7.61805339e-16 2.93267472e-14 ... 1.24633842e-17\n",
-      "   1.24587735e-17 1.00000000e+00]\n",
-      "  ...\n",
-      "  [4.37488240e-15 2.43676737e-12 1.98770514e-12 ... 4.37479896e-15\n",
-      "   4.37354747e-15 1.00000000e+00]\n",
-      "  [3.89336187e-13 1.66755481e-11 1.42900925e-11 ... 3.89330983e-13\n",
-      "   3.89253761e-13 1.00000000e+00]\n",
-      "  [1.00349985e-10 2.56293708e-10 2.91177582e-10 ... 1.00347876e-10\n",
-      "   1.00334095e-10 9.99998808e-01]]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "def test(filename):\n",
-    "    feature = dataset.process_utterance(filename, \"\")\n",
-    "    audio = np.array([feature[0]]).astype('float32')  #[1, D, T]\n",
-    "    audio_len = feature[0].shape[1]\n",
-    "    audio_len = np.array([audio_len]).astype('int64')  # [1]\n",
-    "    \n",
-    "    print(audio.shape, audio_len.shape)\n",
-    "\n",
-    "    i_probs = run(predictor, audio, audio_len)\n",
-    "    print('jit:', i_probs[0])\n",
-    "    return i_probs\n",
-    "    \n",
-    "probs = test(sample['audio_filepath'])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/.notebook/layer_norm_test.ipynb
+++ b/.notebook/layer_norm_test.ipynb
@ -1,229 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "academic-surname",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import paddle\n",
-    "from paddle import nn"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "id": "fundamental-treasure",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Parameter containing:\n",
-      "Tensor(shape=[256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
-      "       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])\n",
-      "Parameter containing:\n",
-      "Tensor(shape=[256], dtype=float32, place=CUDAPlace(0), stop_gradient=False,\n",
-      "       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])\n"
-     ]
-    }
-   ],
-   "source": [
-    "L = nn.LayerNorm(256, epsilon=1e-12)\n",
-    "for p in L.parameters():\n",
-    "    print(p)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "id": "consolidated-elephant",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "id": "moderate-noise",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "float64\n"
-     ]
-    }
-   ],
-   "source": [
-    "x = np.random.randn(2, 51, 256)\n",
-    "print(x.dtype)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "id": "cooked-progressive",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "y = L(paddle.to_tensor(x, dtype='float32'))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "id": "optimum-milwaukee",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "id": "viral-indian",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Parameter containing:\n",
-      "tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,\n",
-      "        1., 1., 1., 1.], requires_grad=True)\n",
-      "Parameter containing:\n",
-      "tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
-      "        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],\n",
-      "       requires_grad=True)\n"
-     ]
-    }
-   ],
-   "source": [
-    "TL = torch.nn.LayerNorm(256, eps=1e-12)\n",
-    "for p in TL.parameters():\n",
-    "    print(p)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "id": "skilled-vietnamese",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ty = TL(torch.tensor(x, dtype=torch.float32))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "id": "incorrect-allah",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False"
-      ]
-     },
-     "execution_count": 51,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "np.allclose(y.numpy(), ty.detach().numpy())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "prostate-cameroon",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 52,
-   "id": "governmental-surge",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 52,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "x = np.random.randn(2, 256)\n",
-    "y = L(paddle.to_tensor(x, dtype='float32'))\n",
-    "ty = TL(torch.tensor(x, dtype=torch.float32))\n",
-    "np.allclose(y.numpy(), ty.detach().numpy())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "confidential-jacket",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/.notebook/mask_and_masked_fill_test.ipynb
+++ b/.notebook/mask_and_masked_fill_test.ipynb
@ -1,449 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "primary-organic",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "id": "stopped-semester",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def mask_finished_scores(score: torch.Tensor,\n",
-    "                         flag: torch.Tensor) -> torch.Tensor:\n",
-    "    \"\"\"\n",
-    "    If a sequence is finished, we only allow one alive branch. This function\n",
-    "    aims to give one branch a zero score and the rest -inf score.\n",
-    "    Args:\n",
-    "        score (torch.Tensor): A real value array with shape\n",
-    "            (batch_size * beam_size, beam_size).\n",
-    "        flag (torch.Tensor): A bool array with shape\n",
-    "            (batch_size * beam_size, 1).\n",
-    "    Returns:\n",
-    "        torch.Tensor: (batch_size * beam_size, beam_size).\n",
-    "    \"\"\"\n",
-    "    beam_size = score.size(-1)\n",
-    "    zero_mask = torch.zeros_like(flag, dtype=torch.bool)\n",
-    "    if beam_size > 1:\n",
-    "        unfinished = torch.cat((zero_mask, flag.repeat([1, beam_size - 1])),\n",
-    "                               dim=1)\n",
-    "        finished = torch.cat((flag, zero_mask.repeat([1, beam_size - 1])),\n",
-    "                             dim=1)\n",
-    "    else:\n",
-    "        unfinished = zero_mask\n",
-    "        finished = flag\n",
-    "    print(unfinished)\n",
-    "    print(finished)\n",
-    "    score.masked_fill_(unfinished, -float('inf'))\n",
-    "    score.masked_fill_(finished, 0)\n",
-    "    return score"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 58,
-   "id": "agreed-portuguese",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[ True],\n",
-      "        [False]])\n",
-      "tensor([[-0.8841,  0.7381, -0.9986],\n",
-      "        [ 0.2675, -0.7971,  0.3798]])\n",
-      "tensor([[ True,  True],\n",
-      "        [False, False]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "score = torch.randn((2, 3))\n",
-    "flag = torch.ones((2, 1), dtype=torch.bool)\n",
-    "flag[1] = False\n",
-    "print(flag)\n",
-    "print(score)\n",
-    "print(flag.repeat([1, 2]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 59,
-   "id": "clean-aspect",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "tensor([[False,  True,  True],\n",
-      "        [False, False, False]])\n",
-      "tensor([[ True, False, False],\n",
-      "        [False, False, False]])\n",
-      "tensor([[ 0.0000,    -inf,    -inf],\n",
-      "        [ 0.2675, -0.7971,  0.3798]])\n",
-      "tensor([[ 0.0000,    -inf,    -inf],\n",
-      "        [ 0.2675, -0.7971,  0.3798]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "r  = mask_finished_scores(score, flag)\n",
-    "print(r)\n",
-    "print(score)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 55,
-   "id": "thrown-airline",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Tensor(shape=[2, 1], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[True ],\n",
-      "        [False]])\n",
-      "Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 2.05994511,  1.87704289,  0.01988174],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "Tensor(shape=[2, 2], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[True , True ],\n",
-      "        [False, False]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "import paddle\n",
-    "\n",
-    "score = paddle.randn((2, 3))\n",
-    "flag = paddle.ones((2, 1), dtype='bool')\n",
-    "flag[1] = False\n",
-    "print(flag)\n",
-    "print(score)\n",
-    "print(flag.tile([1, 2]))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 56,
-   "id": "internal-patent",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Tensor(shape=[2, 3], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[False, True , True ],\n",
-      "        [False, False, False]])\n",
-      "Tensor(shape=[2, 3], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[True , False, False],\n",
-      "        [False, False, False]])\n",
-      "x Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 2.05994511,  1.87704289,  0.01988174],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "2 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 2.05994511,  1.87704289,  0.01988174],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "3 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 2.05994511, -inf.      , -inf.      ],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "x Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 2.05994511, -inf.      , -inf.      ],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "2 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 2.05994511, -inf.      , -inf.      ],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "3 Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 0.        , -inf.      , -inf.      ],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n",
-      "Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[ 0.        , -inf.      , -inf.      ],\n",
-      "        [-0.40165186,  0.77547729, -0.64469045]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "paddle.bool = 'bool'\n",
-    "\n",
-    "def masked_fill(xs:paddle.Tensor, mask:paddle.Tensor, value:float):\n",
-    "    print(xs)\n",
-    "    trues = paddle.ones_like(xs) * value\n",
-    "    assert xs.shape == mask.shape\n",
-    "    xs = paddle.where(mask, trues, xs)\n",
-    "    return xs\n",
-    "\n",
-    "def masked_fill_(xs:paddle.Tensor, mask:paddle.Tensor, value:float):\n",
-    "    print('x', xs)\n",
-    "    trues = paddle.ones_like(xs) * value\n",
-    "    assert xs.shape == mask.shape\n",
-    "    ret = paddle.where(mask, trues, xs)\n",
-    "    print('2', xs)\n",
-    "    paddle.assign(ret, output=xs)\n",
-    "    print('3', xs)\n",
-    "\n",
-    "paddle.Tensor.masked_fill = masked_fill\n",
-    "paddle.Tensor.masked_fill_ = masked_fill_\n",
-    "\n",
-    "def mask_finished_scores_pd(score: paddle.Tensor,\n",
-    "                         flag: paddle.Tensor) -> paddle.Tensor:\n",
-    "    \"\"\"\n",
-    "    If a sequence is finished, we only allow one alive branch. This function\n",
-    "    aims to give one branch a zero score and the rest -inf score.\n",
-    "    Args:\n",
-    "        score (paddle.Tensor): A real value array with shape\n",
-    "            (batch_size * beam_size, beam_size).\n",
-    "        flag (paddle.Tensor): A bool array with shape\n",
-    "            (batch_size * beam_size, 1).\n",
-    "    Returns:\n",
-    "        paddle.Tensor: (batch_size * beam_size, beam_size).\n",
-    "    \"\"\"\n",
-    "    beam_size = score.shape[-1]\n",
-    "    zero_mask = paddle.zeros_like(flag, dtype=paddle.bool)\n",
-    "    if beam_size > 1:\n",
-    "        unfinished = paddle.concat((zero_mask, flag.tile([1, beam_size - 1])),\n",
-    "                               axis=1)\n",
-    "        finished = paddle.concat((flag, zero_mask.tile([1, beam_size - 1])),\n",
-    "                             axis=1)\n",
-    "    else:\n",
-    "        unfinished = zero_mask\n",
-    "        finished = flag\n",
-    "    print(unfinished)\n",
-    "    print(finished)\n",
-    "    \n",
-    "    #score.masked_fill_(unfinished, -float('inf'))\n",
-    "    #score.masked_fill_(finished, 0)\n",
-    "#     infs = paddle.ones_like(score) * -float('inf')\n",
-    "#     score = paddle.where(unfinished, infs, score)\n",
-    "#     score = paddle.where(finished, paddle.zeros_like(score), score)\n",
-    "\n",
-    "#     score = score.masked_fill(unfinished, -float('inf'))\n",
-    "#     score = score.masked_fill(finished, 0)\n",
-    "    score.masked_fill_(unfinished, -float('inf'))\n",
-    "    score.masked_fill_(finished, 0)\n",
-    "    return score\n",
-    "\n",
-    "r  = mask_finished_scores_pd(score, flag)\n",
-    "print(r)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 57,
-   "id": "vocal-prime",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<bound method PyCapsule.value of Tensor(shape=[2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,\n",
-       "       [[ 0.        , -inf.      , -inf.      ],\n",
-       "        [-0.40165186,  0.77547729, -0.64469045]])>"
-      ]
-     },
-     "execution_count": 57,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "score.value"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "id": "bacterial-adolescent",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from typing import Union, Any"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 72,
-   "id": "absent-fiber",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def repeat(xs : paddle.Tensor, *size: Any):\n",
-    "    print(size)\n",
-    "    return paddle.tile(xs, size)\n",
-    "paddle.Tensor.repeat = repeat"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 73,
-   "id": "material-harbor",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(1, 2)\n",
-      "Tensor(shape=[2, 2], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[True , True ],\n",
-      "        [False, False]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "flag = paddle.ones((2, 1), dtype='bool')\n",
-    "flag[1] = False\n",
-    "print(flag.repeat(1, 2))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 84,
-   "id": "acute-brighton",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "(Tensor(shape=[1], dtype=int64, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [1]), 2)\n",
-      "Tensor(shape=[2, 2], dtype=bool, place=CUDAPlace(0), stop_gradient=True,\n",
-      "       [[True , True ],\n",
-      "        [False, False]])\n"
-     ]
-    }
-   ],
-   "source": [
-    "flag = paddle.ones((2, 1), dtype='bool')\n",
-    "flag[1] = False\n",
-    "print(flag.repeat(paddle.to_tensor(1), 2))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 85,
-   "id": "european-rugby",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def size(xs, *args: int):\n",
-    "    nargs = len(args)\n",
-    "    s = paddle.shape(xs)\n",
-    "    assert(nargs <= 1)\n",
-    "    if nargs == 1:\n",
-    "        return s[args[0]]\n",
-    "    else:\n",
-    "        return s\n",
-    "paddle.Tensor.size = size"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 86,
-   "id": "moral-special",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Tensor(shape=[2], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
-       "       [2, 1])"
-      ]
-     },
-     "execution_count": 86,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "flag.size()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 87,
-   "id": "ahead-coach",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
-       "       [1])"
-      ]
-     },
-     "execution_count": 87,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "flag.size(1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 88,
-   "id": "incomplete-fitness",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Tensor(shape=[1], dtype=int32, place=CPUPlace, stop_gradient=True,\n",
-       "       [2])"
-      ]
-     },
-     "execution_count": 88,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "flag.size(0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "upset-connectivity",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/.notebook/position_embeding_check.ipynb
+++ b/.notebook/position_embeding_check.ipynb
@ -1,231 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "designing-borough",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
-      "  and should_run_async(code)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00\n",
-      "   0.0000000e+00  0.0000000e+00]\n",
-      " [ 8.4147096e-01  8.0196178e-01  7.6172036e-01 ...  1.2409373e-04\n",
-      "   1.1547816e-04  1.0746076e-04]\n",
-      " [ 9.0929741e-01  9.5814437e-01  9.8704624e-01 ...  2.4818745e-04\n",
-      "   2.3095631e-04  2.1492151e-04]\n",
-      " ...\n",
-      " [ 3.7960774e-01  7.4510968e-01  7.3418564e-01 ...  1.2036801e-02\n",
-      "   1.1201146e-02  1.0423505e-02]\n",
-      " [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ...  1.2160885e-02\n",
-      "   1.1316618e-02  1.0530960e-02]\n",
-      " [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ...  1.2284970e-02\n",
-      "   1.1432089e-02  1.0638415e-02]]\n",
-      "True\n",
-      "True\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "import math\n",
-    "import numpy as np\n",
-    "\n",
-    "max_len=100\n",
-    "d_model=256\n",
-    "\n",
-    "pe = torch.zeros(max_len, d_model)\n",
-    "position = torch.arange(0, max_len,\n",
-    "                        dtype=torch.float32).unsqueeze(1)\n",
-    "toruch_position = position\n",
-    "div_term = torch.exp(\n",
-    "    torch.arange(0, d_model, 2, dtype=torch.float32) *\n",
-    "    -(math.log(10000.0) / d_model))\n",
-    "tourch_div_term = div_term.cpu().detach().numpy()\n",
-    "\n",
-    "\n",
-    "\n",
-    "torhc_sin = torch.sin(position * div_term)\n",
-    "torhc_cos = torch.cos(position * div_term)\n",
-    "print(torhc_sin.cpu().detach().numpy())\n",
-    "np_sin = np.sin((position * div_term).cpu().detach().numpy())\n",
-    "np_cos = np.cos((position * div_term).cpu().detach().numpy())\n",
-    "print(np.allclose(np_sin, torhc_sin.cpu().detach().numpy()))\n",
-    "print(np.allclose(np_cos, torhc_cos.cpu().detach().numpy()))\n",
-    "pe[:, 0::2] = torhc_sin\n",
-    "pe[:, 1::2] = torhc_cos\n",
-    "tourch_pe = pe.cpu().detach().numpy()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "swiss-referral",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n",
-      "True\n",
-      "False\n",
-      "False\n",
-      "False\n",
-      "False\n",
-      "[[ 1.          1.          1.         ...  1.          1.\n",
-      "   1.        ]\n",
-      " [ 0.5403023   0.59737533  0.6479059  ...  1.          1.\n",
-      "   1.        ]\n",
-      " [-0.41614684 -0.28628543 -0.1604359  ...  0.99999994  1.\n",
-      "   1.        ]\n",
-      " ...\n",
-      " [-0.92514753 -0.66694194 -0.67894876 ...  0.9999276   0.99993724\n",
-      "   0.9999457 ]\n",
-      " [-0.81928825 -0.9959641  -0.999139   ...  0.99992603  0.999936\n",
-      "   0.99994457]\n",
-      " [ 0.03982088 -0.52298605 -0.6157435  ...  0.99992454  0.9999347\n",
-      "   0.99994344]]\n",
-      "----\n",
-      "[[ 1.          1.          1.         ...  1.          1.\n",
-      "   1.        ]\n",
-      " [ 0.54030234  0.59737533  0.6479059  ...  1.          1.\n",
-      "   1.        ]\n",
-      " [-0.41614684 -0.28628543 -0.1604359  ...  1.          1.\n",
-      "   1.        ]\n",
-      " ...\n",
-      " [-0.92514753 -0.66694194 -0.67894876 ...  0.9999276   0.9999373\n",
-      "   0.9999457 ]\n",
-      " [-0.81928825 -0.9959641  -0.999139   ...  0.99992603  0.999936\n",
-      "   0.99994457]\n",
-      " [ 0.03982088 -0.5229861  -0.6157435  ...  0.99992454  0.9999347\n",
-      "   0.99994344]]\n",
-      ")))))))\n",
-      "[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00\n",
-      "   0.0000000e+00  0.0000000e+00]\n",
-      " [ 8.4147096e-01  8.0196178e-01  7.6172036e-01 ...  1.2409373e-04\n",
-      "   1.1547816e-04  1.0746076e-04]\n",
-      " [ 9.0929741e-01  9.5814437e-01  9.8704624e-01 ...  2.4818745e-04\n",
-      "   2.3095631e-04  2.1492151e-04]\n",
-      " ...\n",
-      " [ 3.7960774e-01  7.4510968e-01  7.3418564e-01 ...  1.2036801e-02\n",
-      "   1.1201146e-02  1.0423505e-02]\n",
-      " [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ...  1.2160885e-02\n",
-      "   1.1316618e-02  1.0530960e-02]\n",
-      " [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ...  1.2284970e-02\n",
-      "   1.1432089e-02  1.0638415e-02]]\n",
-      "----\n",
-      "[[ 0.0000000e+00  0.0000000e+00  0.0000000e+00 ...  0.0000000e+00\n",
-      "   0.0000000e+00  0.0000000e+00]\n",
-      " [ 8.4147096e-01  8.0196178e-01  7.6172036e-01 ...  1.2409373e-04\n",
-      "   1.1547816e-04  1.0746076e-04]\n",
-      " [ 9.0929741e-01  9.5814437e-01  9.8704624e-01 ...  2.4818745e-04\n",
-      "   2.3095631e-04  2.1492151e-04]\n",
-      " ...\n",
-      " [ 3.7960774e-01  7.4510968e-01  7.3418564e-01 ...  1.2036801e-02\n",
-      "   1.1201146e-02  1.0423505e-02]\n",
-      " [-5.7338190e-01 -8.9752287e-02 -4.1488394e-02 ...  1.2160885e-02\n",
-      "   1.1316618e-02  1.0530960e-02]\n",
-      " [-9.9920684e-01 -8.5234123e-01 -7.8794664e-01 ...  1.2284970e-02\n",
-      "   1.1432089e-02  1.0638415e-02]]\n"
-     ]
-    }
-   ],
-   "source": [
-    "import paddle\n",
-    "paddle.set_device('cpu')\n",
-    "ppe = paddle.zeros((max_len, d_model), dtype='float32')\n",
-    "position = paddle.arange(0, max_len,\n",
-    "                        dtype='float32').unsqueeze(1)\n",
-    "print(np.allclose(position.numpy(), toruch_position))\n",
-    "div_term = paddle.exp(\n",
-    "    paddle.arange(0, d_model, 2, dtype='float32') *\n",
-    "    -(math.log(10000.0) / d_model))\n",
-    "print(np.allclose(div_term.numpy(), tourch_div_term))\n",
-    "\n",
-    "\n",
-    "\n",
-    "p_sin = paddle.sin(position * div_term)\n",
-    "p_cos = paddle.cos(position * div_term)\n",
-    "print(np.allclose(np_sin, p_sin.numpy(), rtol=1.e-6, atol=0))\n",
-    "print(np.allclose(np_cos, p_cos.numpy(), rtol=1.e-6, atol=0))\n",
-    "ppe[:, 0::2] = p_sin\n",
-    "ppe[:, 1::2] = p_cos\n",
-    "print(np.allclose(p_sin.numpy(), torhc_sin.cpu().detach().numpy()))\n",
-    "print(np.allclose(p_cos.numpy(), torhc_cos.cpu().detach().numpy()))\n",
-    "print(p_cos.numpy())\n",
-    "print(\"----\")\n",
-    "print(torhc_cos.cpu().detach().numpy())\n",
-    "print(\")))))))\")\n",
-    "print(p_sin.numpy())\n",
-    "print(\"----\")\n",
-    "print(torhc_sin.cpu().detach().numpy())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "integrated-boards",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "False\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(np.allclose(ppe.numpy(), pe.numpy()))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "flying-reserve",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "revised-divide",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
--- a/.notebook/python_test.ipynb
+++ b/.notebook/python_test.ipynb
--- a/.notebook/train_test.ipynb
+++ b/.notebook/train_test.ipynb
--- a/.notebook/u2_confermer_model_wenet.ipynb
+++ b/.notebook/u2_confermer_model_wenet.ipynb
--- a/.notebook/u2_tansformer_model_espnet.ipynb
+++ b/.notebook/u2_tansformer_model_espnet.ipynb
--- a/.notebook/wenet_model.ipynb
+++ b/.notebook/wenet_model.ipynb