@ -2,7 +2,7 @@
"cells": [
"cells": [
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 1,
"execution_count": 147 ,
"id": "extensive-venice",
"id": "extensive-venice",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -10,16 +10,16 @@
"name": "stdout",
"name": "stdout",
"output_type": "stream",
"output_type": "stream",
"text": [
"text": [
"/workspace/zhanghui/DeepSpeech-2.x \n"
"/\n"
]
]
},
},
{
{
"data": {
"data": {
"text/plain": [
"text/plain": [
"'/workspace/zhanghui/DeepSpeech-2.x '"
"'/'"
]
]
},
},
"execution_count": 1,
"execution_count": 147 ,
"metadata": {},
"metadata": {},
"output_type": "execute_result"
"output_type": "execute_result"
}
}
@ -31,7 +31,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 8,
"execution_count": 14 8,
"id": "correct-window",
"id": "correct-window",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -50,7 +50,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 9,
"execution_count": 14 9,
"id": "exceptional-cheese",
"id": "exceptional-cheese",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -60,53 +60,17 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 11 ,
"execution_count": 150 ,
"id": "extraordinary-orleans",
"id": "extraordinary-orleans",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [],
{
"name": "stderr",
"output_type": "stream",
"text": [
"grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n",
"register user softmax to paddle, remove this when fixed!\n",
"register user log_softmax to paddle, remove this when fixed!\n",
"register user sigmoid to paddle, remove this when fixed!\n",
"register user log_sigmoid to paddle, remove this when fixed!\n",
"register user relu to paddle, remove this when fixed!\n",
"override cat of paddle if exists or register, remove this when fixed!\n",
"override long of paddle.Tensor if exists or register, remove this when fixed!\n",
"override new_full of paddle.Tensor if exists or register, remove this when fixed!\n",
"override eq of paddle.Tensor if exists or register, remove this when fixed!\n",
"override eq of paddle if exists or register, remove this when fixed!\n",
"override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n",
"override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n",
"register user view to paddle.Tensor, remove this when fixed!\n",
"register user view_as to paddle.Tensor, remove this when fixed!\n",
"register user masked_fill to paddle.Tensor, remove this when fixed!\n",
"register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
"register user fill_ to paddle.Tensor, remove this when fixed!\n",
"register user repeat to paddle.Tensor, remove this when fixed!\n",
"register user softmax to paddle.Tensor, remove this when fixed!\n",
"register user sigmoid to paddle.Tensor, remove this when fixed!\n",
"register user relu to paddle.Tensor, remove this when fixed!\n",
"register user type_as to paddle.Tensor, remove this when fixed!\n",
"register user to to paddle.Tensor, remove this when fixed!\n",
"register user float to paddle.Tensor, remove this when fixed!\n",
"register user int to paddle.Tensor, remove this when fixed!\n",
"register user GLU to paddle.nn, remove this when fixed!\n",
"register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
"register user export to paddle.jit, remove this when fixed!\n"
]
}
],
"source": [
"source": [
"from deepspeech.frontend.utility import read_manifest"
"from deepspeech.frontend.utility import read_manifest"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 12 ,
"execution_count": 151,
"id": "returning-lighter",
"id": "returning-lighter",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -116,7 +80,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 13 ,
"execution_count": 152 ,
"id": "western-founder",
"id": "western-founder",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -158,7 +122,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 14 ,
"execution_count": 97 ,
"id": "motivated-receptor",
"id": "motivated-receptor",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -638,10 +602,19 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 15 ,
"execution_count": 98 ,
"id": "acquired-hurricane",
"id": "acquired-hurricane",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO 2021/08/18 06:57:10 1445365138.py:284] use shuffled batch.\n",
"[INFO 2021/08/18 06:57:10 1445365138.py:286] # utts: 5542\n",
"[INFO 2021/08/18 06:57:10 1445365138.py:468] # minibatches: 555\n"
]
},
{
{
"name": "stdout",
"name": "stdout",
"output_type": "stream",
"output_type": "stream",
@ -686,7 +659,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 16 ,
"execution_count": 99 ,
"id": "warming-malpractice",
"id": "warming-malpractice",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -694,16 +667,8 @@
"name": "stdout",
"name": "stdout",
"output_type": "stream",
"output_type": "stream",
"text": [
"text": [
"Collecting kaldiio\n",
"Requirement already satisfied: kaldiio in ./DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages (2.17.2)\n",
" Downloading kaldiio-2.17.2.tar.gz (24 kB)\n",
"Requirement already satisfied: numpy in ./DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n",
"Requirement already satisfied: numpy in ./tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n",
"Building wheels for collected packages: kaldiio\n",
" Building wheel for kaldiio (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for kaldiio: filename=kaldiio-2.17.2-py3-none-any.whl size=24468 sha256=cd6e066764dcc8c24a9dfe3f7bd8acda18761a6fbcb024995729da8debdb466e\n",
" Stored in directory: /root/.cache/pip/wheels/04/07/e8/45641287c59bf6ce41e22259f8680b521c31e6306cb88392ac\n",
"Successfully built kaldiio\n",
"Installing collected packages: kaldiio\n",
"Successfully installed kaldiio-2.17.2\n",
"\u001b[33mWARNING: You are using pip version 20.3.3; however, version 21.2.4 is available.\n",
"\u001b[33mWARNING: You are using pip version 20.3.3; however, version 21.2.4 is available.\n",
"You should consider upgrading via the '/workspace/zhanghui/DeepSpeech-2.x/tools/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
"You should consider upgrading via the '/workspace/zhanghui/DeepSpeech-2.x/tools/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
]
]
@ -723,7 +688,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 19 ,
"execution_count": 100 ,
"id": "superb-methodology",
"id": "superb-methodology",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1029,7 +994,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 20 ,
"execution_count": 101 ,
"id": "monthly-muscle",
"id": "monthly-muscle",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1047,7 +1012,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 23 ,
"execution_count": 10 2,
"id": "periodic-senegal",
"id": "periodic-senegal",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1057,7 +1022,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 34 ,
"execution_count": 10 3,
"id": "502d3f4d",
"id": "502d3f4d",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -1069,8 +1034,8 @@
"2\n",
"2\n",
"10\n",
"10\n",
"10\n",
"10\n",
"(1763 , 83) float32\n",
"(1174 , 83) float32\n",
"(73 ,) int64\n"
"(29 ,) int64\n"
]
]
}
}
],
],
@ -1088,7 +1053,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 36 ,
"execution_count": 104 ,
"id": "humanitarian-container",
"id": "humanitarian-container",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1098,7 +1063,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 39 ,
"execution_count": 105 ,
"id": "heard-prize",
"id": "heard-prize",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -1106,7 +1071,7 @@
"name": "stdout",
"name": "stdout",
"output_type": "stream",
"output_type": "stream",
"text": [
"text": [
"['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038 '] 10\n",
"['4572-112383-0005', '6313-66125-0015', '251-137823-0022', '2277-149896-0030', '652-130726-0032', '5895-34615-0013', '1462-170138-0002', '777-126732-0008', '3660-172182-0021', '2277-149896-0027 '] 10\n",
"10\n"
"10\n"
]
]
}
}
@ -1118,7 +1083,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 83 ,
"execution_count": 106 ,
"id": "convinced-animation",
"id": "convinced-animation",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1185,7 +1150,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 84 ,
"execution_count": 107 ,
"id": "0b92ade5",
"id": "0b92ade5",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1195,7 +1160,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 85 ,
"execution_count": 10 8,
"id": "8dbd847c",
"id": "8dbd847c",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
@ -1205,7 +1170,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 87 ,
"execution_count": 109 ,
"id": "31c085f4",
"id": "31c085f4",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
@ -1213,72 +1178,42 @@
"name": "stdout",
"name": "stdout",
"output_type": "stream",
"output_type": "stream",
"text": [
"text": [
"['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038 ']\n",
"['4572-112383-0005', '6313-66125-0015', '251-137823-0022', '2277-149896-0030', '652-130726-0032', '5895-34615-0013', '1462-170138-0002', '777-126732-0008', '3660-172182-0021', '2277-149896-0027 ']\n",
"(10, 1763 , 83)\n",
"(10, 1174 , 83)\n",
"(10,)\n",
"(10,)\n",
"[1763 1214 1146 757 751 661 625 512 426 329]\n",
"[1174 821 716 628 597 473 463 441 419 358]\n",
"(10, 73)\n",
"(10, 32)\n",
"[[2896 621 4502 2176 404 198 3538 391 278 407 389 3719 4577 846\n",
"[[4502 2404 4223 3204 4502 587 1018 3861 2932 713 2458 2916 253 4508\n",
" 4501 482 1004 103 116 178 4222 624 4689 176 459 89 101 3465\n",
" 627 1395 713 4504 957 2761 209 2967 3173 3918 2598 4100 3 2816\n",
" 3204 4502 2029 1834 2298 829 3366 278 4705 4925 482 2920 3204 2481\n",
" 4990 -1 -1 -1]\n",
" 448 627 1254 404 20 202 36 2047 627 2495 4504 481 479 99\n",
" [1005 451 210 278 3411 206 482 2307 573 4502 3848 4577 4273 2388\n",
" 18 2079 4502 1628 202 226 4512 3267 210 278 483 234 367 4502\n",
" 4444 89 4919 278 1264 4501 2371 3 139 113 2603 4962 3158 3325\n",
" 2438 3204 1141]\n",
" 4577 814 4587 1422]\n",
" [ 742 4501 4768 4569 742 4483 2495 4502 3040 3204 4502 3961 3204 3992\n",
" [2345 4144 2291 200 713 2345 532 999 2458 3076 545 2458 4832 3038\n",
" 3089 4832 4258 621 2391 4642 3218 4502 3439 235 270 313 2385 2833\n",
" 4499 482 2812 1260 3080 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 742 4502 3282 332 3 280 4237 3252 830 2387 -1 -1 -1 -1\n",
" -1 -1 -1 -1]\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" [2345 832 4577 4920 4501 2345 2298 1236 381 288 389 101 2495 4172\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 4843 3233 3245 4501 2345 2298 3987 4502 3023 3353 2345 1361 1635 2603\n",
" -1 -1 -1]\n",
" 4723 2371 -1 -1]\n",
" [2099 278 4904 2302 124 4832 3158 482 2888 2495 482 2450 627 1560\n",
" [4502 4207 432 3204 4502 2396 125 935 433 2598 483 18 327 2\n",
" 3158 4729 482 3514 3204 1027 3233 2391 2862 399 389 4962 2495 121\n",
" 389 627 4512 2340 713 482 1981 4525 4031 269 2030 1340 101 2495\n",
" 221 7 2340 1216 1658 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 4013 4844 -1 -1]\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" [4502 4892 3204 1892 3780 389 482 2774 3013 89 192 2495 4502 3475\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 389 66 370 343 404 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" -1 -1 -1 -1]\n",
" [2458 2659 1362 2 404 4975 4995 487 3079 2785 2371 3158 824 2603\n",
" [2458 2314 4577 2340 2863 1254 303 269 2 389 932 2079 4577 299\n",
" 4832 2323 999 2603 4832 4156 4678 627 1784 -1 -1 -1 -1 -1\n",
" 195 3233 4508 2 89 814 3144 1091 3204 3250 2193 3414 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1]\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" [2391 1785 443 78 39 4962 2340 829 599 4593 278 4681 202 407\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 269 194 182 4577 482 4308 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" -1 -1 -1 -1]\n",
" [2458 2340 1661 101 4723 2138 4502 4690 463 332 251 2345 4534 4502\n",
" [ 627 4873 2175 363 202 404 1018 4577 4502 3412 4875 2286 107 122\n",
" 2396 444 4501 2287 389 4531 4894 1466 959 389 1658 2584 4502 3681\n",
" 4832 2345 3896 89 2368 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 279 3204 4502 2228 3204 4502 4690 463 332 251 -1 -1 -1 -1\n",
" -1 -1 -1 -1]\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" [ 481 174 474 599 1881 3252 2842 742 4502 2545 107 88 3204 4525\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" 4517 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" -1 -1 -1 -1]]\n",
" [2368 1248 208 4832 3158 482 1473 3401 999 482 4159 3838 389 478\n",
"[29 32 19 30 30 19 26 20 19 15]\n",
" 4572 404 3158 3063 1481 113 4499 4501 3204 4643 2 389 4111 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2882 2932 4329 1808 4577 4350 4577 482 1636 2 389 1841 3204 3079\n",
" 1091 389 3204 2816 2079 4172 4986 4990 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [4869 2598 2603 1976 96 389 478 3 4031 721 4925 2263 1259 2598\n",
" 4508 653 4979 4925 2741 252 72 236 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2458 4447 4505 713 624 3207 206 4577 4502 2404 3837 3458 2812 4936\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [1501 3897 2537 278 2601 2 404 2603 482 2235 3388 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]]\n",
"[73 38 33 23 38 27 22 22 14 11]\n",
"float32\n",
"float32\n",
"int64\n",
"int64\n",
"int64\n",
"int64\n",
@ -1302,42 +1237,281 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 88 ,
"execution_count": 110 ,
"id": "72e9ba60",
"id": "72e9ba60",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 230,
"id": "64593e5f",
"metadata": {},
"outputs": [],
"source": [
"\n",
"from paddle.io import DataLoader\n",
"\n",
"from deepspeech.frontend.utility import read_manifest\n",
"from deepspeech.io.batchfy import make_batchset\n",
"from deepspeech.io.converter import CustomConverter\n",
"from deepspeech.io.dataset import TransformDataset\n",
"from deepspeech.io.reader import LoadInputsAndTargets\n",
"from deepspeech.utils.log import Log\n",
"\n",
"\n",
"logger = Log(__name__).getlog()\n",
"\n",
"\n",
"class BatchDataLoader():\n",
"    \"\"\"Serve pre-built minibatches from a json manifest through a DataLoader.\n",
"\n",
"    The manifest is split into minibatches by ``make_batchset``; each\n",
"    minibatch is read by ``LoadInputsAndTargets`` and converted by\n",
"    ``CustomConverter`` when the loader asks for it.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self,\n",
"                 json_file: str,\n",
"                 train_mode: bool,\n",
"                 sortagrad: bool=False,\n",
"                 batch_size: int=0,\n",
"                 maxlen_in: float=float('inf'),\n",
"                 maxlen_out: float=float('inf'),\n",
"                 minibatches: int=0,\n",
"                 mini_batch_size: int=1,\n",
"                 batch_count: str='auto',\n",
"                 batch_bins: int=0,\n",
"                 batch_frames_in: int=0,\n",
"                 batch_frames_out: int=0,\n",
"                 batch_frames_inout: int=0,\n",
"                 preprocess_conf=None,\n",
"                 n_iter_processes: int=1,\n",
"                 subsampling_factor: int=1,\n",
"                 num_encs: int=1):\n",
"        \"\"\"Build the minibatch list, the reader, the converter and the loader.\n",
"\n",
"        Args:\n",
"            json_file: manifest path handed to ``read_manifest``.\n",
"            train_mode: forwarded to the reader as the ``train`` entry of\n",
"                ``preprocess_args``; shuffling is only considered when true.\n",
"            sortagrad: treated as enabled when equal to ``-1`` or greater\n",
"                than ``0``; when enabled, batches are built shortest first\n",
"                and shuffling is turned off.\n",
"            batch_size, maxlen_in, maxlen_out, batch_count, batch_bins,\n",
"            batch_frames_in, batch_frames_out, batch_frames_inout:\n",
"                forwarded unchanged to ``make_batchset``.\n",
"            minibatches: forwarded to ``make_batchset`` (for debug).\n",
"            mini_batch_size: forwarded to ``make_batchset`` as\n",
"                ``min_batch_size``.\n",
"            preprocess_conf: forwarded to ``LoadInputsAndTargets``.\n",
"            n_iter_processes: ``num_workers`` of the ``DataLoader``.\n",
"            subsampling_factor: forwarded to ``CustomConverter``.\n",
"            num_encs: only ``1`` is supported.\n",
"\n",
"        Raises:\n",
"            NotImplementedError: if ``num_encs`` is not ``1``.\n",
"        \"\"\"\n",
"        self.json_file = json_file\n",
"        self.train_mode = train_mode\n",
"        self.use_sortagrad = sortagrad == -1 or sortagrad > 0\n",
"        self.batch_size = batch_size\n",
"        self.maxlen_in = maxlen_in\n",
"        self.maxlen_out = maxlen_out\n",
"        self.batch_count = batch_count\n",
"        self.batch_bins = batch_bins\n",
"        self.batch_frames_in = batch_frames_in\n",
"        self.batch_frames_out = batch_frames_out\n",
"        self.batch_frames_inout = batch_frames_inout\n",
"        self.subsampling_factor = subsampling_factor\n",
"        self.num_encs = num_encs\n",
"        self.preprocess_conf = preprocess_conf\n",
"        self.n_iter_processes = n_iter_processes\n",
"\n",
"        # read json data\n",
"        self.data_json = read_manifest(json_file)\n",
"\n",
"        # make minibatch list (variable length)\n",
"        # NOTE: the attribute spelling `minibaches` is kept as-is because\n",
"        # code outside this class may already read it.\n",
"        self.minibaches = make_batchset(\n",
"            self.data_json,\n",
"            batch_size,\n",
"            maxlen_in,\n",
"            maxlen_out,\n",
"            minibatches,  # for debug\n",
"            min_batch_size=mini_batch_size,\n",
"            shortest_first=self.use_sortagrad,\n",
"            count=batch_count,\n",
"            batch_bins=batch_bins,\n",
"            batch_frames_in=batch_frames_in,\n",
"            batch_frames_out=batch_frames_out,\n",
"            batch_frames_inout=batch_frames_inout,\n",
"            iaxis=0,\n",
"            oaxis=0, )\n",
"\n",
"        # data reader\n",
"        self.reader = LoadInputsAndTargets(\n",
"            mode=\"asr\",\n",
"            load_output=True,\n",
"            preprocess_conf=preprocess_conf,\n",
"            # Switch the mode of preprocessing\n",
"            preprocess_args={\"train\": train_mode}, )\n",
"\n",
"        # Setup a converter\n",
"        if num_encs == 1:\n",
"            self.converter = CustomConverter(\n",
"                subsampling_factor=subsampling_factor, dtype=np.float32)\n",
"        else:\n",
"            # Must be `raise`: asserting an exception instance always passes\n",
"            # and would leave `self.converter` undefined.\n",
"            raise NotImplementedError(\"not impl CustomConverterMulEnc.\")\n",
"\n",
"        # Every dataset item is already a whole minibatch, so the DataLoader\n",
"        # runs with batch_size=1 and collate_fn unwraps the one-element list\n",
"        # instead of collating it again.\n",
"        self.dataset = TransformDataset(\n",
"            self.minibaches,\n",
"            lambda data: self.converter([self.reader(data, return_uttid=True)]))\n",
"        self.dataloader = DataLoader(\n",
"            dataset=self.dataset,\n",
"            batch_size=1,\n",
"            # Use the instance flag; a bare `use_sortagrad` is not defined in\n",
"            # this scope and only resolves if a global of that name exists.\n",
"            shuffle=(not self.use_sortagrad) if train_mode else False,\n",
"            collate_fn=lambda x: x[0],\n",
"            num_workers=n_iter_processes, )\n",
"\n",
"    def __repr__(self):\n",
"        \"\"\"Return the default object tag followed by the stored settings.\"\"\"\n",
"        echo = f\"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}> \"\n",
"        echo += f\"train_mode: {self.train_mode}, \"\n",
"        echo += f\"sortagrad: {self.use_sortagrad}, \"\n",
"        echo += f\"batch_size: {self.batch_size}, \"\n",
"        echo += f\"maxlen_in: {self.maxlen_in}, \"\n",
"        echo += f\"maxlen_out: {self.maxlen_out}, \"\n",
"        echo += f\"batch_count: {self.batch_count}, \"\n",
"        echo += f\"batch_bins: {self.batch_bins}, \"\n",
"        echo += f\"batch_frames_in: {self.batch_frames_in}, \"\n",
"        echo += f\"batch_frames_out: {self.batch_frames_out}, \"\n",
"        echo += f\"batch_frames_inout: {self.batch_frames_inout}, \"\n",
"        echo += f\"subsampling_factor: {self.subsampling_factor}, \"\n",
"        echo += f\"num_encs: {self.num_encs}, \"\n",
"        echo += f\"num_workers: {self.n_iter_processes}, \"\n",
"        echo += f\"file: {self.json_file}\"\n",
"        return echo\n",
"\n",
"    def __len__(self):\n",
"        \"\"\"Return the length of the wrapped DataLoader.\"\"\"\n",
"        return len(self.dataloader)\n",
"\n",
"    def __iter__(self):\n",
"        \"\"\"Return a fresh iterator over the wrapped DataLoader.\"\"\"\n",
"        return self.dataloader.__iter__()\n",
"\n",
"    def __call__(self):\n",
"        \"\"\"Calling the loader is the same as iterating over it.\"\"\"\n",
"        return self.__iter__()\n"
]
},
{
"cell_type": "code",
"execution_count": 231,
"id": "fcea3fd0",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO 2021/08/18 07:42:23 batchfy.py:399] count is auto detected as seq\n",
"[INFO 2021/08/18 07:42:23 batchfy.py:423] # utts: 5542\n",
"[INFO 2021/08/18 07:42:23 batchfy.py:466] # minibatches: 278\n"
]
}
],
"source": [
"source": [
"from pathlib import Path"
"train = BatchDataLoader(dev_data, True, batch_size=20) "
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 90,
"execution_count": 232 ,
"id": "64593e5f",
"id": "e2a2c9a8 ",
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
"ename": "AttributeError",
"name": "stdout",
"evalue": "'str' object has no attribute 'stat'",
"output_type": "stream",
"output_type": "error",
"text": [
"traceback": [
"278\n",
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'auto_collate_batch', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'dataset_kind', 'feed_list', 'from_dataset', 'from_generator', 'num_workers', 'pin_memory', 'places', 'return_list', 'timeout', 'use_buffer_reader', 'use_shared_memory', 'worker_init_fn']\n",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"<__main__.BatchDataLoader object at 0x7fdddba35470> train_mode: True, sortagrad: False, batch_size: 20, maxlen_in: inf, maxlen_out: inf, batch_count: auto, batch_bins: 0, batch_frames_in: 0, batch_frames_out: 0, batch_frames_inout: 0, subsampling_factor: 1, num_encs: 1, num_workers: 1, file: /workspace/zhanghui/DeepSpeech-2.x/examples/librispeech/s2/data/manifest.dev\n",
"\u001b[0;32m/tmp/ipykernel_48616/3505477735.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'xxxxxxxx'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mPath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"278\n"
"\u001b[0;32m/usr/local/lib/python3.7/pathlib.py\u001b[0m in \u001b[0;36mis_file\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1342\u001b[0m \"\"\"\n\u001b[1;32m 1343\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mS_ISREG\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mst_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1346\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mENOTDIR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'stat'"
]
]
}
}
],
],
"source": [
"source": [
"s='xxxxxxxx'\n",
"print(len(train.dataloader))\n",
"Path.is_file(s)"
"print(dir(train.dataloader))\n",
"print(train)\n",
"print(len(train))"
]
},
{
"cell_type": "code",
"execution_count": 220,
"id": "a5ba7d6e",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['7601-101619-0003', '1255-138279-0000', '1272-128104-0004', '6123-59150-0027', '2078-142845-0025', '7850-73752-0018', '4570-24733-0004', '2506-169427-0002', '7601-101619-0004', '3170-137482-0000', '6267-53049-0019', '4570-14911-0009', '174-168635-0018', '7601-291468-0004', '3576-138058-0022', '1919-142785-0007', '6467-62797-0007', '4153-61735-0005', '1686-142278-0003', '2506-169427-0000']\n",
"Tensor(shape=[20, 2961, 83], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n",
" [[[-1.99415934, -1.80315673, -1.88801885, ..., 0.86933994, -0.59853148, 0.02596200],\n",
" [-1.95346808, -1.84891188, -2.17492867, ..., 0.83640492, -0.59853148, -0.11333394],\n",
" [-2.27899861, -2.21495342, -2.58480024, ..., 0.91874266, -0.59853148, -0.31453922],\n",
" ...,\n",
" [-2.64522028, -2.35221887, -2.91269732, ..., 1.48994756, -0.16100442, 0.36646330],\n",
" [-2.40107250, -2.21495342, -2.37986445, ..., 1.44072104, -0.13220564, 0.12656468],\n",
" [-2.15692472, -1.89466715, -2.25690317, ..., 1.31273174, -0.09620714, -0.15202725]],\n",
"\n",
" [[-0.28859532, -0.29033494, -0.86576819, ..., 1.37753224, -0.30570769, 0.25806731],\n",
" [-0.20149794, -0.17814466, -0.59891301, ..., 1.35188794, -0.30570769, -0.02964944],\n",
" [-0.34947991, -0.33597648, -0.96877253, ..., 1.38394332, -0.30570769, -0.38376236],\n",
" ...,\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n",
"\n",
" [[-0.44914246, -0.33902276, -0.78237975, ..., 1.38218808, 0.29214793, -0.16815147],\n",
" [-0.55490732, -0.41596055, -0.84425378, ..., 1.34530187, 0.25002354, -0.04004869],\n",
" [-0.83694696, -0.62112784, -1.07112527, ..., 1.19160914, 0.20789915, 0.37984371],\n",
" ...,\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n",
"\n",
" ...,\n",
"\n",
" [[-1.24343657, -0.94188881, -1.41092563, ..., 0.96716309, 0.60345763, 0.15360183],\n",
" [-1.19466043, -0.80585432, -0.49723154, ..., 1.06735480, 0.60345763, 0.14511746],\n",
" [-0.94079566, -0.59330046, -0.40948665, ..., 0.82244170, 0.55614340, 0.28086722],\n",
" ...,\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n",
"\n",
" [[ 0.21757117, 0.11361472, -0.33262897, ..., 0.76338506, -0.10711290, -0.57754958],\n",
" [-1.00205481, -0.61152041, -0.47124696, ..., 1.11897349, -0.10711290, 0.24931324],\n",
" [-1.03929281, -1.20336759, -1.16433656, ..., 0.88888687, -0.10711290, -0.04115745],\n",
" ...,\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n",
"\n",
" [[-1.25289667, -1.05046368, -0.82881606, ..., 1.23991334, 0.61702502, 0.05275881],\n",
" [-1.19659519, -0.78677225, -0.80407262, ..., 1.27644968, 0.61702502, -0.35079369],\n",
" [-1.49687004, -1.01750231, -0.82881606, ..., 1.29106426, 0.65006059, 0.17958963],\n",
" ...,\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n",
" [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]]])\n",
"Tensor(shape=[20], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
" [2961, 2948, 2938, 2907, 2904, 2838, 2832, 2819, 2815, 2797, 2775, 2710, 2709, 2696, 2688, 2661, 2616, 2595, 2589, 2576])\n",
"Tensor(shape=[20, 133], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
" [[3098, 1595, 389, ..., -1 , -1 , -1 ],\n",
" [2603, 4832, 482, ..., -1 , -1 , -1 ],\n",
" [2796, 303, 269, ..., -1 , -1 , -1 ],\n",
" ...,\n",
" [3218, 3673, 206, ..., -1 , -1 , -1 ],\n",
" [2371, 4832, 4031, ..., -1 , -1 , -1 ],\n",
" [2570, 2433, 4285, ..., -1 , -1 , -1 ]])\n",
"Tensor(shape=[20], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
" [80 , 83 , 102, 133, 82 , 102, 71 , 91 , 68 , 81 , 86 , 67 , 71 , 95 , 65 , 88 , 97 , 98 , 89 , 72 ])\n"
]
}
],
"source": [
"# Show the five fields of the first minibatch only.\n",
"for batch in train:\n",
"    utts, xs, ilens, ys, olens = batch\n",
"    print(utts, xs, ilens, ys, olens, sep='\\n')\n",
"    break"
]
]
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": null,
"execution_count": null,
"id": "fcea3fd0",
"id": "3c974a1e ",
"metadata": {},
"metadata": {},
"outputs": [],
"outputs": [],
"source": []
"source": []