espnet loader test

pull/756/head
Hui Zhang 3 years ago
parent e3d73acd37
commit 981cecf72b

@ -10,13 +10,13 @@
"name": "stdout",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x\n"
"/workspace/zhanghui/DeepSpeech-2.x\n"
]
},
{
"data": {
"text/plain": [
"'/workspace/DeepSpeech-2.x'"
"'/workspace/zhanghui/DeepSpeech-2.x'"
]
},
"execution_count": 1,
@ -31,7 +31,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 8,
"id": "correct-window",
"metadata": {},
"outputs": [
@ -45,22 +45,22 @@
}
],
"source": [
"!ls /workspace/DeepSpeech-2.x/examples/librispeech/s2/data/"
"!ls /workspace/zhanghui/DeepSpeech-2.x/examples/librispeech/s2/data/"
]
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 9,
"id": "exceptional-cheese",
"metadata": {},
"outputs": [],
"source": [
"dev_data='/workspace/DeepSpeech-2.x/examples/librispeech/s2/data/manifest.dev'"
"dev_data='/workspace/zhanghui/DeepSpeech-2.x/examples/librispeech/s2/data/manifest.dev'"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 11,
"id": "extraordinary-orleans",
"metadata": {},
"outputs": [
@ -68,6 +68,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n",
"register user softmax to paddle, remove this when fixed!\n",
"register user log_softmax to paddle, remove this when fixed!\n",
"register user sigmoid to paddle, remove this when fixed!\n",
@ -105,26 +106,17 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 12,
"id": "returning-lighter",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
" and should_run_async(code)\n"
]
}
],
"outputs": [],
"source": [
"dev_json = read_manifest(dev_data)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 13,
"id": "western-founder",
"metadata": {},
"outputs": [
@ -166,7 +158,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 14,
"id": "motivated-receptor",
"metadata": {},
"outputs": [],
@ -646,19 +638,10 @@
},
{
"cell_type": "code",
"execution_count": 40,
"execution_count": 15,
"id": "acquired-hurricane",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[INFO 2021/08/17 04:09:47 <ipython-input-19-4c01301916ec>:284] use shuffled batch.\n",
"[INFO 2021/08/17 04:09:47 <ipython-input-19-4c01301916ec>:286] # utts: 5542\n",
"[INFO 2021/08/17 04:09:47 <ipython-input-19-4c01301916ec>:467] # minibatches: 555\n"
]
},
{
"name": "stdout",
"output_type": "stream",
@ -703,7 +686,7 @@
},
{
"cell_type": "code",
"execution_count": 50,
"execution_count": 16,
"id": "warming-malpractice",
"metadata": {},
"outputs": [
@ -713,16 +696,16 @@
"text": [
"Collecting kaldiio\n",
" Downloading kaldiio-2.17.2.tar.gz (24 kB)\n",
"Requirement already satisfied: numpy in ./tools/venv/lib/python3.7/site-packages (from kaldiio) (1.20.1)\n",
"Requirement already satisfied: numpy in ./tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n",
"Building wheels for collected packages: kaldiio\n",
" Building wheel for kaldiio (setup.py) ... \u001b[?25ldone\n",
"\u001b[?25h Created wheel for kaldiio: filename=kaldiio-2.17.2-py3-none-any.whl size=24469 sha256=aadc8b1a8de5c9769af065ae724fb11326691d2350145019f6e3dba69f020134\n",
"\u001b[?25h Created wheel for kaldiio: filename=kaldiio-2.17.2-py3-none-any.whl size=24468 sha256=cd6e066764dcc8c24a9dfe3f7bd8acda18761a6fbcb024995729da8debdb466e\n",
" Stored in directory: /root/.cache/pip/wheels/04/07/e8/45641287c59bf6ce41e22259f8680b521c31e6306cb88392ac\n",
"Successfully built kaldiio\n",
"Installing collected packages: kaldiio\n",
"Successfully installed kaldiio-2.17.2\n",
"\u001b[33mWARNING: You are using pip version 20.0.1; however, version 21.2.4 is available.\n",
"You should consider upgrading via the '/workspace/DeepSpeech-2.x/tools/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
"\u001b[33mWARNING: You are using pip version 20.3.3; however, version 21.2.4 is available.\n",
"You should consider upgrading via the '/workspace/zhanghui/DeepSpeech-2.x/tools/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
]
}
],
@ -740,7 +723,7 @@
},
{
"cell_type": "code",
"execution_count": 54,
"execution_count": 19,
"id": "superb-methodology",
"metadata": {},
"outputs": [],
@ -1046,7 +1029,7 @@
},
{
"cell_type": "code",
"execution_count": 55,
"execution_count": 20,
"id": "monthly-muscle",
"metadata": {},
"outputs": [],
@ -1064,70 +1047,263 @@
},
{
"cell_type": "code",
"execution_count": 56,
"execution_count": 23,
"id": "periodic-senegal",
"metadata": {},
"outputs": [],
"source": [
"res = load(dev_data[0])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "7f0307eb",
"metadata": {},
"outputs": [
{
"ename": "FileNotFoundError",
"evalue": "[Errno 2] No such file or directory: '/workspace/zhanghui/asr/espnet/egs/librispeech/asr1/dump/dev/deltafalse/feats.12.ark'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-56-9f483b231463>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdev_data\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;32m<ipython-input-54-9deb677b23d5>\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, batch, return_uttid)\u001b[0m\n\u001b[1;32m 94\u001b[0m x = self._get_from_loader(\n\u001b[1;32m 95\u001b[0m \u001b[0mfilepath\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"feat\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 96\u001b[0;31m filetype=inp.get(\"filetype\", \"mat\"))\n\u001b[0m\u001b[1;32m 97\u001b[0m \u001b[0mx_feats_dict\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msetdefault\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"name\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 98\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m<ipython-input-54-9deb677b23d5>\u001b[0m in \u001b[0;36m_get_from_loader\u001b[0;34m(self, filepath, filetype)\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[0;31m# load_mat can load both matrix and vector\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 279\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mkeep_all_data_on_mem\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 280\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mkaldiio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_mat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 281\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mfilepath\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_loaders\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_loaders\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mkaldiio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload_mat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfilepath\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/kaldiio/matio.py\u001b[0m in \u001b[0;36mload_mat\u001b[0;34m(ark_name, endian, fd_dict)\u001b[0m\n\u001b[1;32m 238\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_load_mat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moffset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mendian\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mendian\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 239\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 240\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen_like_kaldi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mark\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"rb\"\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfd\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 241\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0m_load_mat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfd\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moffset\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslices\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mendian\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mendian\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 242\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/kaldiio/utils.py\u001b[0m in \u001b[0;36mopen_like_kaldi\u001b[0;34m(name, mode)\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 207\u001b[0m \u001b[0mencoding\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m\"b\"\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mmode\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mdefault_encoding\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 208\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmode\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mencoding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mencoding\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 209\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 210\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '/workspace/zhanghui/asr/espnet/egs/librispeech/asr1/dump/dev/deltafalse/feats.12.ark'"
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'tuple'>\n",
"2\n",
"10\n",
"10\n",
"(1763, 83) float32\n",
"(73,) int64\n"
]
}
],
"source": [
"res = load(dev_data[0])"
"print(type(res))\n",
"print(len(res))\n",
"print(len(res[0]))\n",
"print(len(res[1]))\n",
"print(res[0][0].shape, res[0][0].dtype)\n",
"print(res[1][0].shape, res[1][0].dtype)\n",
"# Tuple[Tuple[np.ndarray], Tuple[np.ndarray]]\n",
"# 2[10, 10]\n",
"# feats, labels"
]
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 36,
"id": "humanitarian-container",
"metadata": {},
"outputs": [],
"source": [
"(inputs, outputs), utts = load(dev_data[0], return_uttid=True)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "heard-prize",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ls: cannot access '/workspace/zhanghui/asr/espnet/egs/librispeech/asr1/dump/dev/deltafalse/feats.12.ark': No such file or directory\r\n"
"['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038'] 10\n",
"10\n"
]
}
],
"source": [
"!ls /workspace/zhanghui/asr/espnet/egs/librispeech/asr1/dump/dev/deltafalse/feats.12.ark"
"print(utts, len(utts))\n",
"print(len(inputs))"
]
},
{
"cell_type": "code",
"execution_count": 77,
"id": "heard-prize",
"execution_count": 83,
"id": "convinced-animation",
"metadata": {},
"outputs": [],
"source": [
"import paddle\n",
"from deepspeech.io.utility import pad_list\n",
"class CustomConverter():\n",
"    \"\"\"Collate one loaded minibatch into padded NumPy arrays.\n",
"\n",
"    Args:\n",
"        subsampling_factor (int): Keep every subsampling_factor-th frame of\n",
"            each feature matrix; values <= 1 keep every frame.\n",
"        dtype (numpy.dtype): Data type the padded features are cast to.\n",
"\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, subsampling_factor=1, dtype=np.float32):\n",
"        \"\"\"Construct a CustomConverter object.\"\"\"\n",
"        self.subsampling_factor = subsampling_factor\n",
"        # fill value handed to pad_list when padding the label sequences\n",
"        self.ignore_id = -1\n",
"        self.dtype = dtype\n",
"\n",
"    def __call__(self, batch):\n",
"        \"\"\"Pad the features and labels of a single minibatch.\n",
"\n",
"        Args:\n",
"            batch (list): One-element list holding ((xs, ys), utts), where\n",
"                xs and ys are the per-utterance feature and label sequences\n",
"                and utts is passed through unchanged.\n",
"\n",
"        Returns:\n",
"            tuple: (utts, xs_pad, ilens, ys_pad, olens).\n",
"                xs_pad is the result of pad_list on the features cast to\n",
"                self.dtype (a dict with real and imag entries when the\n",
"                features are complex), ys_pad is the result of pad_list on\n",
"                the labels filled with ignore_id, and ilens / olens are\n",
"                numpy arrays with the unpadded length of every entry.\n",
"\n",
"        Raises:\n",
"            AssertionError: If batch does not contain exactly one element.\n",
"\n",
"        \"\"\"\n",
"        # batch should be located in list\n",
"        assert len(batch) == 1\n",
"        (xs, ys), utts = batch[0]\n",
"\n",
"        # perform subsampling along the first (frame) axis\n",
"        if self.subsampling_factor > 1:\n",
"            xs = [x[::self.subsampling_factor, :] for x in xs]\n",
"\n",
"        # lengths are taken after subsampling and before padding\n",
"        ilens = np.array([x.shape[0] for x in xs])\n",
"\n",
"        # perform padding and cast to self.dtype\n",
"        if xs[0].dtype.kind == \"c\":\n",
"            # complex features: pad real and imaginary parts separately\n",
"            xs_pad_real = pad_list([x.real for x in xs], 0).astype(self.dtype)\n",
"            xs_pad_imag = pad_list([x.imag for x in xs], 0).astype(self.dtype)\n",
"            # Note(kamo):\n",
"            # {'real': ..., 'imag': ...} will be changed to ComplexTensor in E2E.\n",
"            # Don't create ComplexTensor and give it E2E here\n",
"            # because torch.nn.DataParallel can't handle it.\n",
"            xs_pad = {\"real\": xs_pad_real, \"imag\": xs_pad_imag}\n",
"        else:\n",
"            xs_pad = pad_list(xs, 0).astype(self.dtype)\n",
"\n",
"        # NOTE: this is for multi-output (e.g., speech translation):\n",
"        # a tuple entry contributes only its first element as the labels.\n",
"        labels = [np.array(y[0][:]) if isinstance(y, tuple) else y for y in ys]\n",
"        ys_pad = pad_list(labels, self.ignore_id)\n",
"\n",
"        # measure lengths on the same arrays that were padded, so a tuple\n",
"        # entry whose first element is a plain list (no .shape) also works\n",
"        olens = np.array([label.shape[0] for label in labels])\n",
"        return utts, xs_pad, ilens, ys_pad, olens"
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "1b6508fc",
"metadata": {},
"outputs": [],
"source": [
"convert = CustomConverter()"
]
},
{
"cell_type": "code",
"execution_count": 85,
"id": "25d655c0",
"metadata": {},
"outputs": [],
"source": [
"utts, xs, ilen, ys, olen = convert([load(dev_data[0], return_uttid=True)])"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "a28e5141",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ls: cannot access '/workspace/espnet/': No such file or directory\r\n"
"['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038']\n",
"(10, 1763, 83)\n",
"(10,)\n",
"[1763 1214 1146 757 751 661 625 512 426 329]\n",
"(10, 73)\n",
"[[2896 621 4502 2176 404 198 3538 391 278 407 389 3719 4577 846\n",
" 4501 482 1004 103 116 178 4222 624 4689 176 459 89 101 3465\n",
" 3204 4502 2029 1834 2298 829 3366 278 4705 4925 482 2920 3204 2481\n",
" 448 627 1254 404 20 202 36 2047 627 2495 4504 481 479 99\n",
" 18 2079 4502 1628 202 226 4512 3267 210 278 483 234 367 4502\n",
" 2438 3204 1141]\n",
" [ 742 4501 4768 4569 742 4483 2495 4502 3040 3204 4502 3961 3204 3992\n",
" 3089 4832 4258 621 2391 4642 3218 4502 3439 235 270 313 2385 2833\n",
" 742 4502 3282 332 3 280 4237 3252 830 2387 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2099 278 4904 2302 124 4832 3158 482 2888 2495 482 2450 627 1560\n",
" 3158 4729 482 3514 3204 1027 3233 2391 2862 399 389 4962 2495 121\n",
" 221 7 2340 1216 1658 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2458 2659 1362 2 404 4975 4995 487 3079 2785 2371 3158 824 2603\n",
" 4832 2323 999 2603 4832 4156 4678 627 1784 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2458 2340 1661 101 4723 2138 4502 4690 463 332 251 2345 4534 4502\n",
" 2396 444 4501 2287 389 4531 4894 1466 959 389 1658 2584 4502 3681\n",
" 279 3204 4502 2228 3204 4502 4690 463 332 251 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2368 1248 208 4832 3158 482 1473 3401 999 482 4159 3838 389 478\n",
" 4572 404 3158 3063 1481 113 4499 4501 3204 4643 2 389 4111 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2882 2932 4329 1808 4577 4350 4577 482 1636 2 389 1841 3204 3079\n",
" 1091 389 3204 2816 2079 4172 4986 4990 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [4869 2598 2603 1976 96 389 478 3 4031 721 4925 2263 1259 2598\n",
" 4508 653 4979 4925 2741 252 72 236 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [2458 4447 4505 713 624 3207 206 4577 4502 2404 3837 3458 2812 4936\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]\n",
" [1501 3897 2537 278 2601 2 404 2603 482 2235 3388 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n",
" -1 -1 -1]]\n",
"[73 38 33 23 38 27 22 22 14 11]\n",
"float32\n",
"int64\n",
"int64\n",
"int64\n"
]
}
],
"source": [
"!ls /workspace/espnet/"
"print(utts)\n",
"print(xs.shape)\n",
"print(ilen.shape)\n",
"print(ilen)\n",
"print(ys.shape)\n",
"print(ys)\n",
"print(olen)\n",
"print(xs.dtype)\n",
"print(ilen.dtype)\n",
"print(ys.dtype)\n",
"print(olen.dtype)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "convinced-animation",
"id": "1d981df4",
"metadata": {},
"outputs": [],
"source": []
@ -1135,7 +1311,7 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},

@ -13,3 +13,4 @@ tensorboardX
textgrid
typeguard
yacs
kaldiio

Loading…
Cancel
Save