diff --git a/.notebook/espnet_dataloader.ipynb b/.notebook/espnet_dataloader.ipynb index 7abb138f..1bfc13e3 100644 --- a/.notebook/espnet_dataloader.ipynb +++ b/.notebook/espnet_dataloader.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": 147, "id": "extensive-venice", "metadata": {}, "outputs": [ @@ -10,16 +10,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "/workspace/zhanghui/DeepSpeech-2.x\n" + "/\n" ] }, { "data": { "text/plain": [ - "'/workspace/zhanghui/DeepSpeech-2.x'" + "'/'" ] }, - "execution_count": 1, + "execution_count": 147, "metadata": {}, "output_type": "execute_result" } @@ -31,7 +31,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 148, "id": "correct-window", "metadata": {}, "outputs": [ @@ -50,7 +50,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 149, "id": "exceptional-cheese", "metadata": {}, "outputs": [], @@ -60,53 +60,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 150, "id": "extraordinary-orleans", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n", - "register user softmax to paddle, remove this when fixed!\n", - "register user log_softmax to paddle, remove this when fixed!\n", - "register user sigmoid to paddle, remove this when fixed!\n", - "register user log_sigmoid to paddle, remove this when fixed!\n", - "register user relu to paddle, remove this when fixed!\n", - "override cat of paddle if exists or register, remove this when fixed!\n", - "override long of paddle.Tensor if exists or register, remove this when fixed!\n", - "override new_full of paddle.Tensor if exists or register, remove this when fixed!\n", - "override eq of paddle.Tensor if exists or register, remove this when fixed!\n", - "override eq of paddle if exists or register, remove this when fixed!\n", - "override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n", - "override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n", - "register user view to paddle.Tensor, remove this when fixed!\n", - "register user view_as to paddle.Tensor, remove this when fixed!\n", - "register user masked_fill to paddle.Tensor, remove this when fixed!\n", - "register user masked_fill_ to paddle.Tensor, remove this when fixed!\n", - "register user fill_ to paddle.Tensor, remove this when fixed!\n", - "register user repeat to paddle.Tensor, remove this when fixed!\n", - "register user softmax to paddle.Tensor, remove this when fixed!\n", - "register user sigmoid to paddle.Tensor, remove this when fixed!\n", - "register user relu to paddle.Tensor, remove this when fixed!\n", - "register user type_as to paddle.Tensor, remove this when fixed!\n", - "register user to to paddle.Tensor, remove this when fixed!\n", - "register user float to paddle.Tensor, remove this when fixed!\n", - "register user int to paddle.Tensor, remove this when fixed!\n", - "register user GLU to paddle.nn, remove this when fixed!\n", - "register user ConstantPad2d to paddle.nn, remove this when fixed!\n", - "register user export to paddle.jit, remove this when fixed!\n" - ] - } - ], + "outputs": [], "source": [ "from deepspeech.frontend.utility import read_manifest" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 151, "id": "returning-lighter", "metadata": {}, "outputs": [], @@ -116,7 +80,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 152, "id": "western-founder", "metadata": {}, "outputs": [ @@ -158,7 +122,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 97, "id": "motivated-receptor", "metadata": {}, "outputs": [], @@ -638,10 +602,19 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 98, "id": "acquired-hurricane", "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[INFO 2021/08/18 06:57:10 1445365138.py:284] use shuffled batch.\n", + "[INFO 2021/08/18 06:57:10 1445365138.py:286] # utts: 5542\n", + "[INFO 2021/08/18 06:57:10 1445365138.py:468] # minibatches: 555\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -686,7 +659,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 99, "id": "warming-malpractice", "metadata": {}, "outputs": [ @@ -694,16 +667,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "Collecting kaldiio\n", - " Downloading kaldiio-2.17.2.tar.gz (24 kB)\n", - "Requirement already satisfied: numpy in ./tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n", - "Building wheels for collected packages: kaldiio\n", - " Building wheel for kaldiio (setup.py) ... \u001b[?25ldone\n", - "\u001b[?25h Created wheel for kaldiio: filename=kaldiio-2.17.2-py3-none-any.whl size=24468 sha256=cd6e066764dcc8c24a9dfe3f7bd8acda18761a6fbcb024995729da8debdb466e\n", - " Stored in directory: /root/.cache/pip/wheels/04/07/e8/45641287c59bf6ce41e22259f8680b521c31e6306cb88392ac\n", - "Successfully built kaldiio\n", - "Installing collected packages: kaldiio\n", - "Successfully installed kaldiio-2.17.2\n", + "Requirement already satisfied: kaldiio in ./DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages (2.17.2)\n", + "Requirement already satisfied: numpy in ./DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n", "\u001b[33mWARNING: You are using pip version 20.3.3; however, version 21.2.4 is available.\n", "You should consider upgrading via the '/workspace/zhanghui/DeepSpeech-2.x/tools/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n" ] @@ -723,7 +688,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 100, "id": "superb-methodology", "metadata": {}, "outputs": [], @@ -1029,7 +994,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 101, "id": "monthly-muscle", "metadata": {}, "outputs": [], @@ -1047,7 +1012,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 102, "id": "periodic-senegal", "metadata": {}, "outputs": [], @@ -1057,7 +1022,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 103, "id": "502d3f4d", "metadata": {}, "outputs": [ @@ -1069,8 +1034,8 @@ "2\n", "10\n", "10\n", - "(1763, 83) float32\n", - "(73,) int64\n" + "(1174, 83) float32\n", + "(29,) int64\n" ] } ], @@ -1088,7 +1053,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 104, "id": "humanitarian-container", "metadata": {}, "outputs": [], @@ -1098,7 +1063,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 105, "id": "heard-prize", "metadata": {}, "outputs": [ @@ -1106,7 +1071,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038'] 10\n", + "['4572-112383-0005', '6313-66125-0015', '251-137823-0022', '2277-149896-0030', '652-130726-0032', '5895-34615-0013', '1462-170138-0002', '777-126732-0008', '3660-172182-0021', '2277-149896-0027'] 10\n", "10\n" ] } @@ -1118,7 +1083,7 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 106, "id": "convinced-animation", "metadata": {}, "outputs": [], @@ -1185,7 +1150,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 107, "id": "0b92ade5", "metadata": {}, "outputs": [], @@ -1195,7 +1160,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 108, "id": "8dbd847c", "metadata": {}, "outputs": [], @@ -1205,7 +1170,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 109, "id": "31c085f4", "metadata": {}, "outputs": [ @@ -1213,72 +1178,42 @@ "name": "stdout", "output_type": "stream", "text": [ - "['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038']\n", - "(10, 1763, 83)\n", + "['4572-112383-0005', '6313-66125-0015', '251-137823-0022', '2277-149896-0030', '652-130726-0032', '5895-34615-0013', '1462-170138-0002', '777-126732-0008', '3660-172182-0021', '2277-149896-0027']\n", + "(10, 1174, 83)\n", "(10,)\n", - "[1763 1214 1146 757 751 661 625 512 426 329]\n", - "(10, 73)\n", - "[[2896 621 4502 2176 404 198 3538 391 278 407 389 3719 4577 846\n", - " 4501 482 1004 103 116 178 4222 624 4689 176 459 89 101 3465\n", - " 3204 4502 2029 1834 2298 829 3366 278 4705 4925 482 2920 3204 2481\n", - " 448 627 1254 404 20 202 36 2047 627 2495 4504 481 479 99\n", - " 18 2079 4502 1628 202 226 4512 3267 210 278 483 234 367 4502\n", - " 2438 3204 1141]\n", - " [ 742 4501 4768 4569 742 4483 2495 4502 3040 3204 4502 3961 3204 3992\n", - " 3089 4832 4258 621 2391 4642 3218 4502 3439 235 270 313 2385 2833\n", - " 742 4502 3282 332 3 280 4237 3252 830 2387 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [2099 278 4904 2302 124 4832 3158 482 2888 2495 482 2450 627 1560\n", - " 3158 4729 482 3514 3204 1027 3233 2391 2862 399 389 4962 2495 121\n", - " 221 7 2340 1216 1658 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [2458 2659 1362 2 404 4975 4995 487 3079 2785 2371 3158 824 2603\n", - " 4832 2323 999 2603 4832 4156 4678 627 1784 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [2458 2340 1661 101 4723 2138 4502 4690 463 332 251 2345 4534 4502\n", - " 2396 444 4501 2287 389 4531 4894 1466 959 389 1658 2584 4502 3681\n", - " 279 3204 4502 2228 3204 4502 4690 463 332 251 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [2368 1248 208 4832 3158 482 1473 3401 999 482 4159 3838 389 478\n", - " 4572 404 3158 3063 1481 113 4499 4501 3204 4643 2 389 4111 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [2882 2932 4329 1808 4577 4350 4577 482 1636 2 389 1841 3204 3079\n", - " 1091 389 3204 2816 2079 4172 4986 4990 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [4869 2598 2603 1976 96 389 478 3 4031 721 4925 2263 1259 2598\n", - " 4508 653 4979 4925 2741 252 72 236 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [2458 4447 4505 713 624 3207 206 4577 4502 2404 3837 3458 2812 4936\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]\n", - " [1501 3897 2537 278 2601 2 404 2603 482 2235 3388 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", - " -1 -1 -1]]\n", - "[73 38 33 23 38 27 22 22 14 11]\n", + "[1174 821 716 628 597 473 463 441 419 358]\n", + "(10, 32)\n", + "[[4502 2404 4223 3204 4502 587 1018 3861 2932 713 2458 2916 253 4508\n", + " 627 1395 713 4504 957 2761 209 2967 3173 3918 2598 4100 3 2816\n", + " 4990 -1 -1 -1]\n", + " [1005 451 210 278 3411 206 482 2307 573 4502 3848 4577 4273 2388\n", + " 4444 89 4919 278 1264 4501 2371 3 139 113 2603 4962 3158 3325\n", + " 4577 814 4587 1422]\n", + " [2345 4144 2291 200 713 2345 532 999 2458 3076 545 2458 4832 3038\n", + " 4499 482 2812 1260 3080 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", + " -1 -1 -1 -1]\n", + " [2345 832 4577 4920 4501 2345 2298 1236 381 288 389 101 2495 4172\n", + " 4843 3233 3245 4501 2345 2298 3987 4502 3023 3353 2345 1361 1635 2603\n", + " 4723 2371 -1 -1]\n", + " [4502 4207 432 3204 4502 2396 125 935 433 2598 483 18 327 2\n", + " 389 627 4512 2340 713 482 1981 4525 4031 269 2030 1340 101 2495\n", + " 4013 4844 -1 -1]\n", + " [4502 4892 3204 1892 3780 389 482 2774 3013 89 192 2495 4502 3475\n", + " 389 66 370 343 404 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", + " -1 -1 -1 -1]\n", + " [2458 2314 4577 2340 2863 1254 303 269 2 389 932 2079 4577 299\n", + " 195 3233 4508 2 89 814 3144 1091 3204 3250 2193 3414 -1 -1\n", + " -1 -1 -1 -1]\n", + " [2391 1785 443 78 39 4962 2340 829 599 4593 278 4681 202 407\n", + " 269 194 182 4577 482 4308 -1 -1 -1 -1 -1 -1 -1 -1\n", + " -1 -1 -1 -1]\n", + " [ 627 4873 2175 363 202 404 1018 4577 4502 3412 4875 2286 107 122\n", + " 4832 2345 3896 89 2368 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", + " -1 -1 -1 -1]\n", + " [ 481 174 474 599 1881 3252 2842 742 4502 2545 107 88 3204 4525\n", + " 4517 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1\n", + " -1 -1 -1 -1]]\n", + "[29 32 19 30 30 19 26 20 19 15]\n", "float32\n", "int64\n", "int64\n", @@ -1302,42 +1237,281 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 110, "id": "72e9ba60", "metadata": {}, "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 230, + "id": "64593e5f", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "from paddle.io import DataLoader\n", + "\n", + "from deepspeech.frontend.utility import read_manifest\n", + "from deepspeech.io.batchfy import make_batchset\n", + "from deepspeech.io.converter import CustomConverter\n", + "from deepspeech.io.dataset import TransformDataset\n", + "from deepspeech.io.reader import LoadInputsAndTargets\n", + "from deepspeech.utils.log import Log\n", + "\n", + "\n", + "logger = Log(__name__).getlog()\n", + "\n", + "\n", + "class BatchDataLoader():\n", + " def __init__(self,\n", + " json_file: str,\n", + " train_mode: bool,\n", + " sortagrad: bool=False,\n", + " batch_size: int=0,\n", + " maxlen_in: float=float('inf'),\n", + " maxlen_out: float=float('inf'),\n", + " minibatches: int=0,\n", + " mini_batch_size: int=1,\n", + " batch_count: str='auto',\n", + " batch_bins: int=0,\n", + " batch_frames_in: int=0,\n", + " batch_frames_out: int=0,\n", + " batch_frames_inout: int=0,\n", + " preprocess_conf=None,\n", + " n_iter_processes: int=1,\n", + " subsampling_factor: int=1,\n", + " num_encs: int=1):\n", + " self.json_file = json_file\n", + " self.train_mode = train_mode\n", + " self.use_sortagrad = sortagrad == -1 or sortagrad > 0\n", + " self.batch_size = batch_size\n", + " self.maxlen_in = maxlen_in\n", + " self.maxlen_out = maxlen_out\n", + " self.batch_count = batch_count\n", + " self.batch_bins = batch_bins\n", + " self.batch_frames_in = batch_frames_in\n", + " self.batch_frames_out = batch_frames_out\n", + " self.batch_frames_inout = batch_frames_inout\n", + " self.subsampling_factor = subsampling_factor\n", + " self.num_encs = num_encs\n", + " self.preprocess_conf = preprocess_conf\n", + " self.n_iter_processes = n_iter_processes\n", + "\n", + " \n", + " # read json data\n", + " self.data_json = read_manifest(json_file)\n", + "\n", + " # make minibatch list (variable length)\n", + " self.minibaches = make_batchset(\n", + " self.data_json,\n", + " batch_size,\n", + " maxlen_in,\n", + " maxlen_out,\n", + " minibatches, # for debug\n", + " min_batch_size=mini_batch_size,\n", + " shortest_first=self.use_sortagrad,\n", + " count=batch_count,\n", + " batch_bins=batch_bins,\n", + " batch_frames_in=batch_frames_in,\n", + " batch_frames_out=batch_frames_out,\n", + " batch_frames_inout=batch_frames_inout,\n", + " iaxis=0,\n", + " oaxis=0, )\n", + "\n", + " # data reader\n", + " self.reader = LoadInputsAndTargets(\n", + " mode=\"asr\",\n", + " load_output=True,\n", + " preprocess_conf=preprocess_conf,\n", + " preprocess_args={\"train\":\n", + " train_mode}, # Switch the mode of preprocessing\n", + " )\n", + "\n", + " # Setup a converter\n", + " if num_encs == 1:\n", + " self.converter = CustomConverter(\n", + " subsampling_factor=subsampling_factor, dtype=np.float32)\n", + " else:\n", + " assert NotImplementedError(\"not impl CustomConverterMulEnc.\")\n", + "\n", + " # hack to make batchsize argument as 1\n", + " # actual bathsize is included in a list\n", + " # default collate function converts numpy array to pytorch tensor\n", + " # we used an empty collate function instead which returns list\n", + " self.dataset = TransformDataset(self.minibaches, \n", + " lambda data: self.converter([self.reader(data, return_uttid=True)]))\n", + " self.dataloader = DataLoader(\n", + " dataset=self.dataset,\n", + " batch_size=1,\n", + " shuffle=not use_sortagrad if train_mode else False,\n", + " collate_fn=lambda x: x[0],\n", + " num_workers=n_iter_processes, )\n", + "\n", + " def __repr__(self):\n", + " echo = f\"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}> \"\n", + " echo += f\"train_mode: {self.train_mode}, \"\n", + " echo += f\"sortagrad: {self.use_sortagrad}, \"\n", + " echo += f\"batch_size: {self.batch_size}, \"\n", + " echo += f\"maxlen_in: {self.maxlen_in}, \"\n", + " echo += f\"maxlen_out: {self.maxlen_out}, \"\n", + " echo += f\"batch_count: {self.batch_count}, \"\n", + " echo += f\"batch_bins: {self.batch_bins}, \"\n", + " echo += f\"batch_frames_in: {self.batch_frames_in}, \"\n", + " echo += f\"batch_frames_out: {self.batch_frames_out}, \"\n", + " echo += f\"batch_frames_inout: {self.batch_frames_inout}, \"\n", + " echo += f\"subsampling_factor: {self.subsampling_factor}, \"\n", + " echo += f\"num_encs: {self.num_encs}, \"\n", + " echo += f\"num_workers: {self.n_iter_processes}, \"\n", + " echo += f\"file: {self.json_file}\"\n", + " return echo\n", + " \n", + " def __len__(self):\n", + " return len(self.dataloader)\n", + " \n", + " def __iter__(self):\n", + " return self.dataloader.__iter__()\n", + " \n", + " def __call__(self):\n", + " return self.__iter__()\n" + ] + }, + { + "cell_type": "code", + "execution_count": 231, + "id": "fcea3fd0", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[INFO 2021/08/18 07:42:23 batchfy.py:399] count is auto detected as seq\n", + "[INFO 2021/08/18 07:42:23 batchfy.py:423] # utts: 5542\n", + "[INFO 2021/08/18 07:42:23 batchfy.py:466] # minibatches: 278\n" + ] + } + ], "source": [ - "from pathlib import Path" + "train = BatchDataLoader(dev_data, True, batch_size=20)" ] }, { "cell_type": "code", - "execution_count": 90, - "id": "64593e5f", + "execution_count": 232, + "id": "e2a2c9a8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "278\n", + "['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'auto_collate_batch', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'dataset_kind', 'feed_list', 'from_dataset', 'from_generator', 'num_workers', 'pin_memory', 'places', 'return_list', 'timeout', 'use_buffer_reader', 'use_shared_memory', 'worker_init_fn']\n", + "<__main__.BatchDataLoader object at 0x7fdddba35470> train_mode: True, sortagrad: False, batch_size: 20, maxlen_in: inf, maxlen_out: inf, batch_count: auto, batch_bins: 0, batch_frames_in: 0, batch_frames_out: 0, batch_frames_inout: 0, subsampling_factor: 1, num_encs: 1, num_workers: 1, file: /workspace/zhanghui/DeepSpeech-2.x/examples/librispeech/s2/data/manifest.dev\n", + "278\n" + ] + } + ], + "source": [ + "print(len(train.dataloader))\n", + "print(dir(train.dataloader))\n", + "print(train)\n", + "print(len(train))" + ] + }, + { + "cell_type": "code", + "execution_count": 220, + "id": "a5ba7d6e", "metadata": {}, "outputs": [ { - "ename": "AttributeError", - "evalue": "'str' object has no attribute 'stat'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m/tmp/ipykernel_48616/3505477735.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'xxxxxxxx'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mPath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m/usr/local/lib/python3.7/pathlib.py\u001b[0m in \u001b[0;36mis_file\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1342\u001b[0m \"\"\"\n\u001b[1;32m 1343\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mS_ISREG\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mst_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1345\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1346\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mENOTDIR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'stat'" + "name": "stdout", + "output_type": "stream", + "text": [ + "['7601-101619-0003', '1255-138279-0000', '1272-128104-0004', '6123-59150-0027', '2078-142845-0025', '7850-73752-0018', '4570-24733-0004', '2506-169427-0002', '7601-101619-0004', '3170-137482-0000', '6267-53049-0019', '4570-14911-0009', '174-168635-0018', '7601-291468-0004', '3576-138058-0022', '1919-142785-0007', '6467-62797-0007', '4153-61735-0005', '1686-142278-0003', '2506-169427-0000']\n", + "Tensor(shape=[20, 2961, 83], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [[[-1.99415934, -1.80315673, -1.88801885, ..., 0.86933994, -0.59853148, 0.02596200],\n", + " [-1.95346808, -1.84891188, -2.17492867, ..., 0.83640492, -0.59853148, -0.11333394],\n", + " [-2.27899861, -2.21495342, -2.58480024, ..., 0.91874266, -0.59853148, -0.31453922],\n", + " ...,\n", + " [-2.64522028, -2.35221887, -2.91269732, ..., 1.48994756, -0.16100442, 0.36646330],\n", + " [-2.40107250, -2.21495342, -2.37986445, ..., 1.44072104, -0.13220564, 0.12656468],\n", + " [-2.15692472, -1.89466715, -2.25690317, ..., 1.31273174, -0.09620714, -0.15202725]],\n", + "\n", + " [[-0.28859532, -0.29033494, -0.86576819, ..., 1.37753224, -0.30570769, 0.25806731],\n", + " [-0.20149794, -0.17814466, -0.59891301, ..., 1.35188794, -0.30570769, -0.02964944],\n", + " [-0.34947991, -0.33597648, -0.96877253, ..., 1.38394332, -0.30570769, -0.38376236],\n", + " ...,\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + "\n", + " [[-0.44914246, -0.33902276, -0.78237975, ..., 1.38218808, 0.29214793, -0.16815147],\n", + " [-0.55490732, -0.41596055, -0.84425378, ..., 1.34530187, 0.25002354, -0.04004869],\n", + " [-0.83694696, -0.62112784, -1.07112527, ..., 1.19160914, 0.20789915, 0.37984371],\n", + " ...,\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + "\n", + " ...,\n", + "\n", + " [[-1.24343657, -0.94188881, -1.41092563, ..., 0.96716309, 0.60345763, 0.15360183],\n", + " [-1.19466043, -0.80585432, -0.49723154, ..., 1.06735480, 0.60345763, 0.14511746],\n", + " [-0.94079566, -0.59330046, -0.40948665, ..., 0.82244170, 0.55614340, 0.28086722],\n", + " ...,\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + "\n", + " [[ 0.21757117, 0.11361472, -0.33262897, ..., 0.76338506, -0.10711290, -0.57754958],\n", + " [-1.00205481, -0.61152041, -0.47124696, ..., 1.11897349, -0.10711290, 0.24931324],\n", + " [-1.03929281, -1.20336759, -1.16433656, ..., 0.88888687, -0.10711290, -0.04115745],\n", + " ...,\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]],\n", + "\n", + " [[-1.25289667, -1.05046368, -0.82881606, ..., 1.23991334, 0.61702502, 0.05275881],\n", + " [-1.19659519, -0.78677225, -0.80407262, ..., 1.27644968, 0.61702502, -0.35079369],\n", + " [-1.49687004, -1.01750231, -0.82881606, ..., 1.29106426, 0.65006059, 0.17958963],\n", + " ...,\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ],\n", + " [ 0. , 0. , 0. , ..., 0. , 0. , 0. ]]])\n", + "Tensor(shape=[20], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [2961, 2948, 2938, 2907, 2904, 2838, 2832, 2819, 2815, 2797, 2775, 2710, 2709, 2696, 2688, 2661, 2616, 2595, 2589, 2576])\n", + "Tensor(shape=[20, 133], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [[3098, 1595, 389, ..., -1 , -1 , -1 ],\n", + " [2603, 4832, 482, ..., -1 , -1 , -1 ],\n", + " [2796, 303, 269, ..., -1 , -1 , -1 ],\n", + " ...,\n", + " [3218, 3673, 206, ..., -1 , -1 , -1 ],\n", + " [2371, 4832, 4031, ..., -1 , -1 , -1 ],\n", + " [2570, 2433, 4285, ..., -1 , -1 , -1 ]])\n", + "Tensor(shape=[20], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n", + " [80 , 83 , 102, 133, 82 , 102, 71 , 91 , 68 , 81 , 86 , 67 , 71 , 95 , 65 , 88 , 97 , 98 , 89 , 72 ])\n" ] } ], "source": [ - "s='xxxxxxxx'\n", - "Path.is_file(s)" + "for batch in train:\n", + " utts, xs, ilens, ys, olens = batch\n", + " print(utts)\n", + " print(xs)\n", + " print(ilens)\n", + " print(ys)\n", + " print(olens)\n", + " break" ] }, { "cell_type": "code", "execution_count": null, - "id": "fcea3fd0", + "id": "3c974a1e", "metadata": {}, "outputs": [], "source": []