add batchdataloader test

3 years ago · 6949fbb026
parent 4af774d8f0
commit 6949fbb026
1 changed files with 327 additions and 153 deletions
--- a/.notebook/espnet_dataloader.ipynb
+++ b/.notebook/espnet_dataloader.ipynb
@ -2,7 +2,7 @@
 "cells": [
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 147,
   "id": "extensive-venice",
   "metadata": {},
   "outputs": [
@ -10,16 +10,16 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "/workspace/zhanghui/DeepSpeech-2.x\n"
+      "/\n"
     ]
    },
    {
     "data": {
      "text/plain": [
-       "'/workspace/zhanghui/DeepSpeech-2.x'"
+       "'/'"
      ]
     },
-     "execution_count": 1,
+     "execution_count": 147,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -31,7 +31,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 148,
   "id": "correct-window",
   "metadata": {},
   "outputs": [
@ -50,7 +50,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 149,
   "id": "exceptional-cheese",
   "metadata": {},
   "outputs": [],
@ -60,53 +60,17 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 150,
   "id": "extraordinary-orleans",
   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "grep: warning: GREP_OPTIONS is deprecated; please use an alias or script\n",
-      "register user softmax to paddle, remove this when fixed!\n",
-      "register user log_softmax to paddle, remove this when fixed!\n",
-      "register user sigmoid to paddle, remove this when fixed!\n",
-      "register user log_sigmoid to paddle, remove this when fixed!\n",
-      "register user relu to paddle, remove this when fixed!\n",
-      "override cat of paddle if exists or register, remove this when fixed!\n",
-      "override long of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "override new_full of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "override eq of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "override eq of paddle if exists or register, remove this when fixed!\n",
-      "override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n",
-      "register user view to paddle.Tensor, remove this when fixed!\n",
-      "register user view_as to paddle.Tensor, remove this when fixed!\n",
-      "register user masked_fill to paddle.Tensor, remove this when fixed!\n",
-      "register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
-      "register user fill_ to paddle.Tensor, remove this when fixed!\n",
-      "register user repeat to paddle.Tensor, remove this when fixed!\n",
-      "register user softmax to paddle.Tensor, remove this when fixed!\n",
-      "register user sigmoid to paddle.Tensor, remove this when fixed!\n",
-      "register user relu to paddle.Tensor, remove this when fixed!\n",
-      "register user type_as to paddle.Tensor, remove this when fixed!\n",
-      "register user to to paddle.Tensor, remove this when fixed!\n",
-      "register user float to paddle.Tensor, remove this when fixed!\n",
-      "register user int to paddle.Tensor, remove this when fixed!\n",
-      "register user GLU to paddle.nn, remove this when fixed!\n",
-      "register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
-      "register user export to paddle.jit, remove this when fixed!\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from deepspeech.frontend.utility import read_manifest"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 151,
   "id": "returning-lighter",
   "metadata": {},
   "outputs": [],
@ -116,7 +80,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 152,
   "id": "western-founder",
   "metadata": {},
   "outputs": [
@ -158,7 +122,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 97,
   "id": "motivated-receptor",
   "metadata": {},
   "outputs": [],
@ -638,10 +602,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 98,
   "id": "acquired-hurricane",
   "metadata": {},
   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO 2021/08/18 06:57:10 1445365138.py:284] use shuffled batch.\n",
+      "[INFO 2021/08/18 06:57:10 1445365138.py:286] # utts: 5542\n",
+      "[INFO 2021/08/18 06:57:10 1445365138.py:468] # minibatches: 555\n"
+     ]
+    },
    {
     "name": "stdout",
     "output_type": "stream",
@ -686,7 +659,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 16,
+   "execution_count": 99,
   "id": "warming-malpractice",
   "metadata": {},
   "outputs": [
@ -694,16 +667,8 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "Collecting kaldiio\n",
-      "  Downloading kaldiio-2.17.2.tar.gz (24 kB)\n",
-      "Requirement already satisfied: numpy in ./tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n",
-      "Building wheels for collected packages: kaldiio\n",
-      "  Building wheel for kaldiio (setup.py) ... \u001b[?25ldone\n",
-      "\u001b[?25h  Created wheel for kaldiio: filename=kaldiio-2.17.2-py3-none-any.whl size=24468 sha256=cd6e066764dcc8c24a9dfe3f7bd8acda18761a6fbcb024995729da8debdb466e\n",
-      "  Stored in directory: /root/.cache/pip/wheels/04/07/e8/45641287c59bf6ce41e22259f8680b521c31e6306cb88392ac\n",
-      "Successfully built kaldiio\n",
-      "Installing collected packages: kaldiio\n",
-      "Successfully installed kaldiio-2.17.2\n",
+      "Requirement already satisfied: kaldiio in ./DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages (2.17.2)\n",
+      "Requirement already satisfied: numpy in ./DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/numpy-1.21.2-py3.7-linux-x86_64.egg (from kaldiio) (1.21.2)\n",
      "\u001b[33mWARNING: You are using pip version 20.3.3; however, version 21.2.4 is available.\n",
      "You should consider upgrading via the '/workspace/zhanghui/DeepSpeech-2.x/tools/venv/bin/python -m pip install --upgrade pip' command.\u001b[0m\n"
     ]
@ -723,7 +688,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
+   "execution_count": 100,
   "id": "superb-methodology",
   "metadata": {},
   "outputs": [],
@ -1029,7 +994,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 101,
   "id": "monthly-muscle",
   "metadata": {},
   "outputs": [],
@ -1047,7 +1012,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": 102,
   "id": "periodic-senegal",
   "metadata": {},
   "outputs": [],
@ -1057,7 +1022,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 103,
   "id": "502d3f4d",
   "metadata": {},
   "outputs": [
@ -1069,8 +1034,8 @@
      "2\n",
      "10\n",
      "10\n",
-      "(1763, 83) float32\n",
-      "(73,) int64\n"
+      "(1174, 83) float32\n",
+      "(29,) int64\n"
     ]
    }
   ],
@ -1088,7 +1053,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 104,
   "id": "humanitarian-container",
   "metadata": {},
   "outputs": [],
@ -1098,7 +1063,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 105,
   "id": "heard-prize",
   "metadata": {},
   "outputs": [
@ -1106,7 +1071,7 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038'] 10\n",
+      "['4572-112383-0005', '6313-66125-0015', '251-137823-0022', '2277-149896-0030', '652-130726-0032', '5895-34615-0013', '1462-170138-0002', '777-126732-0008', '3660-172182-0021', '2277-149896-0027'] 10\n",
      "10\n"
     ]
    }
@ -1118,7 +1083,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 83,
+   "execution_count": 106,
   "id": "convinced-animation",
   "metadata": {},
   "outputs": [],
@ -1185,7 +1150,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 84,
+   "execution_count": 107,
   "id": "0b92ade5",
   "metadata": {},
   "outputs": [],
@ -1195,7 +1160,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 85,
+   "execution_count": 108,
   "id": "8dbd847c",
   "metadata": {},
   "outputs": [],
@ -1205,7 +1170,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 87,
+   "execution_count": 109,
   "id": "31c085f4",
   "metadata": {},
   "outputs": [
@ -1213,72 +1178,42 @@
     "name": "stdout",
     "output_type": "stream",
     "text": [
-      "['1673-143396-0008', '1650-173552-0000', '2803-154320-0000', '6267-65525-0045', '7641-96684-0029', '5338-284437-0010', '8173-294714-0033', '5543-27761-0047', '8254-115543-0043', '6467-94831-0038']\n",
-      "(10, 1763, 83)\n",
+      "['4572-112383-0005', '6313-66125-0015', '251-137823-0022', '2277-149896-0030', '652-130726-0032', '5895-34615-0013', '1462-170138-0002', '777-126732-0008', '3660-172182-0021', '2277-149896-0027']\n",
+      "(10, 1174, 83)\n",
      "(10,)\n",
-      "[1763 1214 1146  757  751  661  625  512  426  329]\n",
-      "(10, 73)\n",
-      "[[2896  621 4502 2176  404  198 3538  391  278  407  389 3719 4577  846\n",
-      "  4501  482 1004  103  116  178 4222  624 4689  176  459   89  101 3465\n",
-      "  3204 4502 2029 1834 2298  829 3366  278 4705 4925  482 2920 3204 2481\n",
-      "   448  627 1254  404   20  202   36 2047  627 2495 4504  481  479   99\n",
-      "    18 2079 4502 1628  202  226 4512 3267  210  278  483  234  367 4502\n",
-      "  2438 3204 1141]\n",
-      " [ 742 4501 4768 4569  742 4483 2495 4502 3040 3204 4502 3961 3204 3992\n",
-      "  3089 4832 4258  621 2391 4642 3218 4502 3439  235  270  313 2385 2833\n",
-      "   742 4502 3282  332    3  280 4237 3252  830 2387   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [2099  278 4904 2302  124 4832 3158  482 2888 2495  482 2450  627 1560\n",
-      "  3158 4729  482 3514 3204 1027 3233 2391 2862  399  389 4962 2495  121\n",
-      "   221    7 2340 1216 1658   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [2458 2659 1362    2  404 4975 4995  487 3079 2785 2371 3158  824 2603\n",
-      "  4832 2323  999 2603 4832 4156 4678  627 1784   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [2458 2340 1661  101 4723 2138 4502 4690  463  332  251 2345 4534 4502\n",
-      "  2396  444 4501 2287  389 4531 4894 1466  959  389 1658 2584 4502 3681\n",
-      "   279 3204 4502 2228 3204 4502 4690  463  332  251   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [2368 1248  208 4832 3158  482 1473 3401  999  482 4159 3838  389  478\n",
-      "  4572  404 3158 3063 1481  113 4499 4501 3204 4643    2  389 4111   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [2882 2932 4329 1808 4577 4350 4577  482 1636    2  389 1841 3204 3079\n",
-      "  1091  389 3204 2816 2079 4172 4986 4990   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [4869 2598 2603 1976   96  389  478    3 4031  721 4925 2263 1259 2598\n",
-      "  4508  653 4979 4925 2741  252   72  236   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [2458 4447 4505  713  624 3207  206 4577 4502 2404 3837 3458 2812 4936\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]\n",
-      " [1501 3897 2537  278 2601    2  404 2603  482 2235 3388   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
-      "    -1   -1   -1]]\n",
-      "[73 38 33 23 38 27 22 22 14 11]\n",
+      "[1174  821  716  628  597  473  463  441  419  358]\n",
+      "(10, 32)\n",
+      "[[4502 2404 4223 3204 4502  587 1018 3861 2932  713 2458 2916  253 4508\n",
+      "   627 1395  713 4504  957 2761  209 2967 3173 3918 2598 4100    3 2816\n",
+      "  4990   -1   -1   -1]\n",
+      " [1005  451  210  278 3411  206  482 2307  573 4502 3848 4577 4273 2388\n",
+      "  4444   89 4919  278 1264 4501 2371    3  139  113 2603 4962 3158 3325\n",
+      "  4577  814 4587 1422]\n",
+      " [2345 4144 2291  200  713 2345  532  999 2458 3076  545 2458 4832 3038\n",
+      "  4499  482 2812 1260 3080   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
+      "    -1   -1   -1   -1]\n",
+      " [2345  832 4577 4920 4501 2345 2298 1236  381  288  389  101 2495 4172\n",
+      "  4843 3233 3245 4501 2345 2298 3987 4502 3023 3353 2345 1361 1635 2603\n",
+      "  4723 2371   -1   -1]\n",
+      " [4502 4207  432 3204 4502 2396  125  935  433 2598  483   18  327    2\n",
+      "   389  627 4512 2340  713  482 1981 4525 4031  269 2030 1340  101 2495\n",
+      "  4013 4844   -1   -1]\n",
+      " [4502 4892 3204 1892 3780  389  482 2774 3013   89  192 2495 4502 3475\n",
+      "   389   66  370  343  404   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
+      "    -1   -1   -1   -1]\n",
+      " [2458 2314 4577 2340 2863 1254  303  269    2  389  932 2079 4577  299\n",
+      "   195 3233 4508    2   89  814 3144 1091 3204 3250 2193 3414   -1   -1\n",
+      "    -1   -1   -1   -1]\n",
+      " [2391 1785  443   78   39 4962 2340  829  599 4593  278 4681  202  407\n",
+      "   269  194  182 4577  482 4308   -1   -1   -1   -1   -1   -1   -1   -1\n",
+      "    -1   -1   -1   -1]\n",
+      " [ 627 4873 2175  363  202  404 1018 4577 4502 3412 4875 2286  107  122\n",
+      "  4832 2345 3896   89 2368   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
+      "    -1   -1   -1   -1]\n",
+      " [ 481  174  474  599 1881 3252 2842  742 4502 2545  107   88 3204 4525\n",
+      "  4517   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1   -1\n",
+      "    -1   -1   -1   -1]]\n",
+      "[29 32 19 30 30 19 26 20 19 15]\n",
      "float32\n",
      "int64\n",
      "int64\n",
@ -1302,42 +1237,281 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 88,
+   "execution_count": 110,
   "id": "72e9ba60",
   "metadata": {},
   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 230,
+   "id": "64593e5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "from paddle.io import DataLoader\n",
+    "\n",
+    "from deepspeech.frontend.utility import read_manifest\n",
+    "from deepspeech.io.batchfy import make_batchset\n",
+    "from deepspeech.io.converter import CustomConverter\n",
+    "from deepspeech.io.dataset import TransformDataset\n",
+    "from deepspeech.io.reader import LoadInputsAndTargets\n",
+    "from deepspeech.utils.log import Log\n",
+    "\n",
+    "\n",
+    "logger = Log(__name__).getlog()\n",
+    "\n",
+    "\n",
+    "class BatchDataLoader():\n",
+    "    \"\"\"Iterate over pre-batched utterances described by a manifest file.\n",
+    "\n",
+    "    The manifest is read with ``read_manifest`` and grouped into minibatches\n",
+    "    by ``make_batchset``. Each minibatch is loaded by ``LoadInputsAndTargets``\n",
+    "    and converted by ``CustomConverter``; a ``paddle.io.DataLoader`` with\n",
+    "    ``batch_size=1`` then yields one converted minibatch per step.\n",
+    "\n",
+    "    Args:\n",
+    "        json_file: Manifest path handed to ``read_manifest``.\n",
+    "        train_mode: Passed to the reader as its ``train`` preprocessing flag;\n",
+    "            batches are shuffled only when this is true and sortagrad is off.\n",
+    "        sortagrad: Sortagrad is on when this equals -1 or is greater than 0.\n",
+    "            When on, ``make_batchset`` gets ``shortest_first=True`` and the\n",
+    "            loader does not shuffle.\n",
+    "        batch_size: Forwarded to ``make_batchset``.\n",
+    "        maxlen_in: Forwarded to ``make_batchset``.\n",
+    "        maxlen_out: Forwarded to ``make_batchset``.\n",
+    "        minibatches: Forwarded to ``make_batchset`` (marked there as a debug aid).\n",
+    "        mini_batch_size: Forwarded to ``make_batchset`` as ``min_batch_size``.\n",
+    "        batch_count: Forwarded to ``make_batchset`` as ``count``.\n",
+    "        batch_bins: Forwarded to ``make_batchset``.\n",
+    "        batch_frames_in: Forwarded to ``make_batchset``.\n",
+    "        batch_frames_out: Forwarded to ``make_batchset``.\n",
+    "        batch_frames_inout: Forwarded to ``make_batchset``.\n",
+    "        preprocess_conf: Forwarded to ``LoadInputsAndTargets``.\n",
+    "        n_iter_processes: Used as ``num_workers`` of the ``DataLoader``.\n",
+    "        subsampling_factor: Forwarded to ``CustomConverter``.\n",
+    "        num_encs: Number of encoders; only 1 is supported.\n",
+    "\n",
+    "    Raises:\n",
+    "        NotImplementedError: If ``num_encs`` is not 1.\n",
+    "    \"\"\"\n",
+    "\n",
+    "    def __init__(self,\n",
+    "                 json_file: str,\n",
+    "                 train_mode: bool,\n",
+    "                 sortagrad: bool=False,\n",
+    "                 batch_size: int=0,\n",
+    "                 maxlen_in: float=float('inf'),\n",
+    "                 maxlen_out: float=float('inf'),\n",
+    "                 minibatches: int=0,\n",
+    "                 mini_batch_size: int=1,\n",
+    "                 batch_count: str='auto',\n",
+    "                 batch_bins: int=0,\n",
+    "                 batch_frames_in: int=0,\n",
+    "                 batch_frames_out: int=0,\n",
+    "                 batch_frames_inout: int=0,\n",
+    "                 preprocess_conf=None,\n",
+    "                 n_iter_processes: int=1,\n",
+    "                 subsampling_factor: int=1,\n",
+    "                 num_encs: int=1):\n",
+    "        self.json_file = json_file\n",
+    "        self.train_mode = train_mode\n",
+    "        # -1 or any positive value turns sortagrad on.\n",
+    "        self.use_sortagrad = sortagrad == -1 or sortagrad > 0\n",
+    "        self.batch_size = batch_size\n",
+    "        self.maxlen_in = maxlen_in\n",
+    "        self.maxlen_out = maxlen_out\n",
+    "        self.batch_count = batch_count\n",
+    "        self.batch_bins = batch_bins\n",
+    "        self.batch_frames_in = batch_frames_in\n",
+    "        self.batch_frames_out = batch_frames_out\n",
+    "        self.batch_frames_inout = batch_frames_inout\n",
+    "        self.subsampling_factor = subsampling_factor\n",
+    "        self.num_encs = num_encs\n",
+    "        self.preprocess_conf = preprocess_conf\n",
+    "        self.n_iter_processes = n_iter_processes\n",
+    "\n",
+    "        # read json data\n",
+    "        self.data_json = read_manifest(json_file)\n",
+    "\n",
+    "        # make minibatch list (variable length)\n",
+    "        # The attribute keeps its original spelling so existing users of\n",
+    "        # `minibaches` continue to work.\n",
+    "        self.minibaches = make_batchset(\n",
+    "            self.data_json,\n",
+    "            batch_size,\n",
+    "            maxlen_in,\n",
+    "            maxlen_out,\n",
+    "            minibatches,  # for debug\n",
+    "            min_batch_size=mini_batch_size,\n",
+    "            shortest_first=self.use_sortagrad,\n",
+    "            count=batch_count,\n",
+    "            batch_bins=batch_bins,\n",
+    "            batch_frames_in=batch_frames_in,\n",
+    "            batch_frames_out=batch_frames_out,\n",
+    "            batch_frames_inout=batch_frames_inout,\n",
+    "            iaxis=0,\n",
+    "            oaxis=0, )\n",
+    "\n",
+    "        # data reader\n",
+    "        self.reader = LoadInputsAndTargets(\n",
+    "            mode=\"asr\",\n",
+    "            load_output=True,\n",
+    "            preprocess_conf=preprocess_conf,\n",
+    "            preprocess_args={\"train\":\n",
+    "                             train_mode},  # Switch the mode of preprocessing\n",
+    "        )\n",
+    "\n",
+    "        # Setup a converter\n",
+    "        if num_encs == 1:\n",
+    "            # NOTE(review): np is not imported in this cell; confirm that an\n",
+    "            # earlier cell imports numpy as np before this class is used.\n",
+    "            self.converter = CustomConverter(\n",
+    "                subsampling_factor=subsampling_factor, dtype=np.float32)\n",
+    "        else:\n",
+    "            # Must be raised: asserting on an exception instance always passes\n",
+    "            # and would leave self.converter undefined.\n",
+    "            raise NotImplementedError(\"not impl CustomConverterMulEnc.\")\n",
+    "\n",
+    "        # Every dataset item is already a whole minibatch, so the DataLoader\n",
+    "        # runs with batch_size=1 and the collate function simply unwraps the\n",
+    "        # single-element list it is handed.\n",
+    "        self.dataset = TransformDataset(self.minibaches, \n",
+    "                                        lambda data: self.converter([self.reader(data, return_uttid=True)]))\n",
+    "        self.dataloader = DataLoader(\n",
+    "            dataset=self.dataset,\n",
+    "            batch_size=1,\n",
+    "            # Use the instance flag; a bare `use_sortagrad` is not defined in\n",
+    "            # this scope and would fall back to whatever global happens to exist.\n",
+    "            shuffle=not self.use_sortagrad if train_mode else False,\n",
+    "            collate_fn=lambda x: x[0],\n",
+    "            num_workers=n_iter_processes, )\n",
+    "\n",
+    "    def __repr__(self):\n",
+    "        \"\"\"Return a one-line summary of the loader's configuration.\"\"\"\n",
+    "        echo = f\"<{self.__class__.__module__}.{self.__class__.__name__} object at {hex(id(self))}> \"\n",
+    "        echo += f\"train_mode: {self.train_mode}, \"\n",
+    "        echo += f\"sortagrad: {self.use_sortagrad}, \"\n",
+    "        echo += f\"batch_size: {self.batch_size}, \"\n",
+    "        echo += f\"maxlen_in: {self.maxlen_in}, \"\n",
+    "        echo += f\"maxlen_out: {self.maxlen_out}, \"\n",
+    "        echo += f\"batch_count: {self.batch_count}, \"\n",
+    "        echo += f\"batch_bins: {self.batch_bins}, \"\n",
+    "        echo += f\"batch_frames_in: {self.batch_frames_in}, \"\n",
+    "        echo += f\"batch_frames_out: {self.batch_frames_out}, \"\n",
+    "        echo += f\"batch_frames_inout: {self.batch_frames_inout}, \"\n",
+    "        echo += f\"subsampling_factor: {self.subsampling_factor}, \"\n",
+    "        echo += f\"num_encs: {self.num_encs}, \"\n",
+    "        echo += f\"num_workers: {self.n_iter_processes}, \"\n",
+    "        echo += f\"file: {self.json_file}\"\n",
+    "        return echo\n",
+    "\n",
+    "    def __len__(self):\n",
+    "        \"\"\"Return the length of the underlying DataLoader.\"\"\"\n",
+    "        return len(self.dataloader)\n",
+    "\n",
+    "    def __iter__(self):\n",
+    "        \"\"\"Return a fresh iterator from the underlying DataLoader.\"\"\"\n",
+    "        return self.dataloader.__iter__()\n",
+    "\n",
+    "    def __call__(self):\n",
+    "        \"\"\"Calling the loader is equivalent to iterating over it.\"\"\"\n",
+    "        return self.__iter__()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 231,
+   "id": "fcea3fd0",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "[INFO 2021/08/18 07:42:23 batchfy.py:399] count is auto detected as seq\n",
+      "[INFO 2021/08/18 07:42:23 batchfy.py:423] # utts: 5542\n",
+      "[INFO 2021/08/18 07:42:23 batchfy.py:466] # minibatches: 278\n"
+     ]
+    }
+   ],
   "source": [
-    "from pathlib import Path"
+    "train = BatchDataLoader(json_file=dev_data, train_mode=True, batch_size=20)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 90,
-   "id": "64593e5f",
+   "execution_count": 232,
+   "id": "e2a2c9a8",
   "metadata": {},
   "outputs": [
    {
-     "ename": "AttributeError",
-     "evalue": "'str' object has no attribute 'stat'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mAttributeError\u001b[0m                            Traceback (most recent call last)",
-      "\u001b[0;32m/tmp/ipykernel_48616/3505477735.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0ms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'xxxxxxxx'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mPath\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_file\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0ms\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[0;32m/usr/local/lib/python3.7/pathlib.py\u001b[0m in \u001b[0;36mis_file\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1342\u001b[0m         \"\"\"\n\u001b[1;32m   1343\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1344\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mS_ISREG\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mst_mode\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1345\u001b[0m         \u001b[0;32mexcept\u001b[0m \u001b[0mOSError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1346\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrno\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mENOENT\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mENOTDIR\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
-      "\u001b[0;31mAttributeError\u001b[0m: 'str' object has no attribute 'stat'"
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "278\n",
+      "['__call__', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__iter__', '__le__', '__len__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'auto_collate_batch', 'batch_sampler', 'batch_size', 'collate_fn', 'dataset', 'dataset_kind', 'feed_list', 'from_dataset', 'from_generator', 'num_workers', 'pin_memory', 'places', 'return_list', 'timeout', 'use_buffer_reader', 'use_shared_memory', 'worker_init_fn']\n",
+      "<__main__.BatchDataLoader object at 0x7fdddba35470> train_mode: True, sortagrad: False, batch_size: 20, maxlen_in: inf, maxlen_out: inf, batch_count: auto, batch_bins: 0, batch_frames_in: 0, batch_frames_out: 0, batch_frames_inout: 0, subsampling_factor: 1, num_encs: 1, num_workers: 1, file: /workspace/zhanghui/DeepSpeech-2.x/examples/librispeech/s2/data/manifest.dev\n",
+      "278\n"
     ]
    }
   ],
   "source": [
-    "s='xxxxxxxx'\n",
-    "Path.is_file(s)"
+    "# Same four reports as before (inner loader length, its attributes, the\n",
+    "# wrapper's summary, the wrapper's length), emitted one per line.\n",
+    "print(len(train.dataloader), dir(train.dataloader), train, len(train), sep=\"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 220,
+   "id": "a5ba7d6e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "['7601-101619-0003', '1255-138279-0000', '1272-128104-0004', '6123-59150-0027', '2078-142845-0025', '7850-73752-0018', '4570-24733-0004', '2506-169427-0002', '7601-101619-0004', '3170-137482-0000', '6267-53049-0019', '4570-14911-0009', '174-168635-0018', '7601-291468-0004', '3576-138058-0022', '1919-142785-0007', '6467-62797-0007', '4153-61735-0005', '1686-142278-0003', '2506-169427-0000']\n",
+      "Tensor(shape=[20, 2961, 83], dtype=float32, place=CUDAPinnedPlace, stop_gradient=True,\n",
+      "       [[[-1.99415934, -1.80315673, -1.88801885, ...,  0.86933994, -0.59853148,  0.02596200],\n",
+      "         [-1.95346808, -1.84891188, -2.17492867, ...,  0.83640492, -0.59853148, -0.11333394],\n",
+      "         [-2.27899861, -2.21495342, -2.58480024, ...,  0.91874266, -0.59853148, -0.31453922],\n",
+      "         ...,\n",
+      "         [-2.64522028, -2.35221887, -2.91269732, ...,  1.48994756, -0.16100442,  0.36646330],\n",
+      "         [-2.40107250, -2.21495342, -2.37986445, ...,  1.44072104, -0.13220564,  0.12656468],\n",
+      "         [-2.15692472, -1.89466715, -2.25690317, ...,  1.31273174, -0.09620714, -0.15202725]],\n",
+      "\n",
+      "        [[-0.28859532, -0.29033494, -0.86576819, ...,  1.37753224, -0.30570769,  0.25806731],\n",
+      "         [-0.20149794, -0.17814466, -0.59891301, ...,  1.35188794, -0.30570769, -0.02964944],\n",
+      "         [-0.34947991, -0.33597648, -0.96877253, ...,  1.38394332, -0.30570769, -0.38376236],\n",
+      "         ...,\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ]],\n",
+      "\n",
+      "        [[-0.44914246, -0.33902276, -0.78237975, ...,  1.38218808,  0.29214793, -0.16815147],\n",
+      "         [-0.55490732, -0.41596055, -0.84425378, ...,  1.34530187,  0.25002354, -0.04004869],\n",
+      "         [-0.83694696, -0.62112784, -1.07112527, ...,  1.19160914,  0.20789915,  0.37984371],\n",
+      "         ...,\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ]],\n",
+      "\n",
+      "        ...,\n",
+      "\n",
+      "        [[-1.24343657, -0.94188881, -1.41092563, ...,  0.96716309,  0.60345763,  0.15360183],\n",
+      "         [-1.19466043, -0.80585432, -0.49723154, ...,  1.06735480,  0.60345763,  0.14511746],\n",
+      "         [-0.94079566, -0.59330046, -0.40948665, ...,  0.82244170,  0.55614340,  0.28086722],\n",
+      "         ...,\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ]],\n",
+      "\n",
+      "        [[ 0.21757117,  0.11361472, -0.33262897, ...,  0.76338506, -0.10711290, -0.57754958],\n",
+      "         [-1.00205481, -0.61152041, -0.47124696, ...,  1.11897349, -0.10711290,  0.24931324],\n",
+      "         [-1.03929281, -1.20336759, -1.16433656, ...,  0.88888687, -0.10711290, -0.04115745],\n",
+      "         ...,\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ]],\n",
+      "\n",
+      "        [[-1.25289667, -1.05046368, -0.82881606, ...,  1.23991334,  0.61702502,  0.05275881],\n",
+      "         [-1.19659519, -0.78677225, -0.80407262, ...,  1.27644968,  0.61702502, -0.35079369],\n",
+      "         [-1.49687004, -1.01750231, -0.82881606, ...,  1.29106426,  0.65006059,  0.17958963],\n",
+      "         ...,\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ],\n",
+      "         [ 0.        ,  0.        ,  0.        , ...,  0.        ,  0.        ,  0.        ]]])\n",
+      "Tensor(shape=[20], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
+      "       [2961, 2948, 2938, 2907, 2904, 2838, 2832, 2819, 2815, 2797, 2775, 2710, 2709, 2696, 2688, 2661, 2616, 2595, 2589, 2576])\n",
+      "Tensor(shape=[20, 133], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
+      "       [[3098, 1595,  389, ..., -1  , -1  , -1  ],\n",
+      "        [2603, 4832,  482, ..., -1  , -1  , -1  ],\n",
+      "        [2796,  303,  269, ..., -1  , -1  , -1  ],\n",
+      "        ...,\n",
+      "        [3218, 3673,  206, ..., -1  , -1  , -1  ],\n",
+      "        [2371, 4832, 4031, ..., -1  , -1  , -1  ],\n",
+      "        [2570, 2433, 4285, ..., -1  , -1  , -1  ]])\n",
+      "Tensor(shape=[20], dtype=int64, place=CUDAPinnedPlace, stop_gradient=True,\n",
+      "       [80 , 83 , 102, 133, 82 , 102, 71 , 91 , 68 , 81 , 86 , 67 , 71 , 95 , 65 , 88 , 97 , 98 , 89 , 72 ])\n"
+     ]
+    }
+   ],
+   "source": [
+    "for batch in train:\n",
+    "    # Unpack the first minibatch, show each field on its own line, then stop.\n",
+    "    utts, xs, ilens, ys, olens = batch\n",
+    "    print(utts, xs, ilens, ys, olens, sep=\"\\n\")\n",
+    "    break"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
-   "id": "fcea3fd0",
+   "id": "3c974a1e",
   "metadata": {},
   "outputs": [],
   "source": []