From a0c6c5e30a01dbeba95259b2ca9e992b5edd4ca1 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 24 Mar 2021 03:43:21 +0000
Subject: [PATCH] Fix ndarray typo; the Python inference fix for the RNN mem-opt
 name error and BatchNorm1D will be available in 2.0.2

---
 .notebook/jit_infer.ipynb      | 223 +++++++++++++++++++--------------
 deepspeech/frontend/utility.py |  12 +-
 2 files changed, 138 insertions(+), 97 deletions(-)

diff --git a/.notebook/jit_infer.ipynb b/.notebook/jit_infer.ipynb
index 49e395b38..af89827f9 100644
--- a/.notebook/jit_infer.ipynb
+++ b/.notebook/jit_infer.ipynb
@@ -37,15 +37,26 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2021-03-17 11:09:34,972 - WARNING - override cat of paddle.Tensor if exists or register, remove this when fixed!\n",
-      "2021-03-17 11:09:34,973 - WARNING - override size of paddle.Tensor if exists or register (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n",
-      "2021-03-17 11:09:34,974 - WARNING - register user masked_fill to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-17 11:09:34,975 - WARNING - register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-17 11:09:34,975 - WARNING - register user repeat to paddle.Tensor, remove this when fixed!\n",
-      "2021-03-17 11:09:34,976 - WARNING - register user glu to paddle.nn.functional, remove this when fixed!\n",
-      "2021-03-17 11:09:34,976 - WARNING - register user GLU to paddle.nn, remove this when fixed!\n",
-      "2021-03-17 11:09:34,977 - WARNING - register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
-      "2021-03-17 11:09:34,977 - WARNING - override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n",
+      "2021-03-24 03:20:39,129 - WARNING - register user softmax to paddle, remove this when fixed!\n",
+      "2021-03-24 03:20:39,130 - WARNING - register user sigmoid to paddle, remove this when fixed!\n",
+      "2021-03-24 03:20:39,131 - WARNING - register user relu to paddle, remove this when fixed!\n",
+      "2021-03-24 03:20:39,132 - WARNING - override cat of paddle if exists or register, remove this when fixed!\n",
+      "2021-03-24 03:20:39,133 - WARNING - override eq of paddle.Tensor if exists or register, remove this when fixed!\n",
+      "2021-03-24 03:20:39,133 - WARNING - override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n",
+      "2021-03-24 03:20:39,135 - WARNING - override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n",
+      "2021-03-24 03:20:39,135 - WARNING - register user view to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,136 - WARNING - register user view_as to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,137 - WARNING - register user masked_fill to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,137 - WARNING - register user masked_fill_ to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,138 - WARNING - register user fill_ to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,138 - WARNING - register user repeat to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,139 - WARNING - register user softmax to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,140 - WARNING - register user sigmoid to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,140 - WARNING - register user relu to paddle.Tensor, remove this when fixed!\n",
+      "2021-03-24 03:20:39,141 - WARNING - register user glu to paddle.nn.functional, remove this when fixed!\n",
+      "2021-03-24 03:20:39,141 - WARNING - override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n",
+      "2021-03-24 03:20:39,142 - WARNING - register user GLU to paddle.nn, remove this when fixed!\n",
+      "2021-03-24 03:20:39,142 - WARNING - register user ConstantPad2d to paddle.nn, remove this when fixed!\n",
       "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/scipy/fftpack/__init__.py:103: DeprecationWarning: The module numpy.dual is deprecated.  Instead of using dual, use the functions directly from numpy or scipy.\n",
       "  from numpy.dual import register_func\n",
       "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/scipy/special/orthogonal.py:81: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n",
@@ -93,7 +104,12 @@
      "output_type": "stream",
      "text": [
       "0.0.0\n",
-      "<module 'paddle.version' from '/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/version.py'>\n"
+      "607856a949ed7356237ed8148947f7fd2b0f4631\n",
+      "ON\n",
+      "ON\n",
+      "commit: 607856a949ed7356237ed8148947f7fd2b0f4631\n",
+      "None\n",
+      "0\n"
      ]
     },
     {
@@ -103,11 +119,43 @@
       "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n",
       "  and should_run_async(code)\n"
      ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "['__builtins__',\n",
+       " '__cached__',\n",
+       " '__doc__',\n",
+       " '__file__',\n",
+       " '__loader__',\n",
+       " '__name__',\n",
+       " '__package__',\n",
+       " '__spec__',\n",
+       " 'commit',\n",
+       " 'full_version',\n",
+       " 'istaged',\n",
+       " 'major',\n",
+       " 'minor',\n",
+       " 'mkl',\n",
+       " 'patch',\n",
+       " 'rc',\n",
+       " 'show',\n",
+       " 'with_mkl']"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "print(paddle.__version__)\n",
-    "print(paddle.version)"
+    "print(paddle.version.commit)\n",
+    "print(paddle.version.with_mkl)\n",
+    "print(paddle.version.mkl())\n",
+    "print(paddle.version.show())\n",
+    "print(paddle.version.patch)\n",
+    "dir(paddle.version)"
    ]
   },
   {
@@ -166,7 +214,7 @@
       "  n_epoch: 30\n",
       "  weight_decay: 1e-06\n",
       "-----------  Configuration Arguments -----------\n",
-      "checkpoint_path: examples/aishell/ckpt/checkpoints/step-1876\n",
+      "checkpoint_path: examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725\n",
       "config: examples/aishell/conf/deepspeech2.yaml\n",
       "device: gpu\n",
       "dump_config: None\n",
@@ -196,7 +244,10 @@
     "add_arg('speech_save_dir',  str,\n",
     "        'demo_cache',\n",
     "        \"Directory to save demo audios.\")\n",
-    "add_arg('warmup_manifest',  str, \"examples/aishell/data/manifest.test\", \"Filepath of manifest to warm up.\")\n",
+    "add_arg('warmup_manifest',  \n",
+    "        str, \n",
+    "        \"examples/aishell/data/manifest.test\", \n",
+    "        \"Filepath of manifest to warm up.\")\n",
     "add_arg(\n",
     "    \"--model_file\",\n",
     "    type=str,\n",
@@ -218,7 +269,11 @@
     "    \"Model dir, If you load a non-combined model, specify the directory of the model.\"\n",
     ")\n",
     "add_arg(\"--use_gpu\",type=bool,default=True, help=\"Whether use gpu.\")\n",
-    "args = parser.parse_args(\"--checkpoint_path examples/aishell/ckpt/checkpoints/step-1876 --config examples/aishell/conf/deepspeech2.yaml --opts data.test_manifest examples/aishell/data/manifest.test data.mean_std_filepath examples/aishell/data/mean_std.npz  data.vocab_filepath examples/aishell/data/vocab.txt\".split())\n",
+    "\n",
+    "\n",
+    "args = parser.parse_args(\n",
+    "    \"--checkpoint_path examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725 --config examples/aishell/conf/deepspeech2.yaml --opts data.test_manifest examples/aishell/data/manifest.test data.mean_std_filepath examples/aishell/data/mean_std.npz  data.vocab_filepath examples/aishell/data/vocab.txt\".split()\n",
+    ")\n",
     "\n",
     "\n",
     "config = get_cfg_defaults()\n",
@@ -230,16 +285,13 @@
     "print(config)\n",
     "\n",
     "args.warmup_manifest = config.data.test_manifest\n",
-    "print_arguments(args)\n",
     "\n",
-    "if args.dump_config:\n",
-    "    with open(args.dump_config, 'w') as f:\n",
-    "        print(config, file=f)"
+    "print_arguments(args)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 17,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -264,18 +316,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/fluid/dygraph/layers.py:1303: UserWarning: Skip loading for decoder.ctc_lo.weight. decoder.ctc_lo.weight is not found in the provided dict.\n",
-      "  warnings.warn((\"Skip loading for {}. \".format(key) + str(err)))\n",
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/fluid/dygraph/layers.py:1303: UserWarning: Skip loading for decoder.ctc_lo.bias. decoder.ctc_lo.bias is not found in the provided dict.\n",
-      "  warnings.warn((\"Skip loading for {}. \".format(key) + str(err)))\n",
-      "2021-03-17 11:10:00,017 - INFO - [checkpoint] Rank 0: loaded model from examples/aishell/ckpt/checkpoints/step-1876.pdparams\n"
+      "2021-03-24 03:26:27,422 - INFO - [checkpoint] Rank 0: loaded model from examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725.pdparams\n"
      ]
     },
     {
@@ -361,7 +409,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -379,7 +427,6 @@
     "        # The thread num should not be greater than the number of cores in the CPU.\n",
     "        config.set_cpu_math_library_num_threads(4)\n",
     "        #config.enable_mkldnn()\n",
-    "        \n",
     "    config.switch_ir_optim(False)\n",
     "\n",
     "    predictor = create_predictor(config)\n",
@@ -462,47 +509,61 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 20,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Warm-up Test Case %d: %s 0 /home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../..//examples/dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0121.wav\n",
-      "/home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../..//examples/dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0121.wav\n",
+      "Warm-up Test Case %d: %s 0 /home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0124.wav\n",
+      "/home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0124.wav\n",
       "input: 0 audio\n",
       "input: 1 audio_len\n",
       "output: 0 tmp_75\n",
-      "jit: [[[1.40282078e-04 3.31296207e-04 5.57157793e-04 ... 1.07916087e-04\n",
-      "   8.73636964e-05 1.96113906e-04]\n",
-      "  [1.38032061e-04 2.70526099e-04 4.53807996e-04 ... 1.02293277e-04\n",
-      "   8.40202629e-05 1.90612729e-04]\n",
-      "  [1.38912103e-04 2.45687814e-04 3.99624696e-04 ... 9.70420660e-05\n",
-      "   7.88255784e-05 1.80084753e-04]\n",
+      "jit: [[[8.91791242e-12 4.45650548e-12 3.67574104e-09 ... 8.91772593e-12\n",
+      "   8.91578738e-12 4.64319072e-08]\n",
+      "  [1.55952011e-15 2.62797088e-14 4.50428670e-12 ... 1.55946061e-15\n",
+      "   1.55893121e-15 9.99992609e-01]\n",
+      "  [1.24638590e-17 7.61802427e-16 2.93266930e-14 ... 1.24633842e-17\n",
+      "   1.24587735e-17 1.00000000e+00]\n",
       "  ...\n",
-      "  [3.28999187e-04 2.59723864e-04 3.03535169e-04 ... 2.82066030e-04\n",
-      "   1.11002744e-04 1.27009131e-04]\n",
-      "  [2.91427423e-04 2.20203598e-04 2.85082555e-04 ... 3.27318383e-04\n",
-      "   1.09202861e-04 1.17112293e-04]\n",
-      "  [3.63971514e-04 1.47859042e-04 2.24457763e-04 ... 3.63016297e-04\n",
-      "   1.34765272e-04 1.61947115e-04]]] <class 'numpy.ndarray'>\n",
-      "[1, 161, 419]\n",
-      "[1]\n",
-      "paddle: [[[3.4913886e-04 2.5836096e-04 4.2449642e-04 ... 7.2210147e-05\n",
-      "   7.1211573e-05 2.0057644e-04]\n",
-      "  [3.8406707e-04 2.4088801e-04 5.0910388e-04 ... 6.1701416e-05\n",
-      "   6.7852285e-05 2.3967208e-04]\n",
-      "  [4.1069370e-04 2.5478008e-04 6.7985675e-04 ... 5.8369777e-05\n",
-      "   6.2065104e-05 2.5938542e-04]\n",
+      "  [4.37491543e-15 2.43678580e-12 1.98772032e-12 ... 4.37483242e-15\n",
+      "   4.37358093e-15 1.00000000e+00]\n",
+      "  [3.89338410e-13 1.66756747e-11 1.42901749e-11 ... 3.89333233e-13\n",
+      "   3.89255983e-13 1.00000000e+00]\n",
+      "  [1.00350561e-10 2.56295180e-10 2.91178692e-10 ... 1.00348452e-10\n",
+      "   1.00334671e-10 9.99998808e-01]]] <class 'numpy.ndarray'>\n",
+      "[1, 161, 522]\n",
+      "[1]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/fluid/layers/utils.py:77: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n",
+      "  return (isinstance(seq, collections.Sequence) and\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "paddle: [[[8.91791242e-12 4.45650548e-12 3.67574104e-09 ... 8.91772593e-12\n",
+      "   8.91578738e-12 4.64319072e-08]\n",
+      "  [1.55952011e-15 2.62797088e-14 4.50428670e-12 ... 1.55946061e-15\n",
+      "   1.55893121e-15 9.99992609e-01]\n",
+      "  [1.24638590e-17 7.61802427e-16 2.93266930e-14 ... 1.24633842e-17\n",
+      "   1.24587735e-17 1.00000000e+00]\n",
       "  ...\n",
-      "  [6.6656910e-04 3.1835871e-04 7.5929717e-04 ... 1.1990797e-04\n",
-      "   3.7087579e-05 3.4520373e-04]\n",
-      "  [4.7881933e-04 2.7979453e-04 6.7949941e-04 ... 1.2511105e-04\n",
-      "   4.5631223e-05 3.7984925e-04]\n",
-      "  [2.8661705e-04 2.9201157e-04 4.5970027e-04 ... 1.4581002e-04\n",
-      "   7.8281126e-05 3.8263199e-04]]]\n",
-      "False\n"
+      "  [4.37491543e-15 2.43678580e-12 1.98772032e-12 ... 4.37483242e-15\n",
+      "   4.37358093e-15 1.00000000e+00]\n",
+      "  [3.89338410e-13 1.66756747e-11 1.42901749e-11 ... 3.89333233e-13\n",
+      "   3.89255983e-13 1.00000000e+00]\n",
+      "  [1.00350561e-10 2.56295180e-10 2.91178692e-10 ... 1.00348452e-10\n",
+      "   1.00334671e-10 9.99998808e-01]]]\n",
+      "True\n"
      ]
     }
    ],
@@ -521,30 +582,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(1, 161, 419) (1,)\n",
+      "(1, 161, 522) (1,)\n",
       "input: 0 audio\n",
       "input: 1 audio_len\n",
       "output: 0 tmp_75\n",
-      "jit: [[[1.40282078e-04 3.31296207e-04 5.57157793e-04 ... 1.07916087e-04\n",
-      "   8.73636964e-05 1.96113906e-04]\n",
-      "  [1.38032061e-04 2.70526099e-04 4.53807996e-04 ... 1.02293277e-04\n",
-      "   8.40202629e-05 1.90612729e-04]\n",
-      "  [1.38912103e-04 2.45687814e-04 3.99624696e-04 ... 9.70420660e-05\n",
-      "   7.88255784e-05 1.80084753e-04]\n",
+      "jit: [[[8.91791242e-12 4.45650548e-12 3.67574104e-09 ... 8.91772593e-12\n",
+      "   8.91578738e-12 4.64319072e-08]\n",
+      "  [1.55952011e-15 2.62797088e-14 4.50428670e-12 ... 1.55946061e-15\n",
+      "   1.55893121e-15 9.99992609e-01]\n",
+      "  [1.24638590e-17 7.61802427e-16 2.93266930e-14 ... 1.24633842e-17\n",
+      "   1.24587735e-17 1.00000000e+00]\n",
       "  ...\n",
-      "  [3.28999187e-04 2.59723864e-04 3.03535169e-04 ... 2.82066030e-04\n",
-      "   1.11002744e-04 1.27009131e-04]\n",
-      "  [2.91427423e-04 2.20203598e-04 2.85082555e-04 ... 3.27318383e-04\n",
-      "   1.09202861e-04 1.17112293e-04]\n",
-      "  [3.63971514e-04 1.47859042e-04 2.24457763e-04 ... 3.63016297e-04\n",
-      "   1.34765272e-04 1.61947115e-04]]]\n"
+      "  [4.37491543e-15 2.43678580e-12 1.98772032e-12 ... 4.37483242e-15\n",
+      "   4.37358093e-15 1.00000000e+00]\n",
+      "  [3.89338410e-13 1.66756747e-11 1.42901749e-11 ... 3.89333233e-13\n",
+      "   3.89255983e-13 1.00000000e+00]\n",
+      "  [1.00350561e-10 2.56295180e-10 2.91178692e-10 ... 1.00348452e-10\n",
+      "   1.00334671e-10 9.99998808e-01]]]\n"
      ]
     }
    ],
@@ -564,26 +625,6 @@
     "probs = test(sample['audio_filepath'])"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/tensor/creation.py:143: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n",
-      "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n",
-      "  if data.dtype == np.object:\n"
-     ]
-    }
-   ],
-   "source": [
-    "a = paddle.to_tensor([1,3,4])\n",
-    "a.numpy?"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py
index a5ab7b260..a9e0e5c51 100644
--- a/deepspeech/frontend/utility.py
+++ b/deepspeech/frontend/utility.py
@@ -83,11 +83,11 @@ def rms_to_dbfs(rms: float):
     return rms_to_db(rms) - 3.0103
 
 
-def max_dbfs(sample_data: np.ndarry):
+def max_dbfs(sample_data: np.ndarray):
     """Peak dBFS based on the maximum energy sample. 
 
     Args:
-        sample_data ([np.ndarry]): float array, [-1, 1].
+        sample_data ([np.ndarray]): float array, [-1, 1].
 
     Returns:
         float: dBFS 
@@ -100,7 +100,7 @@ def mean_dbfs(sample_data):
     """Peak dBFS based on the RMS energy. 
 
     Args:
-        sample_data ([np.ndarry]): float array, [-1, 1].
+        sample_data ([np.ndarray]): float array, [-1, 1].
 
     Returns:
         float: dBFS 
@@ -121,15 +121,15 @@ def gain_db_to_ratio(gain_db: float):
     return math.pow(10.0, gain_db / 20.0)
 
 
-def normalize_audio(sample_data: np.ndarry, dbfs: float=-3.0103):
+def normalize_audio(sample_data: np.ndarray, dbfs: float=-3.0103):
     """Nomalize audio to dBFS.
     
     Args:
-        sample_data (np.ndarry): input wave samples, [-1, 1].
+        sample_data (np.ndarray): input wave samples, [-1, 1].
         dbfs (float, optional): target dBFS. Defaults to -3.0103.
 
     Returns:
-        np.ndarry: normalized wave
+        np.ndarray: normalized wave
     """
     return np.maximum(
         np.minimum(sample_data * gain_db_to_ratio(dbfs - max_dbfs(sample_data)),