From a0c6c5e30a01dbeba95259b2ca9e992b5edd4ca1 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 24 Mar 2021 03:43:21 +0000 Subject: [PATCH] fix typo, python infer fix rnn mem opt name error and batchnorm1d, will be available at 2.0.2 --- .notebook/jit_infer.ipynb | 223 +++++++++++++++++++-------------- deepspeech/frontend/utility.py | 12 +- 2 files changed, 138 insertions(+), 97 deletions(-) diff --git a/.notebook/jit_infer.ipynb b/.notebook/jit_infer.ipynb index 49e395b38..af89827f9 100644 --- a/.notebook/jit_infer.ipynb +++ b/.notebook/jit_infer.ipynb @@ -37,15 +37,26 @@ "name": "stderr", "output_type": "stream", "text": [ - "2021-03-17 11:09:34,972 - WARNING - override cat of paddle.Tensor if exists or register, remove this when fixed!\n", - "2021-03-17 11:09:34,973 - WARNING - override size of paddle.Tensor if exists or register (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n", - "2021-03-17 11:09:34,974 - WARNING - register user masked_fill to paddle.Tensor, remove this when fixed!\n", - "2021-03-17 11:09:34,975 - WARNING - register user masked_fill_ to paddle.Tensor, remove this when fixed!\n", - "2021-03-17 11:09:34,975 - WARNING - register user repeat to paddle.Tensor, remove this when fixed!\n", - "2021-03-17 11:09:34,976 - WARNING - register user glu to paddle.nn.functional, remove this when fixed!\n", - "2021-03-17 11:09:34,976 - WARNING - register user GLU to paddle.nn, remove this when fixed!\n", - "2021-03-17 11:09:34,977 - WARNING - register user ConstantPad2d to paddle.nn, remove this when fixed!\n", - "2021-03-17 11:09:34,977 - WARNING - override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n", + "2021-03-24 03:20:39,129 - WARNING - register user softmax to paddle, remove this when fixed!\n", + "2021-03-24 03:20:39,130 - WARNING - register user sigmoid to paddle, remove this when fixed!\n", + "2021-03-24 03:20:39,131 - WARNING - register user relu to paddle, remove this when fixed!\n", + "2021-03-24 03:20:39,132 - WARNING - override cat of paddle if exists or register, remove this when fixed!\n", + "2021-03-24 03:20:39,133 - WARNING - override eq of paddle.Tensor if exists or register, remove this when fixed!\n", + "2021-03-24 03:20:39,133 - WARNING - override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n", + "2021-03-24 03:20:39,135 - WARNING - override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n", + "2021-03-24 03:20:39,135 - WARNING - register user view to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,136 - WARNING - register user view_as to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,137 - WARNING - register user masked_fill to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,137 - WARNING - register user masked_fill_ to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,138 - WARNING - register user fill_ to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,138 - WARNING - register user repeat to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,139 - WARNING - register user softmax to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,140 - WARNING - register user sigmoid to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,140 - WARNING - register user relu to paddle.Tensor, remove this when fixed!\n", + "2021-03-24 03:20:39,141 - WARNING - register user glu to paddle.nn.functional, remove this when fixed!\n", + "2021-03-24 03:20:39,141 - WARNING - override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n", + "2021-03-24 03:20:39,142 - WARNING - register user GLU to paddle.nn, remove this when fixed!\n", + "2021-03-24 03:20:39,142 - WARNING - register user ConstantPad2d to paddle.nn, remove this when fixed!\n", "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/scipy/fftpack/__init__.py:103: DeprecationWarning: The module numpy.dual is deprecated. Instead of using dual, use the functions directly from numpy or scipy.\n", " from numpy.dual import register_func\n", "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/scipy/special/orthogonal.py:81: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. If you wish to review your current use, check the release note link for additional information.\n", @@ -93,7 +104,12 @@ "output_type": "stream", "text": [ "0.0.0\n", - "\n" + "607856a949ed7356237ed8148947f7fd2b0f4631\n", + "ON\n", + "ON\n", + "commit: 607856a949ed7356237ed8148947f7fd2b0f4631\n", + "None\n", + "0\n" ] }, { @@ -103,11 +119,43 @@ "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", " and should_run_async(code)\n" ] + }, + { + "data": { + "text/plain": [ + "['__builtins__',\n", + " '__cached__',\n", + " '__doc__',\n", + " '__file__',\n", + " '__loader__',\n", + " '__name__',\n", + " '__package__',\n", + " '__spec__',\n", + " 'commit',\n", + " 'full_version',\n", + " 'istaged',\n", + " 'major',\n", + " 'minor',\n", + " 'mkl',\n", + " 'patch',\n", + " 'rc',\n", + " 'show',\n", + " 'with_mkl']" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ "print(paddle.__version__)\n", - "print(paddle.version)" + "print(paddle.version.commit)\n", + "print(paddle.version.with_mkl)\n", + "print(paddle.version.mkl())\n", + "print(paddle.version.show())\n", + "print(paddle.version.patch)\n", + "dir(paddle.version)" ] }, { @@ -166,7 +214,7 @@ " n_epoch: 30\n", " weight_decay: 1e-06\n", "----------- Configuration Arguments -----------\n", - "checkpoint_path: examples/aishell/ckpt/checkpoints/step-1876\n", + "checkpoint_path: examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725\n", "config: examples/aishell/conf/deepspeech2.yaml\n", "device: gpu\n", "dump_config: None\n", @@ -196,7 +244,10 @@ "add_arg('speech_save_dir', str,\n", " 'demo_cache',\n", " \"Directory to save demo audios.\")\n", - "add_arg('warmup_manifest', str, \"examples/aishell/data/manifest.test\", \"Filepath of manifest to warm up.\")\n", + "add_arg('warmup_manifest', \n", + " str, \n", + " \"examples/aishell/data/manifest.test\", \n", + " \"Filepath of manifest to warm up.\")\n", "add_arg(\n", " \"--model_file\",\n", " type=str,\n", @@ -218,7 +269,11 @@ " \"Model dir, If you load a non-combined model, specify the directory of the model.\"\n", ")\n", "add_arg(\"--use_gpu\",type=bool,default=True, help=\"Whether use gpu.\")\n", - "args = parser.parse_args(\"--checkpoint_path examples/aishell/ckpt/checkpoints/step-1876 --config examples/aishell/conf/deepspeech2.yaml --opts data.test_manifest examples/aishell/data/manifest.test data.mean_std_filepath examples/aishell/data/mean_std.npz data.vocab_filepath examples/aishell/data/vocab.txt\".split())\n", + "\n", + "\n", + "args = parser.parse_args(\n", + " \"--checkpoint_path examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725 --config examples/aishell/conf/deepspeech2.yaml --opts data.test_manifest examples/aishell/data/manifest.test data.mean_std_filepath examples/aishell/data/mean_std.npz data.vocab_filepath examples/aishell/data/vocab.txt\".split()\n", + ")\n", "\n", "\n", "config = get_cfg_defaults()\n", @@ -230,16 +285,13 @@ "print(config)\n", "\n", "args.warmup_manifest = config.data.test_manifest\n", - "print_arguments(args)\n", "\n", - "if args.dump_config:\n", - " with open(args.dump_config, 'w') as f:\n", - " print(config, file=f)" + "print_arguments(args)" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -264,18 +316,14 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/fluid/dygraph/layers.py:1303: UserWarning: Skip loading for decoder.ctc_lo.weight. decoder.ctc_lo.weight is not found in the provided dict.\n", - " warnings.warn((\"Skip loading for {}. \".format(key) + str(err)))\n", - "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/fluid/dygraph/layers.py:1303: UserWarning: Skip loading for decoder.ctc_lo.bias. decoder.ctc_lo.bias is not found in the provided dict.\n", - " warnings.warn((\"Skip loading for {}. \".format(key) + str(err)))\n", - "2021-03-17 11:10:00,017 - INFO - [checkpoint] Rank 0: loaded model from examples/aishell/ckpt/checkpoints/step-1876.pdparams\n" + "2021-03-24 03:26:27,422 - INFO - [checkpoint] Rank 0: loaded model from examples/aishell/ckpt-loss2e-3-0.83-5/checkpoints/step-11725.pdparams\n" ] }, { @@ -361,7 +409,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -379,7 +427,6 @@ " # The thread num should not be greater than the number of cores in the CPU.\n", " config.set_cpu_math_library_num_threads(4)\n", " #config.enable_mkldnn()\n", - " \n", " config.switch_ir_optim(False)\n", "\n", " predictor = create_predictor(config)\n", @@ -462,47 +509,61 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 20, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Warm-up Test Case %d: %s 0 /home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../..//examples/dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0121.wav\n", - "/home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../..//examples/dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0121.wav\n", + "Warm-up Test Case %d: %s 0 /home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0124.wav\n", + "/home/ssd5/zhanghui/DeepSpeech2.x/examples/aishell/../dataset/aishell/data_aishell/wav/test/S0764/BAC009S0764W0124.wav\n", "input: 0 audio\n", "input: 1 audio_len\n", "output: 0 tmp_75\n", - "jit: [[[1.40282078e-04 3.31296207e-04 5.57157793e-04 ... 1.07916087e-04\n", - " 8.73636964e-05 1.96113906e-04]\n", - " [1.38032061e-04 2.70526099e-04 4.53807996e-04 ... 1.02293277e-04\n", - " 8.40202629e-05 1.90612729e-04]\n", - " [1.38912103e-04 2.45687814e-04 3.99624696e-04 ... 9.70420660e-05\n", - " 7.88255784e-05 1.80084753e-04]\n", + "jit: [[[8.91791242e-12 4.45650548e-12 3.67574104e-09 ... 8.91772593e-12\n", + " 8.91578738e-12 4.64319072e-08]\n", + " [1.55952011e-15 2.62797088e-14 4.50428670e-12 ... 1.55946061e-15\n", + " 1.55893121e-15 9.99992609e-01]\n", + " [1.24638590e-17 7.61802427e-16 2.93266930e-14 ... 1.24633842e-17\n", + " 1.24587735e-17 1.00000000e+00]\n", " ...\n", - " [3.28999187e-04 2.59723864e-04 3.03535169e-04 ... 2.82066030e-04\n", - " 1.11002744e-04 1.27009131e-04]\n", - " [2.91427423e-04 2.20203598e-04 2.85082555e-04 ... 3.27318383e-04\n", - " 1.09202861e-04 1.17112293e-04]\n", - " [3.63971514e-04 1.47859042e-04 2.24457763e-04 ... 3.63016297e-04\n", - " 1.34765272e-04 1.61947115e-04]]] \n", - "[1, 161, 419]\n", - "[1]\n", - "paddle: [[[3.4913886e-04 2.5836096e-04 4.2449642e-04 ... 7.2210147e-05\n", - " 7.1211573e-05 2.0057644e-04]\n", - " [3.8406707e-04 2.4088801e-04 5.0910388e-04 ... 6.1701416e-05\n", - " 6.7852285e-05 2.3967208e-04]\n", - " [4.1069370e-04 2.5478008e-04 6.7985675e-04 ... 5.8369777e-05\n", - " 6.2065104e-05 2.5938542e-04]\n", + " [4.37491543e-15 2.43678580e-12 1.98772032e-12 ... 4.37483242e-15\n", + " 4.37358093e-15 1.00000000e+00]\n", + " [3.89338410e-13 1.66756747e-11 1.42901749e-11 ... 3.89333233e-13\n", + " 3.89255983e-13 1.00000000e+00]\n", + " [1.00350561e-10 2.56295180e-10 2.91178692e-10 ... 1.00348452e-10\n", + " 1.00334671e-10 9.99998808e-01]]] \n", + "[1, 161, 522]\n", + "[1]\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/fluid/layers/utils.py:77: DeprecationWarning: Using or importing the ABCs from 'collections' instead of from 'collections.abc' is deprecated, and in 3.8 it will stop working\n", + " return (isinstance(seq, collections.Sequence) and\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "paddle: [[[8.91791242e-12 4.45650548e-12 3.67574104e-09 ... 8.91772593e-12\n", + " 8.91578738e-12 4.64319072e-08]\n", + " [1.55952011e-15 2.62797088e-14 4.50428670e-12 ... 1.55946061e-15\n", + " 1.55893121e-15 9.99992609e-01]\n", + " [1.24638590e-17 7.61802427e-16 2.93266930e-14 ... 1.24633842e-17\n", + " 1.24587735e-17 1.00000000e+00]\n", " ...\n", - " [6.6656910e-04 3.1835871e-04 7.5929717e-04 ... 1.1990797e-04\n", - " 3.7087579e-05 3.4520373e-04]\n", - " [4.7881933e-04 2.7979453e-04 6.7949941e-04 ... 1.2511105e-04\n", - " 4.5631223e-05 3.7984925e-04]\n", - " [2.8661705e-04 2.9201157e-04 4.5970027e-04 ... 1.4581002e-04\n", - " 7.8281126e-05 3.8263199e-04]]]\n", - "False\n" + " [4.37491543e-15 2.43678580e-12 1.98772032e-12 ... 4.37483242e-15\n", + " 4.37358093e-15 1.00000000e+00]\n", + " [3.89338410e-13 1.66756747e-11 1.42901749e-11 ... 3.89333233e-13\n", + " 3.89255983e-13 1.00000000e+00]\n", + " [1.00350561e-10 2.56295180e-10 2.91178692e-10 ... 1.00348452e-10\n", + " 1.00334671e-10 9.99998808e-01]]]\n", + "True\n" ] } ], @@ -521,30 +582,30 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "(1, 161, 419) (1,)\n", + "(1, 161, 522) (1,)\n", "input: 0 audio\n", "input: 1 audio_len\n", "output: 0 tmp_75\n", - "jit: [[[1.40282078e-04 3.31296207e-04 5.57157793e-04 ... 1.07916087e-04\n", - " 8.73636964e-05 1.96113906e-04]\n", - " [1.38032061e-04 2.70526099e-04 4.53807996e-04 ... 1.02293277e-04\n", - " 8.40202629e-05 1.90612729e-04]\n", - " [1.38912103e-04 2.45687814e-04 3.99624696e-04 ... 9.70420660e-05\n", - " 7.88255784e-05 1.80084753e-04]\n", + "jit: [[[8.91791242e-12 4.45650548e-12 3.67574104e-09 ... 8.91772593e-12\n", + " 8.91578738e-12 4.64319072e-08]\n", + " [1.55952011e-15 2.62797088e-14 4.50428670e-12 ... 1.55946061e-15\n", + " 1.55893121e-15 9.99992609e-01]\n", + " [1.24638590e-17 7.61802427e-16 2.93266930e-14 ... 1.24633842e-17\n", + " 1.24587735e-17 1.00000000e+00]\n", " ...\n", - " [3.28999187e-04 2.59723864e-04 3.03535169e-04 ... 2.82066030e-04\n", - " 1.11002744e-04 1.27009131e-04]\n", - " [2.91427423e-04 2.20203598e-04 2.85082555e-04 ... 3.27318383e-04\n", - " 1.09202861e-04 1.17112293e-04]\n", - " [3.63971514e-04 1.47859042e-04 2.24457763e-04 ... 3.63016297e-04\n", - " 1.34765272e-04 1.61947115e-04]]]\n" + " [4.37491543e-15 2.43678580e-12 1.98772032e-12 ... 4.37483242e-15\n", + " 4.37358093e-15 1.00000000e+00]\n", + " [3.89338410e-13 1.66756747e-11 1.42901749e-11 ... 3.89333233e-13\n", + " 3.89255983e-13 1.00000000e+00]\n", + " [1.00350561e-10 2.56295180e-10 2.91178692e-10 ... 1.00348452e-10\n", + " 1.00334671e-10 9.99998808e-01]]]\n" ] } ], @@ -564,26 +625,6 @@ "probs = test(sample['audio_filepath'])" ] }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/ssd5/zhanghui/DeepSpeech2.x/tools/venv-dev/lib/python3.7/site-packages/paddlepaddle_gpu-0.0.0-py3.7-linux-x86_64.egg/paddle/tensor/creation.py:143: DeprecationWarning: `np.object` is a deprecated alias for the builtin `object`. To silence this warning, use `object` by itself. Doing this will not modify any behavior and is safe. \n", - "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", - " if data.dtype == np.object:\n" - ] - } - ], - "source": [ - "a = paddle.to_tensor([1,3,4])\n", - "a.numpy?" - ] - }, { "cell_type": "code", "execution_count": null, diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py index a5ab7b260..a9e0e5c51 100644 --- a/deepspeech/frontend/utility.py +++ b/deepspeech/frontend/utility.py @@ -83,11 +83,11 @@ def rms_to_dbfs(rms: float): return rms_to_db(rms) - 3.0103 -def max_dbfs(sample_data: np.ndarry): +def max_dbfs(sample_data: np.ndarray): """Peak dBFS based on the maximum energy sample. Args: - sample_data ([np.ndarry]): float array, [-1, 1]. + sample_data ([np.ndarray]): float array, [-1, 1]. Returns: float: dBFS @@ -100,7 +100,7 @@ def mean_dbfs(sample_data): """Peak dBFS based on the RMS energy. Args: - sample_data ([np.ndarry]): float array, [-1, 1]. + sample_data ([np.ndarray]): float array, [-1, 1]. Returns: float: dBFS @@ -121,15 +121,15 @@ def gain_db_to_ratio(gain_db: float): return math.pow(10.0, gain_db / 20.0) -def normalize_audio(sample_data: np.ndarry, dbfs: float=-3.0103): +def normalize_audio(sample_data: np.ndarray, dbfs: float=-3.0103): """Nomalize audio to dBFS. Args: - sample_data (np.ndarry): input wave samples, [-1, 1]. + sample_data (np.ndarray): input wave samples, [-1, 1]. dbfs (float, optional): target dBFS. Defaults to -3.0103. Returns: - np.ndarry: normalized wave + np.ndarray: normalized wave """ return np.maximum( np.minimum(sample_data * gain_db_to_ratio(dbfs - max_dbfs(sample_data)),