From 28658cc1690b6eff378ebc390a3549bb3aea175f Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 16 Apr 2021 08:34:25 +0000 Subject: [PATCH] fix cmvn and print prarams --- .notebook/u2_model.ipynb | 1555 +++++++++++++++++++++++++++++ deepspeech/frontend/normalizer.py | 16 +- deepspeech/frontend/utility.py | 6 +- deepspeech/modules/mask.py | 2 +- deepspeech/utils/layer_tools.py | 51 +- 5 files changed, 1593 insertions(+), 37 deletions(-) create mode 100644 .notebook/u2_model.ipynb diff --git a/.notebook/u2_model.ipynb b/.notebook/u2_model.ipynb new file mode 100644 index 000000000..9658af0ef --- /dev/null +++ b/.notebook/u2_model.ipynb @@ -0,0 +1,1555 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "comic-scotland", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/workspace/DeepSpeech-2.x\n" + ] + }, + { + "data": { + "text/plain": [ + "'/workspace/DeepSpeech-2.x'" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%cd ..\n", + "%pwd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "trying-palestinian", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid/layers/utils.py:26: DeprecationWarning: `np.int` is a deprecated alias for the builtin `int`. To silence this warning, use `int` by itself. Doing this will not modify any behavior and is safe. When replacing `np.int`, you may wish to use e.g. `np.int64` or `np.int32` to specify the precision. 
If you wish to review your current use, check the release note link for additional information.\n", + "Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations\n", + " def convert_to_list(value, n, name, dtype=np.int):\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:93] register user softmax to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:97] register user log_softmax to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:101] register user sigmoid to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:105] register user log_sigmoid to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:109] register user relu to paddle, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:119] override cat of paddle if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:133] override item of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:144] override long of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:164] override new_full of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:179] override eq of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:185] override eq of paddle if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:195] override contiguous of paddle.Tensor if exists or register, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:212] override size of paddle.Tensor (`to_static` do not process `size` property, maybe some `paddle` api dependent on it), remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:223] 
register user view to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:233] register user view_as to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:259] register user masked_fill to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:277] register user masked_fill_ to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:288] register user fill_ to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:298] register user repeat to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:303] register user softmax to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:308] register user sigmoid to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:312] register user relu to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:322] register user type_as to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:337] register user to to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:346] register user float to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:356] register user tolist to paddle.Tensor, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:371] register user glu to paddle.nn.functional, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:422] override ctc_loss of paddle.nn.functional if exists, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:428] register user Module to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:434] register user ModuleList to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:450] register 
user GLU to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:483] register user ConstantPad2d to paddle.nn, remove this when fixed!\n", + "[WARNING 2021/04/16 08:20:33 __init__.py:489] register user export to paddle.jit, remove this when fixed!\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import paddle\n", + "from yacs.config import CfgNode as CN\n", + "\n", + "from deepspeech.models.u2 import U2Model\n", + "from deepspeech.utils.layer_tools import print_params\n", + "from deepspeech.utils.layer_tools import summary" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "committed-glance", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/ipykernel/ipkernel.py:283: DeprecationWarning: `should_run_async` will not call `transform_cell` automatically in the future. Please pass the result to `transformed_cell` argument and any exception that happen during thetransform in `preprocessing_exc_tuple` in IPython 7.17 and above.\n", + " and should_run_async(code)\n", + "[INFO 2021/04/16 08:20:34 u2.py:834] U2 Encoder type: conformer\n", + "[INFO 2021/04/16 08:20:34 u2.py:834] U2 Encoder type: conformer\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "encoder.embed.conv.0.weight | [256, 1, 3, 3] | 2304 | True\n", + "encoder.embed.conv.0.bias | [256] | 256 | True\n", + "encoder.embed.conv.2.weight | [256, 256, 3, 3] | 589824 | True\n", + "encoder.embed.conv.2.bias | [256] | 256 | True\n", + "encoder.embed.linear.weight | [4864, 256] | 1245184 | True\n", + "encoder.embed.linear.bias | [256] | 256 | True\n", + "encoder.after_norm.weight | [256] | 256 | True\n", + "encoder.after_norm.bias | [256] | 256 | True\n", + "encoder.encoders.0.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.0.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + 
"encoder.encoders.0.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.0.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.0.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.0.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.0.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.0.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.0.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.0.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.0.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.0.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.0.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.0.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.0.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.0.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.0.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.0.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.0.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.0.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.0.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.0.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.0.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.0.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.0.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.0.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.0.conv_module.norm._variance | [256] | 256 | False\n", + 
"encoder.encoders.0.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.0.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.0.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.0.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.0.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.0.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.0.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.0.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.0.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.0.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.0.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.0.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.0.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.0.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.1.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.1.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.1.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.1.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.1.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.1.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.1.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.1.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.1.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.1.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.1.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.1.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.1.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.1.feed_forward.w_2.weight | [2048, 256] | 524288 
| True\n", + "encoder.encoders.1.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.1.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.1.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.1.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.1.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.1.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.1.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.1.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.1.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.1.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.1.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.1.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.1.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.1.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.1.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.1.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.1.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.1.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.1.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.1.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.1.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.1.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.1.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.1.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.1.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.1.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.1.concat_linear.bias | [256] | 256 | True\n", + 
"encoder.encoders.2.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.2.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.2.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.2.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.2.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.2.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.2.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.2.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.2.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.2.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.2.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.2.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.2.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.2.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.2.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.2.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.2.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.2.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.2.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.2.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.2.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.2.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.2.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.2.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.2.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.2.conv_module.norm._mean | [256] | 256 
| False\n", + "encoder.encoders.2.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.2.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.2.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.2.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.2.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.2.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.2.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.2.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.2.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.2.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.2.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.2.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.2.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.2.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.2.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.3.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.3.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.3.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.3.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.3.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.3.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.3.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.3.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.3.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.3.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.3.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.3.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.3.feed_forward.w_1.bias | 
[2048] | 2048 | True\n", + "encoder.encoders.3.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.3.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.3.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.3.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.3.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.3.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.3.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.3.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.3.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.3.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.3.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.3.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.3.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.3.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.3.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.3.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.3.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.3.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.3.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.3.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.3.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.3.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.3.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.3.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.3.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.3.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.3.concat_linear.weight | [512, 256] 
| 131072 | True\n", + "encoder.encoders.3.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.4.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.4.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.4.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.4.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.4.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.4.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.4.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.4.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.4.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.4.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.4.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.4.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.4.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.4.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.4.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.4.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.4.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.4.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.4.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.4.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.4.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.4.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.4.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.4.conv_module.norm.weight | [256] | 256 | True\n", + 
"encoder.encoders.4.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.4.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.4.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.4.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.4.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.4.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.4.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.4.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.4.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.4.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.4.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.4.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.4.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.4.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.4.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.4.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.4.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.5.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.5.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.5.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.5.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.5.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.5.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.5.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.5.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.5.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.5.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.5.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + 
"encoder.encoders.5.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.5.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.5.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.5.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.5.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.5.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.5.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.5.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.5.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.5.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.5.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.5.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.5.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.5.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.5.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.5.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.5.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.5.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.5.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.5.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.5.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.5.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.5.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.5.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.5.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.5.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.5.norm_final.weight | [256] | 256 | 
True\n", + "encoder.encoders.5.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.5.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.5.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.6.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.6.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.6.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.6.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.6.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.6.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.6.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.6.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.6.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.6.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.6.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.6.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.6.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.6.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.6.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.6.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.6.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.6.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.6.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.6.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.6.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.6.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.6.conv_module.depthwise_conv.bias | [256] 
| 256 | True\n", + "encoder.encoders.6.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.6.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.6.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.6.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.6.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.6.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.6.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.6.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.6.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.6.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.6.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.6.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.6.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.6.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.6.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.6.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.6.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.6.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.7.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.7.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.7.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.7.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.7.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.7.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.7.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.7.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.7.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.7.self_attn.linear_out.bias | [256] | 256 | 
True\n", + "encoder.encoders.7.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.7.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.7.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.7.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.7.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.7.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.7.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.7.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.7.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.7.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.7.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.7.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.7.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.7.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.7.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.7.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.7.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.7.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.7.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.7.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.7.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.7.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.7.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.7.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.7.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.7.norm_conv.weight | [256] | 256 | True\n", + 
"encoder.encoders.7.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.7.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.7.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.7.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.7.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.8.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.8.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.8.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.8.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.8.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.8.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.8.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.8.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.8.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.8.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.8.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.8.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.8.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.8.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.8.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.8.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.8.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.8.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.8.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.8.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.8.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + 
"encoder.encoders.8.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.8.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.8.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.8.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.8.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.8.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.8.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.8.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.8.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.8.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.8.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.8.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.8.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.8.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.8.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.8.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.8.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.8.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.8.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.8.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.9.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.9.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.9.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.9.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.9.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.9.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.9.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.9.self_attn.linear_v.bias | [256] | 256 | True\n", + 
"encoder.encoders.9.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.9.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.9.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.9.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.9.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.9.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.9.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.9.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.9.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.9.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.9.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.9.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.9.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.9.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.9.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.9.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.9.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.9.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.9.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.9.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.9.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.9.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.9.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.9.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.9.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.9.norm_ff_macaron.weight | [256] | 256 | True\n", + 
"encoder.encoders.9.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.9.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.9.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.9.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.9.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.9.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.9.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.10.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.10.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.10.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.10.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.10.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.10.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.10.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.10.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.10.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.10.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.10.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.10.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.10.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.10.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.10.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.10.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.10.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.10.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.10.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + 
"encoder.encoders.10.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.10.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.10.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.10.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.10.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.10.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.10.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.10.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.10.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.10.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.10.norm_ff.weight | [256] | 256 | True\n", + "encoder.encoders.10.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.10.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.10.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.10.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.10.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.10.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.10.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.10.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.10.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.10.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.10.concat_linear.bias | [256] | 256 | True\n", + "encoder.encoders.11.self_attn.pos_bias_u | [4, 64] | 256 | True\n", + "encoder.encoders.11.self_attn.pos_bias_v | [4, 64] | 256 | True\n", + "encoder.encoders.11.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.11.self_attn.linear_q.bias | [256] | 256 | True\n", + "encoder.encoders.11.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + 
"encoder.encoders.11.self_attn.linear_k.bias | [256] | 256 | True\n", + "encoder.encoders.11.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.11.self_attn.linear_v.bias | [256] | 256 | True\n", + "encoder.encoders.11.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.11.self_attn.linear_out.bias | [256] | 256 | True\n", + "encoder.encoders.11.self_attn.linear_pos.weight | [256, 256] | 65536 | True\n", + "encoder.encoders.11.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.11.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.11.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.11.feed_forward.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.11.feed_forward_macaron.w_1.weight | [256, 2048] | 524288 | True\n", + "encoder.encoders.11.feed_forward_macaron.w_1.bias | [2048] | 2048 | True\n", + "encoder.encoders.11.feed_forward_macaron.w_2.weight | [2048, 256] | 524288 | True\n", + "encoder.encoders.11.feed_forward_macaron.w_2.bias | [256] | 256 | True\n", + "encoder.encoders.11.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072 | True\n", + "encoder.encoders.11.conv_module.pointwise_conv1.bias | [512] | 512 | True\n", + "encoder.encoders.11.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840 | True\n", + "encoder.encoders.11.conv_module.depthwise_conv.bias | [256] | 256 | True\n", + "encoder.encoders.11.conv_module.norm.weight | [256] | 256 | True\n", + "encoder.encoders.11.conv_module.norm.bias | [256] | 256 | True\n", + "encoder.encoders.11.conv_module.norm._mean | [256] | 256 | False\n", + "encoder.encoders.11.conv_module.norm._variance | [256] | 256 | False\n", + "encoder.encoders.11.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536 | True\n", + "encoder.encoders.11.conv_module.pointwise_conv2.bias | [256] | 256 | True\n", + "encoder.encoders.11.norm_ff.weight | [256] | 256 | True\n", + 
"encoder.encoders.11.norm_ff.bias | [256] | 256 | True\n", + "encoder.encoders.11.norm_mha.weight | [256] | 256 | True\n", + "encoder.encoders.11.norm_mha.bias | [256] | 256 | True\n", + "encoder.encoders.11.norm_ff_macaron.weight | [256] | 256 | True\n", + "encoder.encoders.11.norm_ff_macaron.bias | [256] | 256 | True\n", + "encoder.encoders.11.norm_conv.weight | [256] | 256 | True\n", + "encoder.encoders.11.norm_conv.bias | [256] | 256 | True\n", + "encoder.encoders.11.norm_final.weight | [256] | 256 | True\n", + "encoder.encoders.11.norm_final.bias | [256] | 256 | True\n", + "encoder.encoders.11.concat_linear.weight | [512, 256] | 131072 | True\n", + "encoder.encoders.11.concat_linear.bias | [256] | 256 | True\n", + "decoder.embed.0.weight | [4223, 256] | 1081088 | True\n", + "decoder.after_norm.weight | [256] | 256 | True\n", + "decoder.after_norm.bias | [256] | 256 | True\n", + "decoder.output_layer.weight | [256, 4223] | 1081088 | True\n", + "decoder.output_layer.bias | [4223] | 4223 | True\n", + "decoder.decoders.0.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.self_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.0.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.self_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.0.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.self_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.0.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.self_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.0.src_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.src_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.0.src_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.src_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.0.src_attn.linear_v.weight | [256, 256] | 65536 | 
True\n", + "decoder.decoders.0.src_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.0.src_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.0.src_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.0.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "decoder.decoders.0.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "decoder.decoders.0.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "decoder.decoders.0.feed_forward.w_2.bias | [256] | 256 | True\n", + "decoder.decoders.0.norm1.weight | [256] | 256 | True\n", + "decoder.decoders.0.norm1.bias | [256] | 256 | True\n", + "decoder.decoders.0.norm2.weight | [256] | 256 | True\n", + "decoder.decoders.0.norm2.bias | [256] | 256 | True\n", + "decoder.decoders.0.norm3.weight | [256] | 256 | True\n", + "decoder.decoders.0.norm3.bias | [256] | 256 | True\n", + "decoder.decoders.0.concat_linear1.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.0.concat_linear1.bias | [256] | 256 | True\n", + "decoder.decoders.0.concat_linear2.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.0.concat_linear2.bias | [256] | 256 | True\n", + "decoder.decoders.1.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.self_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.1.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.self_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.1.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.self_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.1.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.self_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.1.src_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.src_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.1.src_attn.linear_k.weight | [256, 256] | 65536 
| True\n", + "decoder.decoders.1.src_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.1.src_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.src_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.1.src_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.1.src_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.1.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "decoder.decoders.1.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "decoder.decoders.1.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "decoder.decoders.1.feed_forward.w_2.bias | [256] | 256 | True\n", + "decoder.decoders.1.norm1.weight | [256] | 256 | True\n", + "decoder.decoders.1.norm1.bias | [256] | 256 | True\n", + "decoder.decoders.1.norm2.weight | [256] | 256 | True\n", + "decoder.decoders.1.norm2.bias | [256] | 256 | True\n", + "decoder.decoders.1.norm3.weight | [256] | 256 | True\n", + "decoder.decoders.1.norm3.bias | [256] | 256 | True\n", + "decoder.decoders.1.concat_linear1.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.1.concat_linear1.bias | [256] | 256 | True\n", + "decoder.decoders.1.concat_linear2.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.1.concat_linear2.bias | [256] | 256 | True\n", + "decoder.decoders.2.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.self_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.2.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.self_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.2.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.self_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.2.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.self_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.2.src_attn.linear_q.weight | [256, 256] | 
65536 | True\n", + "decoder.decoders.2.src_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.2.src_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.src_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.2.src_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.src_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.2.src_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.2.src_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.2.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "decoder.decoders.2.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "decoder.decoders.2.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "decoder.decoders.2.feed_forward.w_2.bias | [256] | 256 | True\n", + "decoder.decoders.2.norm1.weight | [256] | 256 | True\n", + "decoder.decoders.2.norm1.bias | [256] | 256 | True\n", + "decoder.decoders.2.norm2.weight | [256] | 256 | True\n", + "decoder.decoders.2.norm2.bias | [256] | 256 | True\n", + "decoder.decoders.2.norm3.weight | [256] | 256 | True\n", + "decoder.decoders.2.norm3.bias | [256] | 256 | True\n", + "decoder.decoders.2.concat_linear1.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.2.concat_linear1.bias | [256] | 256 | True\n", + "decoder.decoders.2.concat_linear2.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.2.concat_linear2.bias | [256] | 256 | True\n", + "decoder.decoders.3.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.self_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.3.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.self_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.3.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.self_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.3.self_attn.linear_out.weight | [256, 256] | 
65536 | True\n", + "decoder.decoders.3.self_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.3.src_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.src_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.3.src_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.src_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.3.src_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.src_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.3.src_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.3.src_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.3.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "decoder.decoders.3.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "decoder.decoders.3.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "decoder.decoders.3.feed_forward.w_2.bias | [256] | 256 | True\n", + "decoder.decoders.3.norm1.weight | [256] | 256 | True\n", + "decoder.decoders.3.norm1.bias | [256] | 256 | True\n", + "decoder.decoders.3.norm2.weight | [256] | 256 | True\n", + "decoder.decoders.3.norm2.bias | [256] | 256 | True\n", + "decoder.decoders.3.norm3.weight | [256] | 256 | True\n", + "decoder.decoders.3.norm3.bias | [256] | 256 | True\n", + "decoder.decoders.3.concat_linear1.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.3.concat_linear1.bias | [256] | 256 | True\n", + "decoder.decoders.3.concat_linear2.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.3.concat_linear2.bias | [256] | 256 | True\n", + "decoder.decoders.4.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.self_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.4.self_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.self_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.4.self_attn.linear_v.weight | [256, 256] | 
65536 | True\n", + "decoder.decoders.4.self_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.4.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.self_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.4.src_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.src_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.4.src_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.src_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.4.src_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.src_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.4.src_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.4.src_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.4.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "decoder.decoders.4.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "decoder.decoders.4.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "decoder.decoders.4.feed_forward.w_2.bias | [256] | 256 | True\n", + "decoder.decoders.4.norm1.weight | [256] | 256 | True\n", + "decoder.decoders.4.norm1.bias | [256] | 256 | True\n", + "decoder.decoders.4.norm2.weight | [256] | 256 | True\n", + "decoder.decoders.4.norm2.bias | [256] | 256 | True\n", + "decoder.decoders.4.norm3.weight | [256] | 256 | True\n", + "decoder.decoders.4.norm3.bias | [256] | 256 | True\n", + "decoder.decoders.4.concat_linear1.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.4.concat_linear1.bias | [256] | 256 | True\n", + "decoder.decoders.4.concat_linear2.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.4.concat_linear2.bias | [256] | 256 | True\n", + "decoder.decoders.5.self_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.self_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.5.self_attn.linear_k.weight | [256, 256] | 
65536 | True\n", + "decoder.decoders.5.self_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.5.self_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.self_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.5.self_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.self_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.5.src_attn.linear_q.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.src_attn.linear_q.bias | [256] | 256 | True\n", + "decoder.decoders.5.src_attn.linear_k.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.src_attn.linear_k.bias | [256] | 256 | True\n", + "decoder.decoders.5.src_attn.linear_v.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.src_attn.linear_v.bias | [256] | 256 | True\n", + "decoder.decoders.5.src_attn.linear_out.weight | [256, 256] | 65536 | True\n", + "decoder.decoders.5.src_attn.linear_out.bias | [256] | 256 | True\n", + "decoder.decoders.5.feed_forward.w_1.weight | [256, 2048] | 524288 | True\n", + "decoder.decoders.5.feed_forward.w_1.bias | [2048] | 2048 | True\n", + "decoder.decoders.5.feed_forward.w_2.weight | [2048, 256] | 524288 | True\n", + "decoder.decoders.5.feed_forward.w_2.bias | [256] | 256 | True\n", + "decoder.decoders.5.norm1.weight | [256] | 256 | True\n", + "decoder.decoders.5.norm1.bias | [256] | 256 | True\n", + "decoder.decoders.5.norm2.weight | [256] | 256 | True\n", + "decoder.decoders.5.norm2.bias | [256] | 256 | True\n", + "decoder.decoders.5.norm3.weight | [256] | 256 | True\n", + "decoder.decoders.5.norm3.bias | [256] | 256 | True\n", + "decoder.decoders.5.concat_linear1.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.5.concat_linear1.bias | [256] | 256 | True\n", + "decoder.decoders.5.concat_linear2.weight | [512, 256] | 131072 | True\n", + "decoder.decoders.5.concat_linear2.bias | [256] | 256 | True\n", + "ctc.ctc_lo.weight | [256, 4223] | 1081088 | True\n", + 
"ctc.ctc_lo.bias | [4223] | 4223 | True\n", + "Total parameters: 687.0, 49347582.0 elements.\n" + ] + } + ], + "source": [ + "conf_str='examples/aishell/s1/conf/conformer.yaml'\n", + "cfg = CN().load_cfg(open(conf_str))\n", + "cfg.model.input_dim = 80\n", + "cfg.model.output_dim = 4223\n", + "cfg.model.cmvn_file = \"/workspace/wenet/examples/aishell/s0/raw_wav/train/global_cmvn\"\n", + "cfg.model.cmvn_file_type = 'json'\n", + "cfg.freeze()\n", + "\n", + "model = U2Model(cfg.model)\n", + "print_params(model)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "reserved-nightlife", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "encoder.global_cmvn.mean | [80] | 80\n", + "encoder.global_cmvn.istd | [80] | 80\n", + "encoder.embed.conv.0.weight | [256, 1, 3, 3] | 2304\n", + "encoder.embed.conv.0.bias | [256] | 256\n", + "encoder.embed.conv.2.weight | [256, 256, 3, 3] | 589824\n", + "encoder.embed.conv.2.bias | [256] | 256\n", + "encoder.embed.linear.weight | [4864, 256] | 1245184\n", + "encoder.embed.linear.bias | [256] | 256\n", + "encoder.after_norm.weight | [256] | 256\n", + "encoder.after_norm.bias | [256] | 256\n", + "encoder.encoders.0.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.0.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.0.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.0.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.0.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.0.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.0.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.0.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.0.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.0.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.0.self_attn.linear_pos.weight | [256, 256] | 65536\n", + 
"encoder.encoders.0.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.0.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.0.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.0.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.0.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.0.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.0.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.0.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.0.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.0.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.0.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.0.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.0.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.0.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.0.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.0.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.0.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.0.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.0.norm_ff.weight | [256] | 256\n", + "encoder.encoders.0.norm_ff.bias | [256] | 256\n", + "encoder.encoders.0.norm_mha.weight | [256] | 256\n", + "encoder.encoders.0.norm_mha.bias | [256] | 256\n", + "encoder.encoders.0.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.0.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.0.norm_conv.weight | [256] | 256\n", + "encoder.encoders.0.norm_conv.bias | [256] | 256\n", + "encoder.encoders.0.norm_final.weight | [256] | 256\n", + "encoder.encoders.0.norm_final.bias | [256] | 256\n", + "encoder.encoders.0.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.0.concat_linear.bias | [256] | 256\n", + 
"encoder.encoders.1.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.1.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.1.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.1.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.1.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.1.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.1.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.1.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.1.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.1.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.1.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.1.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.1.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.1.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.1.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.1.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.1.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.1.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.1.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.1.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.1.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.1.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.1.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.1.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.1.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.1.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.1.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.1.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + 
"encoder.encoders.1.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.1.norm_ff.weight | [256] | 256\n", + "encoder.encoders.1.norm_ff.bias | [256] | 256\n", + "encoder.encoders.1.norm_mha.weight | [256] | 256\n", + "encoder.encoders.1.norm_mha.bias | [256] | 256\n", + "encoder.encoders.1.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.1.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.1.norm_conv.weight | [256] | 256\n", + "encoder.encoders.1.norm_conv.bias | [256] | 256\n", + "encoder.encoders.1.norm_final.weight | [256] | 256\n", + "encoder.encoders.1.norm_final.bias | [256] | 256\n", + "encoder.encoders.1.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.1.concat_linear.bias | [256] | 256\n", + "encoder.encoders.2.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.2.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.2.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.2.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.2.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.2.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.2.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.2.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.2.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.2.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.2.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.2.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.2.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.2.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.2.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.2.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.2.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.2.feed_forward_macaron.w_2.weight 
| [2048, 256] | 524288\n", + "encoder.encoders.2.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.2.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.2.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.2.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.2.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.2.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.2.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.2.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.2.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.2.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.2.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.2.norm_ff.weight | [256] | 256\n", + "encoder.encoders.2.norm_ff.bias | [256] | 256\n", + "encoder.encoders.2.norm_mha.weight | [256] | 256\n", + "encoder.encoders.2.norm_mha.bias | [256] | 256\n", + "encoder.encoders.2.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.2.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.2.norm_conv.weight | [256] | 256\n", + "encoder.encoders.2.norm_conv.bias | [256] | 256\n", + "encoder.encoders.2.norm_final.weight | [256] | 256\n", + "encoder.encoders.2.norm_final.bias | [256] | 256\n", + "encoder.encoders.2.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.2.concat_linear.bias | [256] | 256\n", + "encoder.encoders.3.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.3.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.3.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.3.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.3.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.3.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.3.self_attn.linear_v.weight | [256, 256] | 65536\n", + 
"encoder.encoders.3.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.3.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.3.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.3.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.3.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.3.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.3.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.3.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.3.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.3.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.3.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.3.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.3.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.3.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.3.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.3.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.3.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.3.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.3.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.3.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.3.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.3.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.3.norm_ff.weight | [256] | 256\n", + "encoder.encoders.3.norm_ff.bias | [256] | 256\n", + "encoder.encoders.3.norm_mha.weight | [256] | 256\n", + "encoder.encoders.3.norm_mha.bias | [256] | 256\n", + "encoder.encoders.3.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.3.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.3.norm_conv.weight | [256] | 256\n", + 
"encoder.encoders.3.norm_conv.bias | [256] | 256\n", + "encoder.encoders.3.norm_final.weight | [256] | 256\n", + "encoder.encoders.3.norm_final.bias | [256] | 256\n", + "encoder.encoders.3.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.3.concat_linear.bias | [256] | 256\n", + "encoder.encoders.4.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.4.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.4.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.4.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.4.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.4.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.4.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.4.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.4.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.4.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.4.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.4.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.4.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.4.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.4.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.4.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.4.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.4.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.4.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.4.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.4.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.4.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.4.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.4.conv_module.norm.weight | [256] | 
256\n", + "encoder.encoders.4.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.4.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.4.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.4.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.4.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.4.norm_ff.weight | [256] | 256\n", + "encoder.encoders.4.norm_ff.bias | [256] | 256\n", + "encoder.encoders.4.norm_mha.weight | [256] | 256\n", + "encoder.encoders.4.norm_mha.bias | [256] | 256\n", + "encoder.encoders.4.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.4.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.4.norm_conv.weight | [256] | 256\n", + "encoder.encoders.4.norm_conv.bias | [256] | 256\n", + "encoder.encoders.4.norm_final.weight | [256] | 256\n", + "encoder.encoders.4.norm_final.bias | [256] | 256\n", + "encoder.encoders.4.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.4.concat_linear.bias | [256] | 256\n", + "encoder.encoders.5.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.5.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.5.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.5.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.5.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.5.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.5.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.5.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.5.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.5.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.5.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.5.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.5.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.5.feed_forward.w_2.weight | [2048, 
256] | 524288\n", + "encoder.encoders.5.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.5.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.5.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.5.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.5.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.5.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.5.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.5.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.5.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.5.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.5.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.5.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.5.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.5.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.5.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.5.norm_ff.weight | [256] | 256\n", + "encoder.encoders.5.norm_ff.bias | [256] | 256\n", + "encoder.encoders.5.norm_mha.weight | [256] | 256\n", + "encoder.encoders.5.norm_mha.bias | [256] | 256\n", + "encoder.encoders.5.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.5.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.5.norm_conv.weight | [256] | 256\n", + "encoder.encoders.5.norm_conv.bias | [256] | 256\n", + "encoder.encoders.5.norm_final.weight | [256] | 256\n", + "encoder.encoders.5.norm_final.bias | [256] | 256\n", + "encoder.encoders.5.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.5.concat_linear.bias | [256] | 256\n", + "encoder.encoders.6.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.6.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.6.self_attn.linear_q.weight | [256, 256] | 
65536\n", + "encoder.encoders.6.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.6.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.6.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.6.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.6.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.6.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.6.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.6.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.6.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.6.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.6.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.6.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.6.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.6.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.6.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.6.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.6.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.6.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.6.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.6.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.6.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.6.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.6.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.6.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.6.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.6.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.6.norm_ff.weight | [256] | 256\n", + "encoder.encoders.6.norm_ff.bias | [256] | 256\n", + 
"encoder.encoders.6.norm_mha.weight | [256] | 256\n", + "encoder.encoders.6.norm_mha.bias | [256] | 256\n", + "encoder.encoders.6.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.6.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.6.norm_conv.weight | [256] | 256\n", + "encoder.encoders.6.norm_conv.bias | [256] | 256\n", + "encoder.encoders.6.norm_final.weight | [256] | 256\n", + "encoder.encoders.6.norm_final.bias | [256] | 256\n", + "encoder.encoders.6.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.6.concat_linear.bias | [256] | 256\n", + "encoder.encoders.7.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.7.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.7.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.7.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.7.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.7.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.7.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.7.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.7.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.7.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.7.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.7.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.7.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.7.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.7.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.7.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.7.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.7.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.7.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.7.conv_module.pointwise_conv1.weight | [512, 256, 1] | 
131072\n", + "encoder.encoders.7.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.7.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.7.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.7.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.7.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.7.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.7.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.7.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.7.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.7.norm_ff.weight | [256] | 256\n", + "encoder.encoders.7.norm_ff.bias | [256] | 256\n", + "encoder.encoders.7.norm_mha.weight | [256] | 256\n", + "encoder.encoders.7.norm_mha.bias | [256] | 256\n", + "encoder.encoders.7.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.7.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.7.norm_conv.weight | [256] | 256\n", + "encoder.encoders.7.norm_conv.bias | [256] | 256\n", + "encoder.encoders.7.norm_final.weight | [256] | 256\n", + "encoder.encoders.7.norm_final.bias | [256] | 256\n", + "encoder.encoders.7.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.7.concat_linear.bias | [256] | 256\n", + "encoder.encoders.8.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.8.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.8.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.8.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.8.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.8.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.8.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.8.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.8.self_attn.linear_out.weight | [256, 256] | 65536\n", + 
"encoder.encoders.8.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.8.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.8.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.8.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.8.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.8.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.8.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.8.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.8.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.8.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.8.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.8.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.8.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.8.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.8.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.8.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.8.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.8.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.8.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.8.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.8.norm_ff.weight | [256] | 256\n", + "encoder.encoders.8.norm_ff.bias | [256] | 256\n", + "encoder.encoders.8.norm_mha.weight | [256] | 256\n", + "encoder.encoders.8.norm_mha.bias | [256] | 256\n", + "encoder.encoders.8.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.8.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.8.norm_conv.weight | [256] | 256\n", + "encoder.encoders.8.norm_conv.bias | [256] | 256\n", + "encoder.encoders.8.norm_final.weight | [256] | 256\n", + "encoder.encoders.8.norm_final.bias | [256] 
| 256\n", + "encoder.encoders.8.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.8.concat_linear.bias | [256] | 256\n", + "encoder.encoders.9.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.9.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.9.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.9.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.9.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.9.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.9.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.9.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.9.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.9.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.9.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.9.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.9.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.9.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.9.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.9.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.9.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.9.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.9.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.9.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.9.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.9.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.9.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.9.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.9.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.9.conv_module.norm._mean | [256] | 256\n", + 
"encoder.encoders.9.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.9.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.9.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.9.norm_ff.weight | [256] | 256\n", + "encoder.encoders.9.norm_ff.bias | [256] | 256\n", + "encoder.encoders.9.norm_mha.weight | [256] | 256\n", + "encoder.encoders.9.norm_mha.bias | [256] | 256\n", + "encoder.encoders.9.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.9.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.9.norm_conv.weight | [256] | 256\n", + "encoder.encoders.9.norm_conv.bias | [256] | 256\n", + "encoder.encoders.9.norm_final.weight | [256] | 256\n", + "encoder.encoders.9.norm_final.bias | [256] | 256\n", + "encoder.encoders.9.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.9.concat_linear.bias | [256] | 256\n", + "encoder.encoders.10.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.10.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.10.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.10.self_attn.linear_q.bias | [256] | 256\n", + "encoder.encoders.10.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.10.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.10.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.10.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.10.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.10.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.10.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.10.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.10.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.10.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.10.feed_forward.w_2.bias | [256] | 256\n", + 
"encoder.encoders.10.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.10.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.10.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.10.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.10.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.10.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.10.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.10.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.10.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.10.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.10.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.10.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.10.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.10.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.10.norm_ff.weight | [256] | 256\n", + "encoder.encoders.10.norm_ff.bias | [256] | 256\n", + "encoder.encoders.10.norm_mha.weight | [256] | 256\n", + "encoder.encoders.10.norm_mha.bias | [256] | 256\n", + "encoder.encoders.10.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.10.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.10.norm_conv.weight | [256] | 256\n", + "encoder.encoders.10.norm_conv.bias | [256] | 256\n", + "encoder.encoders.10.norm_final.weight | [256] | 256\n", + "encoder.encoders.10.norm_final.bias | [256] | 256\n", + "encoder.encoders.10.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.10.concat_linear.bias | [256] | 256\n", + "encoder.encoders.11.self_attn.pos_bias_u | [4, 64] | 256\n", + "encoder.encoders.11.self_attn.pos_bias_v | [4, 64] | 256\n", + "encoder.encoders.11.self_attn.linear_q.weight | [256, 256] | 65536\n", + "encoder.encoders.11.self_attn.linear_q.bias 
| [256] | 256\n", + "encoder.encoders.11.self_attn.linear_k.weight | [256, 256] | 65536\n", + "encoder.encoders.11.self_attn.linear_k.bias | [256] | 256\n", + "encoder.encoders.11.self_attn.linear_v.weight | [256, 256] | 65536\n", + "encoder.encoders.11.self_attn.linear_v.bias | [256] | 256\n", + "encoder.encoders.11.self_attn.linear_out.weight | [256, 256] | 65536\n", + "encoder.encoders.11.self_attn.linear_out.bias | [256] | 256\n", + "encoder.encoders.11.self_attn.linear_pos.weight | [256, 256] | 65536\n", + "encoder.encoders.11.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.11.feed_forward.w_1.bias | [2048] | 2048\n", + "encoder.encoders.11.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.11.feed_forward.w_2.bias | [256] | 256\n", + "encoder.encoders.11.feed_forward_macaron.w_1.weight | [256, 2048] | 524288\n", + "encoder.encoders.11.feed_forward_macaron.w_1.bias | [2048] | 2048\n", + "encoder.encoders.11.feed_forward_macaron.w_2.weight | [2048, 256] | 524288\n", + "encoder.encoders.11.feed_forward_macaron.w_2.bias | [256] | 256\n", + "encoder.encoders.11.conv_module.pointwise_conv1.weight | [512, 256, 1] | 131072\n", + "encoder.encoders.11.conv_module.pointwise_conv1.bias | [512] | 512\n", + "encoder.encoders.11.conv_module.depthwise_conv.weight | [256, 1, 15] | 3840\n", + "encoder.encoders.11.conv_module.depthwise_conv.bias | [256] | 256\n", + "encoder.encoders.11.conv_module.norm.weight | [256] | 256\n", + "encoder.encoders.11.conv_module.norm.bias | [256] | 256\n", + "encoder.encoders.11.conv_module.norm._mean | [256] | 256\n", + "encoder.encoders.11.conv_module.norm._variance | [256] | 256\n", + "encoder.encoders.11.conv_module.pointwise_conv2.weight | [256, 256, 1] | 65536\n", + "encoder.encoders.11.conv_module.pointwise_conv2.bias | [256] | 256\n", + "encoder.encoders.11.norm_ff.weight | [256] | 256\n", + "encoder.encoders.11.norm_ff.bias | [256] | 256\n", + "encoder.encoders.11.norm_mha.weight | [256] | 
256\n", + "encoder.encoders.11.norm_mha.bias | [256] | 256\n", + "encoder.encoders.11.norm_ff_macaron.weight | [256] | 256\n", + "encoder.encoders.11.norm_ff_macaron.bias | [256] | 256\n", + "encoder.encoders.11.norm_conv.weight | [256] | 256\n", + "encoder.encoders.11.norm_conv.bias | [256] | 256\n", + "encoder.encoders.11.norm_final.weight | [256] | 256\n", + "encoder.encoders.11.norm_final.bias | [256] | 256\n", + "encoder.encoders.11.concat_linear.weight | [512, 256] | 131072\n", + "encoder.encoders.11.concat_linear.bias | [256] | 256\n", + "decoder.embed.0.weight | [4223, 256] | 1081088\n", + "decoder.after_norm.weight | [256] | 256\n", + "decoder.after_norm.bias | [256] | 256\n", + "decoder.output_layer.weight | [256, 4223] | 1081088\n", + "decoder.output_layer.bias | [4223] | 4223\n", + "decoder.decoders.0.self_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.0.self_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.0.self_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.0.self_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.0.self_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.0.self_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.0.self_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.0.self_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.0.src_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.0.src_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.0.src_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.0.src_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.0.src_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.0.src_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.0.src_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.0.src_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.0.feed_forward.w_1.weight | [256, 2048] | 524288\n", + 
"decoder.decoders.0.feed_forward.w_1.bias | [2048] | 2048\n", + "decoder.decoders.0.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "decoder.decoders.0.feed_forward.w_2.bias | [256] | 256\n", + "decoder.decoders.0.norm1.weight | [256] | 256\n", + "decoder.decoders.0.norm1.bias | [256] | 256\n", + "decoder.decoders.0.norm2.weight | [256] | 256\n", + "decoder.decoders.0.norm2.bias | [256] | 256\n", + "decoder.decoders.0.norm3.weight | [256] | 256\n", + "decoder.decoders.0.norm3.bias | [256] | 256\n", + "decoder.decoders.0.concat_linear1.weight | [512, 256] | 131072\n", + "decoder.decoders.0.concat_linear1.bias | [256] | 256\n", + "decoder.decoders.0.concat_linear2.weight | [512, 256] | 131072\n", + "decoder.decoders.0.concat_linear2.bias | [256] | 256\n", + "decoder.decoders.1.self_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.1.self_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.1.self_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.1.self_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.1.self_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.1.self_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.1.self_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.1.self_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.1.src_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.1.src_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.1.src_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.1.src_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.1.src_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.1.src_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.1.src_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.1.src_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.1.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "decoder.decoders.1.feed_forward.w_1.bias | [2048] | 
2048\n", + "decoder.decoders.1.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "decoder.decoders.1.feed_forward.w_2.bias | [256] | 256\n", + "decoder.decoders.1.norm1.weight | [256] | 256\n", + "decoder.decoders.1.norm1.bias | [256] | 256\n", + "decoder.decoders.1.norm2.weight | [256] | 256\n", + "decoder.decoders.1.norm2.bias | [256] | 256\n", + "decoder.decoders.1.norm3.weight | [256] | 256\n", + "decoder.decoders.1.norm3.bias | [256] | 256\n", + "decoder.decoders.1.concat_linear1.weight | [512, 256] | 131072\n", + "decoder.decoders.1.concat_linear1.bias | [256] | 256\n", + "decoder.decoders.1.concat_linear2.weight | [512, 256] | 131072\n", + "decoder.decoders.1.concat_linear2.bias | [256] | 256\n", + "decoder.decoders.2.self_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.2.self_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.2.self_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.2.self_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.2.self_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.2.self_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.2.self_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.2.self_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.2.src_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.2.src_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.2.src_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.2.src_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.2.src_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.2.src_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.2.src_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.2.src_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.2.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "decoder.decoders.2.feed_forward.w_1.bias | [2048] | 2048\n", + "decoder.decoders.2.feed_forward.w_2.weight | 
[2048, 256] | 524288\n", + "decoder.decoders.2.feed_forward.w_2.bias | [256] | 256\n", + "decoder.decoders.2.norm1.weight | [256] | 256\n", + "decoder.decoders.2.norm1.bias | [256] | 256\n", + "decoder.decoders.2.norm2.weight | [256] | 256\n", + "decoder.decoders.2.norm2.bias | [256] | 256\n", + "decoder.decoders.2.norm3.weight | [256] | 256\n", + "decoder.decoders.2.norm3.bias | [256] | 256\n", + "decoder.decoders.2.concat_linear1.weight | [512, 256] | 131072\n", + "decoder.decoders.2.concat_linear1.bias | [256] | 256\n", + "decoder.decoders.2.concat_linear2.weight | [512, 256] | 131072\n", + "decoder.decoders.2.concat_linear2.bias | [256] | 256\n", + "decoder.decoders.3.self_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.3.self_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.3.self_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.3.self_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.3.self_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.3.self_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.3.self_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.3.self_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.3.src_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.3.src_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.3.src_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.3.src_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.3.src_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.3.src_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.3.src_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.3.src_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.3.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "decoder.decoders.3.feed_forward.w_1.bias | [2048] | 2048\n", + "decoder.decoders.3.feed_forward.w_2.weight | [2048, 256] | 524288\n", + 
"decoder.decoders.3.feed_forward.w_2.bias | [256] | 256\n", + "decoder.decoders.3.norm1.weight | [256] | 256\n", + "decoder.decoders.3.norm1.bias | [256] | 256\n", + "decoder.decoders.3.norm2.weight | [256] | 256\n", + "decoder.decoders.3.norm2.bias | [256] | 256\n", + "decoder.decoders.3.norm3.weight | [256] | 256\n", + "decoder.decoders.3.norm3.bias | [256] | 256\n", + "decoder.decoders.3.concat_linear1.weight | [512, 256] | 131072\n", + "decoder.decoders.3.concat_linear1.bias | [256] | 256\n", + "decoder.decoders.3.concat_linear2.weight | [512, 256] | 131072\n", + "decoder.decoders.3.concat_linear2.bias | [256] | 256\n", + "decoder.decoders.4.self_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.4.self_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.4.self_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.4.self_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.4.self_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.4.self_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.4.self_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.4.self_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.4.src_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.4.src_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.4.src_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.4.src_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.4.src_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.4.src_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.4.src_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.4.src_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.4.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "decoder.decoders.4.feed_forward.w_1.bias | [2048] | 2048\n", + "decoder.decoders.4.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "decoder.decoders.4.feed_forward.w_2.bias | [256] | 256\n", 
+ "decoder.decoders.4.norm1.weight | [256] | 256\n", + "decoder.decoders.4.norm1.bias | [256] | 256\n", + "decoder.decoders.4.norm2.weight | [256] | 256\n", + "decoder.decoders.4.norm2.bias | [256] | 256\n", + "decoder.decoders.4.norm3.weight | [256] | 256\n", + "decoder.decoders.4.norm3.bias | [256] | 256\n", + "decoder.decoders.4.concat_linear1.weight | [512, 256] | 131072\n", + "decoder.decoders.4.concat_linear1.bias | [256] | 256\n", + "decoder.decoders.4.concat_linear2.weight | [512, 256] | 131072\n", + "decoder.decoders.4.concat_linear2.bias | [256] | 256\n", + "decoder.decoders.5.self_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.5.self_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.5.self_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.5.self_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.5.self_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.5.self_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.5.self_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.5.self_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.5.src_attn.linear_q.weight | [256, 256] | 65536\n", + "decoder.decoders.5.src_attn.linear_q.bias | [256] | 256\n", + "decoder.decoders.5.src_attn.linear_k.weight | [256, 256] | 65536\n", + "decoder.decoders.5.src_attn.linear_k.bias | [256] | 256\n", + "decoder.decoders.5.src_attn.linear_v.weight | [256, 256] | 65536\n", + "decoder.decoders.5.src_attn.linear_v.bias | [256] | 256\n", + "decoder.decoders.5.src_attn.linear_out.weight | [256, 256] | 65536\n", + "decoder.decoders.5.src_attn.linear_out.bias | [256] | 256\n", + "decoder.decoders.5.feed_forward.w_1.weight | [256, 2048] | 524288\n", + "decoder.decoders.5.feed_forward.w_1.bias | [2048] | 2048\n", + "decoder.decoders.5.feed_forward.w_2.weight | [2048, 256] | 524288\n", + "decoder.decoders.5.feed_forward.w_2.bias | [256] | 256\n", + "decoder.decoders.5.norm1.weight | [256] | 256\n", + 
"decoder.decoders.5.norm1.bias | [256] | 256\n", + "decoder.decoders.5.norm2.weight | [256] | 256\n", + "decoder.decoders.5.norm2.bias | [256] | 256\n", + "decoder.decoders.5.norm3.weight | [256] | 256\n", + "decoder.decoders.5.norm3.bias | [256] | 256\n", + "decoder.decoders.5.concat_linear1.weight | [512, 256] | 131072\n", + "decoder.decoders.5.concat_linear1.bias | [256] | 256\n", + "decoder.decoders.5.concat_linear2.weight | [512, 256] | 131072\n", + "decoder.decoders.5.concat_linear2.bias | [256] | 256\n", + "ctc.ctc_lo.weight | [256, 4223] | 1081088\n", + "ctc.ctc_lo.bias | [4223] | 4223\n", + "Total parameters: 689, 49347742 elements.\n" + ] + } + ], + "source": [ + "summary(model)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ranking-beads", + "metadata": {}, + "outputs": [], + "source": [ + "total_loss, attention_loss, ctc_loss = model(self.audio, self.audio_len,\n", + " self.text, self.text_len)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/deepspeech/frontend/normalizer.py b/deepspeech/frontend/normalizer.py index 9161c1e46..83c1ff905 100644 --- a/deepspeech/frontend/normalizer.py +++ b/deepspeech/frontend/normalizer.py @@ -77,15 +77,19 @@ class FeatureNormalizer(object): :param filepath: File to write mean and stddev. 
:type filepath: str """ - np.savez(filepath, mean=self._mean, std=self._std) + np.savez(filepath, mean=self._mean, istd=self._istd) def _read_mean_std_from_file(self, filepath, eps=1e-20): """Load mean and std from file.""" - mean, std = load_cmvn(filepath, filetype='npz') + mean, istd = load_cmvn(filepath, filetype='npz') self._mean = mean.T - self._istd = 1.0 / std.T + self._istd = istd.T - def _compute_mean_std(self, manifest_path, featurize_func, num_samples): + def _compute_mean_std(self, + manifest_path, + featurize_func, + num_samples, + eps=1e-20): """Compute mean and std from randomly sampled instances.""" manifest = read_manifest(manifest_path) if num_samples == -1: @@ -98,4 +102,6 @@ class FeatureNormalizer(object): featurize_func(AudioSegment.from_file(instance["feat"]))) features = np.hstack(features) #(D, T) self._mean = np.mean(features, axis=1).reshape([1, -1]) #(1, D) - self._std = np.std(features, axis=1).reshape([1, -1]) #(1, D) + std = np.std(features, axis=1).reshape([1, -1]) #(1, D) + std = np.clip(std, eps, None) + self._istd = 1.0 / std diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py index 4b17c841a..5a4989d62 100644 --- a/deepspeech/frontend/utility.py +++ b/deepspeech/frontend/utility.py @@ -238,10 +238,8 @@ def _load_kaldi_cmvn(kaldi_cmvn_file): def _load_npz_cmvn(npz_cmvn_file, eps=1e-20): npzfile = np.load(npz_cmvn_file) means = npzfile["mean"] #(1, D) - std = npzfile["std"] #(1, D) - std = np.clip(std, eps, None) - variance = 1.0 / std - cmvn = np.array([means, variance]) + istd = npzfile["istd"] #(1, D) + cmvn = np.array([means, istd]) return cmvn diff --git a/deepspeech/modules/mask.py b/deepspeech/modules/mask.py index fa6e0d552..f19e56f2f 100644 --- a/deepspeech/modules/mask.py +++ b/deepspeech/modules/mask.py @@ -25,7 +25,7 @@ __all__ = [ def sequence_mask(x_len, max_len=None, dtype='float32'): - """[summary] + """batch sequence mask. 
     Args:
         x_len ([paddle.Tensor]): xs lenght, [B]
diff --git a/deepspeech/utils/layer_tools.py b/deepspeech/utils/layer_tools.py
index 0ff4f6f54..1e8e55ed1 100644
--- a/deepspeech/utils/layer_tools.py
+++ b/deepspeech/utils/layer_tools.py
@@ -22,8 +22,6 @@ __all__ = [
 
 def summary(layer: nn.Layer, print_func=print):
     num_params = num_elements = 0
-    if print_func:
-        print_func(f"{layer.__class__.__name__} summary:")
     for name, param in layer.state_dict().items():
         if print_func:
             print_func(
@@ -31,9 +29,7 @@ def summary(layer: nn.Layer, print_func=print):
         num_elements += np.prod(param.shape)
         num_params += 1
     if print_func:
-        print_func(
-            f"{layer.__class__.__name__} has {num_params} parameters, {num_elements} elements."
-        )
+        print_func(f"Total parameters: {num_params}, {num_elements} elements.")
 
 
 def gradient_norm(layer: nn.Layer):
@@ -45,25 +41,6 @@ def gradient_norm(layer: nn.Layer):
     return grad_norm_dict
 
 
-def recursively_remove_weight_norm(layer: nn.Layer):
-    for layer in layer.sublayers():
-        try:
-            nn.utils.remove_weight_norm(layer)
-        except ValueError as e:
-            # ther is not weight norm hoom in this layer
-            pass
-
-
-def freeze(layer: nn.Layer):
-    for param in layer.parameters():
-        param.trainable = False
-
-
-def unfreeze(layer: nn.Layer):
-    for param in layer.parameters():
-        param.trainable = True
-
-
 def print_grads(model, print_func=print):
     if print_func is None:
         return
@@ -75,12 +52,32 @@ def print_grads(model, print_func=print):
 def print_params(model, print_func=print):
     if print_func is None:
         return
-    total = 0.0
+    total = num_params = 0
     for n, p in model.named_parameters():
-        msg = f"param: {n}: shape: {p.shape} stop_grad: {p.stop_gradient}"
+        msg = f"{n} | {p.shape} | {np.prod(p.shape)} | {not p.stop_gradient}"
         total += np.prod(p.shape)
+        num_params += 1
         if print_func:
             print_func(msg)
     if print_func:
-        print_func(f"Total parameters: {total}!")
+        print_func(f"Total parameters: {num_params}, {total} elements.")
+
+
+def recursively_remove_weight_norm(layer: nn.Layer):
+    for layer in layer.sublayers():
+        try:
+            nn.utils.remove_weight_norm(layer)
+        except ValueError as e:
+            # there is no weight norm hook in this layer
+            pass
+
+
+def freeze(layer: nn.Layer):
+    for param in layer.parameters():
+        param.trainable = False
+
+
+def unfreeze(layer: nn.Layer):
+    for param in layer.parameters():
+        param.trainable = True