ds2 offline cer 6p4287

pull/793/head
Hui Zhang 3 years ago
parent 673cc4a081
commit 341038b626

@ -41,13 +41,6 @@ def conv_output_size(I, F, P, S):
return (I - F + 2 * P - S) // S return (I - F + 2 * P - S) // S
# receptive field calculator
# https://fomoro.com/research/article/receptive-field-calculator
# https://stanford.edu/~shervine/teaching/cs-230/cheatsheet-convolutional-neural-networks#hyperparameters
# https://distill.pub/2019/computing-receptive-fields/
# R_{l-1} = S_l * R_l + (K_l - S_l)  (R: receptive field size, S: stride, K: kernel size of layer l)
class ConvBn(nn.Layer): class ConvBn(nn.Layer):
"""Convolution layer with batch normalization. """Convolution layer with batch normalization.

@ -108,8 +108,8 @@ class Conv2dSubsampling4(BaseSubsampling):
nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim)) nn.Linear(odim * (((idim - 1) // 2 - 1) // 2), odim))
self.subsampling_rate = 4 self.subsampling_rate = 4
# The right context for every conv layer is computed by: # The right context for every conv layer is computed by:
# (kernel_size - 1) / 2 * stride * frame_rate_of_this_layer # (kernel_size - 1) * frame_rate_of_this_layer
# 6 = (3 - 1) / 2 * 2 * 1 + (3 - 1) / 2 * 2 * 2 # 6 = (3 - 1) * 1 + (3 - 1) * 2
self.right_context = 6 self.right_context = 6
def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0 def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
@ -160,10 +160,10 @@ class Conv2dSubsampling6(BaseSubsampling):
# when Padding == 0, O = (I - F - S) // S # when Padding == 0, O = (I - F - S) // S
self.linear = nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), odim) self.linear = nn.Linear(odim * (((idim - 1) // 2 - 2) // 3), odim)
# The right context for every conv layer is computed by: # The right context for every conv layer is computed by:
# (kernel_size - 1) / 2 * stride * frame_rate_of_this_layer # (kernel_size - 1) * frame_rate_of_this_layer
# 14 = (3 - 1) / 2 * 2 * 1 + (5 - 1) / 2 * 3 * 2 # 10 = (3 - 1) * 1 + (5 - 1) * 2
self.subsampling_rate = 6 self.subsampling_rate = 6
self.right_context = 14 self.right_context = 10
def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0 def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0
) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]: ) -> Tuple[paddle.Tensor, paddle.Tensor, paddle.Tensor]:
@ -214,8 +214,8 @@ class Conv2dSubsampling8(BaseSubsampling):
odim) odim)
self.subsampling_rate = 8 self.subsampling_rate = 8
# The right context for every conv layer is computed by: # The right context for every conv layer is computed by:
# (kernel_size - 1) / 2 * stride * frame_rate_of_this_layer # (kernel_size - 1) * frame_rate_of_this_layer
# 14 = (3 - 1) / 2 * 2 * 1 + (3 - 1) / 2 * 2 * 2 + (3 - 1) / 2 * 2 * 4 # 14 = (3 - 1) * 1 + (3 - 1) * 2 + (3 - 1) * 4
self.right_context = 14 self.right_context = 14
def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0 def forward(self, x: paddle.Tensor, x_mask: paddle.Tensor, offset: int=0

@ -10,7 +10,7 @@
| Model | Params | Release | Config | Test set | Loss | CER | | Model | Params | Release | Config | Test set | Loss | CER |
| --- | --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- | --- |
| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 | | DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug | test | 5.71956205368042 | 0.064287 |
| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |
| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
| DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 | | DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |

@ -42,7 +42,7 @@ model:
share_rnn_weights: False share_rnn_weights: False
training: training:
n_epoch: 50 n_epoch: 80
lr: 2e-3 lr: 2e-3
lr_decay: 0.83 lr_decay: 0.83
weight_decay: 1e-06 weight_decay: 1e-06

@ -19,7 +19,7 @@ fi
mkdir -p exp mkdir -p exp
seed=1024 seed=10086
if [ ${seed} ]; then if [ ${seed} ]; then
export FLAGS_cudnn_deterministic=True export FLAGS_cudnn_deterministic=True
fi fi

@ -1,19 +1,29 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 2 ]; then if [ $# != 3 ]; then
echo "usage: ${0} ckpt_dir avg_num" echo "usage: ${0} [best|latest] ckpt_dir avg_num"
exit -1 exit -1
fi fi
ckpt_dir=${1} ckpt_dir=${1}
average_num=${2} avg_mode=${2} # best,latest
average_num=${3}
decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
if [ $avg_mode == best ];then
# best
avg_model.py \ avg_model.py \
--dst_model ${decode_checkpoint} \ --dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_dir} \ --ckpt_dir ${ckpt_dir} \
--num ${average_num} \ --num ${average_num} \
--val_best --val_best
else
# latest
avg_model.py \
--dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_dir} \
--num ${average_num}
fi
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in avg ckpt!" echo "Failed in avg ckpt!"

@ -1,7 +1,7 @@
#!/bin/bash #!/bin/bash
if [ $# != 4 ];then if [ $# != 5 ];then
echo "usage: $0 ckpt_prefix model_config mean_std vocab" echo "usage: $0 ckpt_prefix model_config mean_std vocab pack_name"
exit -1 exit -1
fi fi
@ -9,6 +9,7 @@ ckpt_prefix=$1
model_config=$2 model_config=$2
mean_std=$3 mean_std=$3
vocab=$4 vocab=$4
pack_name=$5
output=release output=release
@ -27,6 +28,6 @@ cp ${ckpt_prefix}.* ${output}
# model config, mean std, vocab # model config, mean std, vocab
cp ${model_config} ${mean_std} ${vocab} ${output} cp ${model_config} ${mean_std} ${vocab} ${output}
tar zcvf release.tar.gz ${output} tar zcvf ${pack_name}.release.tar.gz ${output}
echo "tarball done!" echo "tarball: ${pack_name}.release.tar.gz done!"

Loading…
Cancel
Save