@@ -6,6 +6,11 @@ set -e
 stage=0
 stop_stage=100
 
+#tarfile=asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz
+tarfile=asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.1.model.tar.gz
+model_prefix=avg_1.jit
+model=${model_prefix}.pdmodel
+param=${model_prefix}.pdiparams
+
 . utils/parse_options.sh
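Note: the variables added above are plain shell defaults, and the Kaldi-style option parser sourced via utils/parse_options.sh lets any of them be overridden from the command line. A hypothetical invocation (the script name run.sh is assumed here) that switches back to the wenetspeech checkpoint named in the commented-out line:

    bash run.sh --stage 0 --stop_stage 4 \
        --tarfile asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz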
@@ -14,27 +19,25 @@ exp=exp
 mkdir -p $data $exp
 
+dir=$data/exp/deepspeech2_online/checkpoints
+
+# wenetspeech or aishell
+model_type=$(echo $tarfile | cut -d '_' -f 4)
+
 if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then
-    test -f $data/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz || wget -c https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz -P $data
+    test -f $data/$tarfile || wget -P $data -c https://paddlespeech.bj.bcebos.com/s2t/$model_type/asr0/$tarfile
 
     # wenetspeech ds2 model
     pushd $data
-    tar zxvf asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz
+    tar zxvf $tarfile
     popd
 
     # ds2 model demo inputs
     pushd $exp
     wget -c http://paddlespeech.bj.bcebos.com/speechx/examples/ds2_ol/onnx/static_ds2online_inputs.pickle
     popd
 fi
 
-dir=$data/exp/deepspeech2_online/checkpoints
-model=avg_1.jit.pdmodel
-param=avg_1.jit.pdiparams
-
 output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0
 
 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ];then
     # prune model by outputs
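Note: model_type is derived purely from the tarball filename, with cut -d '_' -f 4 picking the corpus tag out of the underscore-separated fields, so any new checkpoint archive has to keep that naming convention. For the two archives referenced in this patch:

    echo asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.1.model.tar.gz | cut -d '_' -f 4   # -> aishell
    echo asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz | cut -d '_' -f 4        # -> wenetspeech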
@@ -44,10 +47,20 @@ if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ];then
     ./local/prune.sh $dir $model $param $output_names $exp/prune
 fi
 
+# aishell rnn hidden is 1024
+# wenetspeech rnn hidden is 2048
+if [ $model_type == 'aishell' ];then
     input_shape_dict="{'audio_chunk':[1,-1,161], 'audio_chunk_lens':[1], 'chunk_state_c_box':[5, 1, 1024], 'chunk_state_h_box':[5,1,1024]}"
+elif [ $model_type == 'wenetspeech' ];then
+    input_shape_dict="{'audio_chunk':[1,-1,161], 'audio_chunk_lens':[1], 'chunk_state_c_box':[5, 1, 2048], 'chunk_state_h_box':[5,1,2048]}"
+else
+    echo "not support: $model_type"
+    exit -1
+fi
 
 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ];then
     # infer shape by new shape
     mkdir -p $exp/shape
+    echo $input_shape_dict
     python3 local/pd_infer_shape.py \
         --model_dir $dir \
         --model_filename $model \
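Note: the shape dict pins the recurrent state tensors chunk_state_c_box / chunk_state_h_box to the hidden size of the chosen model (1024 for aishell, 2048 for wenetspeech) while keeping the time axis of audio_chunk dynamic (-1); local/pd_infer_shape.py then re-propagates static shapes through the exported program. Because the dict is passed around as a single string, a quick sanity check (hypothetical, not part of the recipe) is to confirm it parses as a Python literal before stage 2 runs:

    python3 -c "import ast, sys; print(ast.literal_eval(sys.argv[1]))" "$input_shape_dict"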
@@ -63,14 +76,26 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ];then
     # to onnx
     ./local/tonnx.sh $dir $model $param $exp/model.onnx
 
-    ./local/infer_check.py --input_file $input_file --model_dir $dir --onnx_model $exp/model.onnx
+    ./local/infer_check.py --input_file $input_file --model_type $model_type --model_dir $dir --model_prefix $model_prefix --onnx_model $exp/model.onnx
 fi
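Note: local/tonnx.sh is presumably a thin wrapper around the paddle2onnx converter, and infer_check.py now also receives the model type and prefix so it can load the matching Paddle model for the comparison. A minimal sketch of an equivalent direct paddle2onnx call, assuming its standard CLI flags (the opset version is illustrative, not taken from this recipe):

    paddle2onnx --model_dir $dir \
        --model_filename $model \
        --params_filename $param \
        --save_file $exp/model.onnx \
        --opset_version 11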
 
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ] ;then
+# aishell rnn hidden is 1024
+# wenetspeech rnn hidden is 2048
+if [ $model_type == 'aishell' ];then
     input_shape="audio_chunk:1,-1,161 audio_chunk_lens:1 chunk_state_c_box:5,1,1024 chunk_state_h_box:5,1,1024"
+elif [ $model_type == 'wenetspeech' ];then
+    input_shape="audio_chunk:1,-1,161 audio_chunk_lens:1 chunk_state_c_box:5,1,2048 chunk_state_h_box:5,1,2048"
+else
+    echo "not support: $model_type"
+    exit -1
+fi
+
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ] ;then
+    # wenetspeech ds2 model exceeds the 2GB limit, will error.
     # simplifying onnx model
     ./local/onnx_opt.sh $exp/model.onnx $exp/model.opt.onnx "$input_shape"
 
-    ./local/infer_check.py --input_file $input_file --model_dir $dir --onnx_model $exp/model.opt.onnx
+    ./local/infer_check.py --input_file $input_file --model_type $model_type --model_dir $dir --model_prefix $model_prefix --onnx_model $exp/model.opt.onnx
 fi
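Note: local/onnx_opt.sh simplifies the exported graph with the per-model input shapes fixed, and infer_check.py is run once more against the optimized model; the comment about the wenetspeech model refers to the 2 GB protobuf size limit on a single ONNX file. A hypothetical spot check, not part of the recipe, to confirm the simplified file still passes the ONNX checker and to compare sizes:

    python3 -c "import onnx; onnx.checker.check_model('exp/model.opt.onnx')"
    ls -lh exp/model.onnx exp/model.opt.onnx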