update script

pull/3173/head
YangZhou 2 years ago
parent 1566837e99
commit 3e4fc6f0bb

@@ -4,7 +4,7 @@
## U2++ Attention Rescore
> Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz, support `avx512_vnni`
> Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz, support `avx512_vnni`
> RTF is measured end to end, covering both feature extraction and decoding.
### FP32
@@ -23,9 +23,9 @@ Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
#### RTF
```
I1027 10:52:38.662868 51665 u2_recognizer_main.cc:122] total wav duration is: 36108.9 sec
I1027 10:52:38.662858 51665 u2_recognizer_main.cc:121] total cost:11169.1 sec
I1027 10:52:38.662876 51665 u2_recognizer_main.cc:123] RTF is: 0.309318
I1027 10:52:38.662868 51665 recognizer_main.cc:122] total wav duration is: 36108.9 sec
I1027 10:52:38.662858 51665 recognizer_main.cc:121] total cost:9577.31 sec
I1027 10:52:38.662876 51665 recognizer_main.cc:123] RTF is: 0.265234
```
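For reference, the reported RTF is simply the total decode cost divided by the total audio duration. A quick sketch checking the `recognizer_main` numbers above with `bc`:
```
echo "scale=6; 9577.31 / 36108.9" | bc   # ~0.265234, matching the RTF in the log
```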
### INT8
@@ -52,16 +52,22 @@ I1110 09:59:52.551717 37249 u2_recognizer_main.cc:123] total decode cost:9737.63
I1110 09:59:52.551723 37249 u2_recognizer_main.cc:124] RTF is: 0.269674
```
### CTC Prefix Beam Search
### TLG decoder without attention rescoring
`local/decode.sh`
`local/recognizer_wfst.sh`
#### CER
```
Overall -> 6.74 % N=104765 C=98106 S=6516 D=143 I=401
Mandarin -> 6.74 % N=104762 C=98106 S=6513 D=143 I=401
English -> 0.00 % N=0 C=0 S=0 D=0 I=0
Overall -> 4.73 % N=104765 C=100001 S=4283 D=481 I=187
Mandarin -> 4.72 % N=104762 C=100001 S=4280 D=481 I=187
Other -> 100.00 % N=3 C=0 S=3 D=0 I=0
```
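The CER follows the usual (S + D + I) / N definition. A quick sketch checking the updated Overall row above (N=104765, S=4283, D=481, I=187) with `bc`:
```
echo "scale=6; (4283 + 481 + 187) * 100 / 104765" | bc   # ~4.7258, i.e. the 4.73 % above after rounding
```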
#### RTF
```
I0417 08:07:15.300631 75784 recognizer_main.cc:113] total wav duration is: 36108.9 sec
I0417 08:07:15.300642 75784 recognizer_main.cc:114] total decode cost:16353.7 sec
I0417 08:07:15.300648 75784 recognizer_main.cc:115] total rescore cost:936.858 sec
I0417 08:07:15.300653 75784 recognizer_main.cc:116] RTF is: 0.4529
```

@@ -16,7 +16,7 @@ text=$data/test/text
./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recognizer.quant.log \
u2_recognizer_main \
recognizer_main \
--use_fbank=true \
--num_bins=80 \
--cmvn_file=$model_dir/mean_std.json \

@@ -19,6 +19,15 @@ lang_dir=./data/lang_test/
graph=$lang_dir/TLG.fst
word_table=$lang_dir/words.txt
if [ ! -f $graph ]; then
# download the pre-built n-gram TLG graph; to build the graph yourself, refer to local/run_build_tlg.sh
mkdir -p $lang_dir
pushd $lang_dir
wget -c https://paddlespeech.bj.bcebos.com/speechx/examples/ngram/zh/tlg.zip
unzip tlg.zip
popd
fi
utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recognizer_wfst.log \
recognizer_main \
--use_fbank=true \
@@ -31,6 +40,8 @@ recognizer_main \
--receptive_field_length=7 \
--subsampling_rate=4 \
--wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
--rescoring_weight=0.0 \
--acoustic_scale=2 \
--result_wspecifier=ark,t:$data/split${nj}/JOB/result_recognizer_wfst.ark
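After the job finishes, two quick checks can be run by hand. This is only a sketch: it reuses the `$graph`, `$data` and `$nj` variables from the script above and assumes OpenFst's `fstinfo` is on PATH via the example's path.sh.
```
fstinfo $graph | grep -E "fst type|# of (states|arcs)"    # sanity-check the downloaded TLG graph
head -n 3 $data/split${nj}/1/result_recognizer_wfst.ark   # results are written in text ark format
```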

@@ -7,13 +7,12 @@ set -eo pipefail
# different acoustic models have different vocabs
ckpt_dir=data/model/asr1_chunk_conformer_u2pp_wenetspeech_static_1.3.0.model
unit=$ckpt_dir/vocab.txt # vocab file, line: char/spm_piece
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
stage=2
stop_stage=100
corpus=aishell
lexicon=data/lexicon.txt # line: word ph0 ... phn, aishell/resource_aishell/lexicon.txt
text=data/text # line: utt text, aishell/data_aishell/transcript/aishell_transcript_v0.8.txt
text=data/text # line: utt text, aishell/data_aishell/transcript/aishell_transcript_v0.8.txt filtered by data/train/text
. utils/parse_options.sh

@@ -12,7 +12,7 @@ TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin
export LC_ALL=C
export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/decoder:$ENGINE_BUILD/../common/frontend/audio:$ENGINE_BUILD/recognizer
export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/decoder:$ENGINE_BUILD/../common/frontend/audio:$ENGINE_BUILD/recognizer:../../../fc_patch/openfst/bin:$ENGINE_BUILD/../kaldi/fstbin:$ENGINE_BUILD/../kaldi/lmbin
#PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);")
export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH
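With the extra OpenFst/Kaldi tool directories added to PATH above, the environment can be sanity-checked. A minimal sketch, assuming this file is the example's path.sh and using the binary names these scripts call:
```
. path.sh
for bin in recognizer_main fstinfo; do
  command -v $bin || echo "$bin not found on PATH"
done
```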

@@ -69,23 +69,17 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then
fi
fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# compute fbank features
./local/feat.sh
fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# decode with fbank feat input
./local/decode.sh
fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# decode with wav input
./local/recognizer.sh
fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# decode with wav input using the quantized model
./local/recognizer_quant.sh
fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# decode with WFST (TLG graph)
./local/recognizer_wfst.sh
fi
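Individual stages can be selected from the command line. A usage sketch, assuming this script is the example's run.sh and parses --stage/--stop_stage via utils/parse_options.sh as the other scripts here do:
```
bash run.sh --stage 3 --stop_stage 3   # run only the WFST (TLG) decoding stage
```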
