#!/bin/bash
# Recipe script: builds a TLG decoding graph for the fbank DeepSpeech2 online
# model and checks it by decoding the aishell test set.
#
# Stages (selected with the stage / stop_stage variables below):
#   -1  download resources      0  prepare dict      1  train LM
#    2  build TLG               3  prepare test data 4  decode and compute WER

# -e: abort on the first failing command; pipefail: a pipeline fails if any
# of its stages fails.
set -eo pipefail

. path.sh

# Attention: the vocab replaced here is only for this script;
# different acoustic models have different vocabs.
ckpt_dir=data/fbank_model
unit=$ckpt_dir/data/lang_char/vocab.txt # vocab file, line: char/spm_piece
model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/

stage=-1
stop_stage=100
corpus=aishell
lexicon=data/lexicon.txt # line: word ph0 ... phn, aishell/resource_aishell/lexicon.txt
text=data/text # line: utt text, aishell/data_aishell/transcript/aishell_transcript_v0.8.txt

# NOTE(review): presumably Kaldi-style option parsing that lets command-line
# flags override the variables declared above -- which is why the defaults
# must be assigned before this line. Confirm against utils/parse_options.sh.
. utils/parse_options.sh

# Absolute data directory; quoted because $PWD may contain whitespace.
data=$PWD/data
mkdir -p "$data"

# Stage -1: download and unpack external resources that are not present yet.
if [ "$stage" -le -1 ] && [ "$stop_stage" -ge -1 ]; then
    # speech.ngram.zh archive, fetched into $data.
    # NOTE(review): the presence of the tarball itself is the skip marker, so
    # a partially downloaded archive is never resumed or extracted on a re-run.
    if [ ! -f "$data/speech.ngram.zh.tar.gz" ]; then
        pushd "$data"
        wget -c http://paddlespeech.bj.bcebos.com/speechx/examples/ngram/zh/speech.ngram.zh.tar.gz
        tar xvzf speech.ngram.zh.tar.gz
        popd
    fi

    # Model checkpoint archive, fetched into $ckpt_dir; data/mean_std.json
    # is used as the "already unpacked" marker.
    if [ ! -f "$ckpt_dir/data/mean_std.json" ]; then
        mkdir -p "$ckpt_dir"
        pushd "$ckpt_dir"
        wget -c https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/WIP1_asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz
        tar xzfv WIP1_asr0_deepspeech2_online_wenetspeech_ckpt_1.0.0a.model.tar.gz
        popd
    fi
fi

# Prerequisite checks, run regardless of the selected stages.

# The vocab file must exist before anything else can proceed.
if [ ! -f "$unit" ]; then
    echo "$0: No such file $unit" >&2
    exit 1
fi

# Build SRILM through the tools Makefile when ngram-count is not on PATH.
# `command -v` is the portable replacement for `which`; like `which`, it
# prints the resolved path when the tool is found.
if ! command -v ngram-count; then
    # MAIN_ROOT is not assigned in this script (presumably exported by
    # path.sh); fail loudly rather than pushd into "/tools" when it is unset.
    pushd "${MAIN_ROOT:?MAIN_ROOT must be set (expected from path.sh)}/tools"
    make srilm.done
    popd
fi

# Stage 0: prepare the dictionary directory (units + filtered lexicon).
mkdir -p data/local/dict
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # Prepare dict
    # line: char/spm_pieces
    cp "$unit" data/local/dict/units.txt

    # Generate the lexicon from the transcript only when none is provided.
    if [ ! -f "$lexicon" ]; then
        utils/text_to_lexicon.py --has_key true --text "$text" --lexicon "$lexicon"
        echo "Generate $lexicon from $text"
    fi

    # filter by vocab
    # line: word ph0 ... phn -> line: word char0 ... charn
    utils/fst/prepare_dict.py \
        --unit_file "$unit" \
        --in_lexicon "${lexicon}" \
        --out_lexicon data/local/dict/lexicon.txt
fi

# Stage 1: train the language model.
lm=data/local/lm
mkdir -p "$lm"

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # Train lm
    # The transcript is copied to $lm/text before the training script runs.
    cp "$text" "$lm/text"
    local/aishell_train_lms.sh
    echo "build LM done."
fi

# Stage 2: build TLG
if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # build T & L
    # args: dict dir, tmp dir, output lang dir
    utils/fst/compile_lexicon_token_fst.sh \
        data/local/dict data/local/tmp data/local/lang

    # build G & TLG
    # args: LM dir, lang dir, output graph dir
    utils/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1
fi

# Settings shared by the test-data and decoding stages.
# NOTE(review): these are assigned after utils/parse_options.sh is sourced,
# so they presumably cannot be overridden from the command line.
aishell_wav_scp=aishell_test.scp
nj=40
cmvn=$data/cmvn_fbank.ark
wfst=$data/lang_test

# Stage 3: fetch the aishell test audio, build the wav scp, split it into
# $nj parts and write the CMVN file.
if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then

    if [ ! -d "$data/test" ]; then
        pushd "$data"
        wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
        unzip aishell_test.zip
        popd

        # wavlist: absolute path of every wav (the glob is deliberately
        # left outside the quotes so it still expands).
        realpath "$data"/test/*/*.wav > "$data/wavlist"
        # utt_id: file name up to the first '.'.
        awk -F '/' '{ print $(NF) }' "$data/wavlist" | awk -F '.' '{ print $1 }' > "$data/utt_id"
        # scp: "<utt_id><TAB><wav path>" per line.
        paste "$data/utt_id" "$data/wavlist" > "$data/$aishell_wav_scp"
    fi

    # NOTE(review): presumably writes $data/split${nj}/<job>/$aishell_wav_scp,
    # which is what the decoding stage reads -- confirm in local/split_data.sh.
    ./local/split_data.sh "$data" "$data/$aishell_wav_scp" "$aishell_wav_scp" "$nj"

    # Write $cmvn from the checkpoint's mean_std.json.
    cmvn-json2kaldi --json_file="$ckpt_dir/data/mean_std.json" --cmvn_write_path="$cmvn"
fi

# Stage 4: decode the test set with the TLG graph and compute the WER.
wer=aishell_wer
label_file=aishell_result
export GLOG_logtostderr=1

# $exp (the results directory) is never assigned in this script. Keep any
# value inherited from path.sh or the environment, otherwise default to
# $PWD/exp so results are not written to "/<file>".
exp=${exp:-$PWD/exp}

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    mkdir -p "$exp"

    # TLG decoder
    # run.pl launches jobs 1..$nj, substituting the job index for JOB in the
    # log path and in the command's arguments.
    utils/run.pl JOB=1:"$nj" "$data/split${nj}/JOB/check_tlg.log" \
        recognizer_main \
        --wav_rspecifier="scp:$data/split${nj}/JOB/${aishell_wav_scp}" \
        --cmvn_file="$cmvn" \
        --model_path="$model_dir/avg_5.jit.pdmodel" \
        --streaming_chunk=30 \
        --use_fbank=true \
        --param_path="$model_dir/avg_5.jit.pdiparams" \
        --word_symbol_table="$wfst/words.txt" \
        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
        --model_cache_shapes="5-1-2048,5-1-2048" \
        --graph_path="$wfst/TLG.fst" --max_active=7500 \
        --acoustic_scale=1.2 \
        --result_wspecifier="ark,t:$data/split${nj}/JOB/result_check_tlg"

    # Merge the per-job results and score them against the reference text.
    cat "$data/split${nj}"/*/result_check_tlg > "$exp/${label_file}_check_tlg"
    utils/compute-wer.py --char=1 --v=1 "$text" "$exp/${label_file}_check_tlg" > "$exp/${wer}.check_tlg"
    echo "recognizer test have finished!!!"
    echo "please checkout in ${exp}/${wer}.check_tlg"
fi

exit 0