You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
2.6 KiB
89 lines
2.6 KiB
#!/bin/bash
|
|
set +x
|
|
set -e
|
|
|
|
export GLOG_logtostderr=1
|
|
|
|
. ./path.sh || exit 1;
|
|
|
|
# ds2 means deepspeech2 (acoutic model type)
|
|
dir=$PWD/exp/ds2_graph_with_slot
|
|
data=$PWD/data
|
|
stage=0
|
|
stop_stage=10
|
|
|
|
mkdir -p $dir
|
|
|
|
model_dir=$PWD/resource/model
|
|
vocab=$model_dir/vocab.txt
|
|
cmvn=$data/cmvn.ark
|
|
text_with_slot=$data/text_with_slot
|
|
resource=$PWD/resource
|
|
# download resource
|
|
if [ ! -f $cmvn ]; then
|
|
wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/resource.tar.gz
|
|
tar xzfv resource.tar.gz
|
|
ln -s ./resource/data .
|
|
fi
|
|
|
|
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
|
|
# make dict
|
|
unit_file=$vocab
|
|
mkdir -p $dir/local/dict
|
|
cp $unit_file $dir/local/dict/units.txt
|
|
cp $text_with_slot $dir/train_text
|
|
utils/fst/prepare_dict.py --unit_file $unit_file --in_lexicon $data/lexicon.txt \
|
|
--out_lexicon $dir/local/dict/lexicon.txt
|
|
# add slot to lexicon, just in case the lm training script filter the slot.
|
|
echo "<MONEY_SLOT> 一" >> $dir/local/dict/lexicon.txt
|
|
echo "<DATE_SLOT> 一" >> $dir/local/dict/lexicon.txt
|
|
echo "<ADDRESS_SLOT> 一" >> $dir/local/dict/lexicon.txt
|
|
echo "<YEAR_SLOT> 一" >> $dir/local/dict/lexicon.txt
|
|
echo "<TIME_SLOT> 一" >> $dir/local/dict/lexicon.txt
|
|
fi
|
|
|
|
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
|
|
# train lm
|
|
lm=$dir/local/lm
|
|
mkdir -p $lm
|
|
# this script is different with the common lm training script
|
|
local/train_lm_with_slot.sh
|
|
fi
|
|
|
|
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
|
|
# make T & L
|
|
local/compile_lexicon_token_fst.sh $dir/local/dict $dir/local/tmp $dir/local/lang
|
|
mkdir -p $dir/local/lang_test
|
|
# make slot graph
|
|
local/mk_slot_graph.sh $resource/graph $dir/local/lang_test
|
|
# make TLG
|
|
local/mk_tlg_with_slot.sh $dir/local/lm $dir/local/lang $dir/local/lang_test || exit 1;
|
|
mv $dir/local/lang_test/TLG.fst $dir/local/lang/
|
|
fi
|
|
|
|
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
|
|
# test TLG
|
|
model_dir=$PWD/resource/model
|
|
cmvn=$data/cmvn.ark
|
|
wav_scp=$data/wav.scp
|
|
graph=$dir/local/lang
|
|
|
|
recognizer_test_main \
|
|
--wav_rspecifier=scp:$wav_scp \
|
|
--cmvn_file=$cmvn \
|
|
--streaming_chunk=30 \
|
|
--use_fbank=true \
|
|
--model_path=$model_dir/avg_10.jit.pdmodel \
|
|
--param_path=$model_dir/avg_10.jit.pdiparams \
|
|
--model_cache_shapes="5-1-2048,5-1-2048" \
|
|
--model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
|
|
--word_symbol_table=$graph/words.txt \
|
|
--graph_path=$graph/TLG.fst --max_active=7500 \
|
|
--acoustic_scale=12 \
|
|
--result_wspecifier=ark,t:./exp/result_run.txt
|
|
|
|
# the data/wav.trans is the label.
|
|
utils/compute-wer.py --char=1 --v=1 data/wav.trans exp/result_run.txt > exp/wer_run
|
|
tail -n 7 exp/wer_run
|
|
fi
|