You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
104 lines
2.5 KiB
104 lines
2.5 KiB
3 years ago
|
#!/bin/bash
|
||
|
|
||
|
set -e
|
||
|
|
||
|
expdir=exp
|
||
|
datadir=data
|
||
|
nj=32
|
||
|
|
||
|
decode_config=conf/decode/decode.yaml
|
||
|
lang_model=rnnlm.model.best
|
||
|
lmexpdir=exp/train_rnnlm_pytorch_lm_transformer_cosine_batchsize32_lr1e-4_layer16_unigram5000_ngpu4/
|
||
|
|
||
|
lmtag='nolm'
|
||
|
|
||
|
recog_set="test-clean test-other dev-clean dev-other"
|
||
|
recog_set="test-clean"
|
||
|
|
||
|
# bpemode (unigram or bpe)
|
||
|
nbpe=5000
|
||
|
bpemode=unigram
|
||
|
bpeprefix="data/bpe_${bpemode}_${nbpe}"
|
||
|
bpemodel=${bpeprefix}.model
|
||
|
|
||
|
if [ $# != 3 ];then
|
||
|
echo "usage: ${0} config_path dict_path ckpt_path_prefix"
|
||
|
exit -1
|
||
|
fi
|
||
|
|
||
|
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
|
||
|
echo "using $ngpu gpus..."
|
||
|
|
||
|
config_path=$1
|
||
|
dict=$2
|
||
|
ckpt_prefix=$3
|
||
|
|
||
|
ckpt_dir=$(dirname `dirname ${ckpt_prefix}`)
|
||
|
echo "ckpt dir: ${ckpt_dir}"
|
||
|
|
||
|
ckpt_tag=$(basename ${ckpt_prefix})
|
||
|
echo "ckpt tag: ${ckpt_tag}"
|
||
|
|
||
|
chunk_mode=false
|
||
|
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
|
||
|
chunk_mode=true
|
||
|
fi
|
||
|
echo "chunk mode: ${chunk_mode}"
|
||
|
echo "decode conf: ${decode_config}"
|
||
|
|
||
|
# download language model
|
||
|
#bash local/download_lm_en.sh
|
||
|
#if [ $? -ne 0 ]; then
|
||
|
# exit 1
|
||
|
#fi
|
||
|
|
||
|
|
||
|
pids=() # initialize pids
|
||
|
|
||
|
for dmethd in join_ctc; do
|
||
|
(
|
||
|
echo "${dmethd} decoding"
|
||
|
for rtask in ${recog_set}; do
|
||
|
(
|
||
|
echo "${rtask} dataset"
|
||
|
decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag}
|
||
|
feat_recog_dir=${datadir}
|
||
|
mkdir -p ${decode_dir}
|
||
|
mkdir -p ${feat_recog_dir}
|
||
|
|
||
|
# split data
|
||
|
split_json.sh manifest.${rtask} ${nj}
|
||
|
|
||
|
#### use CPU for decoding
|
||
|
ngpu=0
|
||
|
|
||
|
# set batchsize 0 to disable batch decoding
|
||
|
${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \
|
||
|
python3 -u ${BIN_DIR}/recog.py \
|
||
|
--api v2 \
|
||
|
--config ${decode_config} \
|
||
|
--ngpu ${ngpu} \
|
||
|
--batchsize 0 \
|
||
|
--checkpoint_path ${ckpt_prefix} \
|
||
|
--dict-path ${dict} \
|
||
|
--recog-json ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask} \
|
||
|
--result-label ${decode_dir}/data.JOB.json \
|
||
|
--model-conf ${config_path} \
|
||
|
--model ${ckpt_prefix}.pdparams
|
||
|
|
||
|
#--rnnlm ${lmexpdir}/${lang_model} \
|
||
|
|
||
|
score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel} --wer false ${decode_dir} ${dict}
|
||
|
|
||
|
) &
|
||
|
pids+=($!) # store background pids
|
||
|
i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
|
||
|
[ ${i} -gt 0 ] && echo "$0: ${i} background jobs are failed." || true
|
||
|
done
|
||
|
)
|
||
|
done
|
||
|
|
||
|
echo "Finished"
|
||
|
|
||
|
exit 0
|