#!/bin/bash
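# Decode a trained u2_kaldi model on the LibriSpeech recognition sets with
# several decoding methods and score the hypotheses with sclite.
# Assumes MAIN_ROOT, BIN_DIR and decode_cmd are provided by the recipe
# environment (e.g. path.sh / cmd.sh) before this script runs.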
set -e

expdir=exp
datadir=data
nj=32           # number of parallel decoding jobs

lmtag='nolm'    # tag used in decode dir names ('nolm' = no external language model)
train_set=train_960
recog_set="test-clean test-other dev-clean dev-other"
# decode only test-clean by default; remove the next line to decode all sets
recog_set="test-clean"

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix=data/lang_char/${train_set}_${bpemode}${nbpe}
bpemodel=${bpeprefix}.model

config_path=conf/transformer.yaml
decode_config_path=conf/decode/decode_base.yaml
dict=data/lang_char/${train_set}_${bpemode}${nbpe}_units.txt
ckpt_prefix=
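# parse_options.sh allows any of the variables defined above to be overridden
# from the command line, e.g. --nj 16 --ckpt_prefix <path>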
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
if [ -z "${ckpt_prefix}" ]; then
    echo "usage: $0 --ckpt_prefix ckpt_prefix"
    exit 1
fi

# count the GPUs listed in CUDA_VISIBLE_DEVICES
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."

# ckpt_dir is two directory levels above the checkpoint; ckpt_tag is its basename
ckpt_dir=$(dirname $(dirname ${ckpt_prefix}))
echo "ckpt dir: ${ckpt_dir}"

ckpt_tag=$(basename ${ckpt_prefix})
echo "ckpt tag: ${ckpt_tag}"

# enable chunk mode when the config file name contains "chunk_"
chunk_mode=false
if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]]; then
    chunk_mode=true
fi
echo "chunk mode: ${chunk_mode}"

# download language model
#bash local/download_lm_en.sh
#if [ $? -ne 0 ]; then
#    exit 1
#fi
pids=() # initialize pids
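# decode every recognition set with each decoding method (attention,
# ctc_greedy_search, ctc_prefix_beam_search, attention_rescoring);
# each dataset is decoded in a background subshell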
for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
(
    echo "decode method: ${dmethd}"
    for rtask in ${recog_set}; do
    (
        echo "dataset: ${rtask}"
        decode_dir=${ckpt_dir}/decode/decode_${rtask/-/_}_${dmethd}_$(basename ${config_path%.*})_${lmtag}_${ckpt_tag}
        feat_recog_dir=${datadir}
        mkdir -p ${decode_dir}
        mkdir -p ${feat_recog_dir}

        # split the manifest into ${nj} pieces so decoding can run in parallel
        split_json.sh manifest.${rtask} ${nj}

        #### use CPU for decoding
        ngpu=0

        # set batch_size to 0 to disable batch decoding
        batch_size=1

        ${decode_cmd} JOB=1:${nj} ${decode_dir}/log/decode.JOB.log \
            python3 -u ${BIN_DIR}/test.py \
                --model-name u2_kaldi \
                --run-mode test \
                --ngpu ${ngpu} \
                --dict-path ${dict} \
                --config ${config_path} \
                --decode_cfg ${decode_config_path} \
                --checkpoint_path ${ckpt_prefix} \
                --result-file ${decode_dir}/data.JOB.json \
                --opts decode.decoding_method ${dmethd} \
                --opts decode.decode_batch_size ${batch_size} \
                --opts test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask}
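        # score the decoded hypotheses against the reference transcripts with sclite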
        score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel} --wer false ${decode_dir} ${dict}

    ) &
    pids+=($!) # store background pids
    # wait for all background decoding jobs; count how many failed
    i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
    [ ${i} -gt 0 ] && echo "$0: ${i} background jobs failed." || true
    done
)
done

echo "Finished"

exit 0