You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PaddleSpeech/utils/score_sclite.sh

129 lines
4.6 KiB

#!/usr/bin/env bash
set -e

# Copyright 2017 Johns Hopkins University (Shinji Watanabe)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Scores the recognition results stored in <data-dir> with sclite.
#
# Usage: score_sclite.sh <data-dir> <dict>
#   <data-dir>  directory holding the data.*.json files; every output file
#               of this script is written there as well
#   <dict>      dictionary file handed to json2trn.py

# Pull in the environment setup when a path.sh exists in the working directory.
[ -f ./path.sh ] && . ./path.sh

# Option defaults. They are assigned before utils/parse_options.sh is sourced;
# presumably that helper (not shown here) overrides them from the command line.
nlsyms=""          # non-language symbol list given to filt.py; empty skips that step
wer=false          # when true, word-level results are produced as well
bpe=""             # when non-empty, word-level text is rebuilt with spm_decode
bpemodel=""        # model file passed to spm_decode --model
remove_blank=true  # when true, "<blank> " is stripped from the hypotheses
filter=""          # sed script applied to hyp and ref files; empty skips that step
num_spkrs=1        # 1 selects single-speaker scoring, other values below 4 the multi-speaker path
help_message="Usage: $0 <data-dir> <dict>"

. utils/parse_options.sh

# Exactly two positional arguments must remain after option parsing.
if [ $# -ne 2 ]; then
  echo "${help_message}"
  exit 1
fi

dir=$1
dic=$2

# Concatenate every data.*.json in the directory into a single data.json.
# The directory part is quoted so that a path containing whitespace is not
# word-split; the glob itself stays outside the quotes so it still expands.
cat "${dir}"/data.*.json > "${dir}/data.json"
# Abort when any stage of a pipeline fails, not only the last one. Without
# this, a failing spm_decode is hidden by the sed that follows it and sclite
# would go on to score a truncated or empty word-level transcript.
set -o pipefail

# clean_trn_pair <ref.trn> <hyp.trn>
# Post-processes one reference/hypothesis pair in place, driven by the
# remove_blank, nlsyms and filter options.
clean_trn_pair() {
  local ref_trn=$1
  local hyp_trn=$2

  if ${remove_blank}; then
    # Only the hypothesis is edited; sed keeps the previous content in *.bak2.
    sed -i.bak2 -r 's/<blank> //g' "${hyp_trn}"
  fi
  if [ -n "${nlsyms}" ]; then
    # Keep the unfiltered transcripts as *.org and regenerate the originals
    # through filt.py.
    cp "${ref_trn}" "${ref_trn}.org"
    cp "${hyp_trn}" "${hyp_trn}.org"
    filt.py -v "${nlsyms}" "${ref_trn}.org" > "${ref_trn}"
    filt.py -v "${nlsyms}" "${hyp_trn}.org" > "${hyp_trn}"
  fi
  if [ -n "${filter}" ]; then
    # Apply the user-supplied sed script to both files (backups in *.bak3).
    sed -i.bak3 -f "${filter}" "${hyp_trn}"
    sed -i.bak3 -f "${filter}" "${ref_trn}"
  fi
}

# trn_to_words <in.trn> <out.wrd.trn>
# Writes a word-level copy of a token-level transcript.
trn_to_words() {
  local src_trn=$1
  local dst_trn=$2

  if [ -n "${bpe}" ]; then
    # Decode the pieces with the sentencepiece model, then turn the "▁"
    # marker into a plain space.
    spm_decode --model="${bpemodel}" --input_format=piece < "${src_trn}" \
      | sed -e "s/▁/ /g" > "${dst_trn}"
  else
    # Drop every space, put a space back in front of the first "(" of each
    # line (presumably the trailing utterance id), and turn <space> tokens
    # into real spaces.
    sed -e "s/ //g" -e "s/(/ (/" -e "s/<space>/ /g" "${src_trn}" > "${dst_trn}"
  fi
}

# run_sclite <ref.trn> <hyp.trn> <result.txt>
# Scores one hypothesis file against one reference file and stores the
# report that sclite prints on stdout.
run_sclite() {
  local ref_trn=$1
  local hyp_trn=$2
  local result_txt=$3

  sclite -r "${ref_trn}" trn -h "${hyp_trn}" trn -i rm -o all stdout > "${result_txt}"
}

# score_permutations <suffix> <label>
# Multi-speaker scoring: runs sclite for every (reference r, hypothesis h)
# combination, passes all reports to eval_perm_free_error.py and prints
# lines 2-4 of the JSON it produces.
#   <suffix>  "" for token-level files, ".wrd" for word-level files
#   <label>   metric name used in the progress message
score_permutations() {
  local suffix=$1
  local label=$2
  local r h result_txt
  local -a results=()

  # Reference index is the outer loop and hypothesis index the inner one, so
  # the reports are listed as r1h1, r1h2, ..., r2h1, ...
  for (( r = 1; r <= num_spkrs; r++ )); do
    for (( h = 1; h <= num_spkrs; h++ )); do
      result_txt="${dir}/result_r${r}h${h}${suffix}.txt"
      results+=("${result_txt}")
      run_sclite "${dir}/ref${r}${suffix}.trn" "${dir}/hyp${h}${suffix}.trn" "${result_txt}"
    done
  done

  echo "write ${label} results in ${dir}/result_r*h*${suffix}.txt"
  eval_perm_free_error.py --num-spkrs "${num_spkrs}" \
    "${results[@]}" > "${dir}/min_perm_result${suffix}.json"
  sed -n '2,4p' "${dir}/min_perm_result${suffix}.json"
}

if [ "${num_spkrs}" -eq 1 ]; then
  # ---- single speaker ----
  # json2trn.py is asked to write ref.trn / hyp.trn from data.json and the dictionary.
  json2trn.py "${dir}/data.json" "${dic}" --num-spkrs "${num_spkrs}" \
    --refs "${dir}/ref.trn" --hyps "${dir}/hyp.trn"

  clean_trn_pair "${dir}/ref.trn" "${dir}/hyp.trn"

  run_sclite "${dir}/ref.trn" "${dir}/hyp.trn" "${dir}/result.txt"
  echo "write a CER (or TER) result in ${dir}/result.txt"
  # Show the first two report lines that mention Avg or SPKR.
  grep -e Avg -e SPKR -m 2 "${dir}/result.txt"

  if ${wer}; then
    trn_to_words "${dir}/ref.trn" "${dir}/ref.wrd.trn"
    trn_to_words "${dir}/hyp.trn" "${dir}/hyp.wrd.trn"

    run_sclite "${dir}/ref.wrd.trn" "${dir}/hyp.wrd.trn" "${dir}/result.wrd.txt"
    echo "write a WER result in ${dir}/result.wrd.txt"
    grep -e Avg -e SPKR -m 2 "${dir}/result.wrd.txt"
  fi
elif [ "${num_spkrs}" -lt 4 ]; then
  # ---- multiple speakers ----
  # One ref<n>.trn / hyp<n>.trn pair per speaker. Arrays keep each path a
  # single argument even when the directory name contains whitespace.
  ref_trns=()
  hyp_trns=()
  for (( n = 1; n <= num_spkrs; n++ )); do
    ref_trns+=("${dir}/ref${n}.trn")
    hyp_trns+=("${dir}/hyp${n}.trn")
  done
  json2trn.py "${dir}/data.json" "${dic}" --num-spkrs "${num_spkrs}" \
    --refs "${ref_trns[@]}" --hyps "${hyp_trns[@]}"

  for (( n = 1; n <= num_spkrs; n++ )); do
    clean_trn_pair "${dir}/ref${n}.trn" "${dir}/hyp${n}.trn"
  done

  score_permutations "" "CER (or TER)"

  if ${wer}; then
    for (( n = 1; n <= num_spkrs; n++ )); do
      trn_to_words "${dir}/ref${n}.trn" "${dir}/ref${n}.wrd.trn"
      trn_to_words "${dir}/hyp${n}.trn" "${dir}/hyp${n}.wrd.trn"
    done

    score_permutations ".wrd" "WER"
  fi
else
  # Previously this case fell through without any output. The exit status is
  # left unchanged, but the caller is now told that nothing was scored.
  echo "$0: num_spkrs=${num_spkrs} is not handled; no scoring was performed" >&2
fi