#!/usr/bin/env bash

# Copyright 2017 Johns Hopkins University (Shinji Watanabe)
#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)

# Scores recognition output with sclite.
#
# Usage: <this-script> [options] <data-dir> <dict>
#
# Concatenates <data-dir>/data.*.json into <data-dir>/data.json, has
# json2trn.py produce reference/hypothesis .trn files from it, optionally
# cleans those files, and runs sclite on them. With more than one speaker,
# every reference/hypothesis pairing is scored and the per-pair results are
# handed to eval_perm_free_error.py.

set -e

[ -f ./path.sh ] && . ./path.sh

# Options (overridable on the command line through utils/parse_options.sh).
nlsyms=""          # non language symbol; passed to `filt.py -v` when non-empty
wer=false          # "true": additionally produce word-level results
bpe=""             # non-empty: word-level text is rebuilt with spm_decode
bpemodel=""        # model given to spm_decode (only read when bpe is non-empty)
remove_blank=true  # "true": strip "<blank> " from hypothesis files
filter=""          # sed script applied in place to hyp and ref when non-empty
num_spkrs=1        # number of speakers; 1, 2 and 3 are handled
help_message="Usage: $0 <data-dir> <dict>"

. utils/parse_options.sh

if [ $# -ne 2 ]; then
    echo "${help_message}"
    exit 1;
fi

dir=$1
dic=$2

# Reject speaker counts the script has no branch for. Previously a value of 4
# or more skipped scoring silently and still exited 0.
if ! { [ "${num_spkrs}" -ge 1 ] && [ "${num_spkrs}" -le 3 ]; } 2>/dev/null; then
    echo "$0: unsupported --num-spkrs '${num_spkrs}' (expected 1, 2 or 3)" >&2
    exit 1
fi

#######################################
# Clean one reference/hypothesis transcript pair in place.
# Globals:   remove_blank, nlsyms, filter (read)
# Arguments: $1 - reference .trn path, $2 - hypothesis .trn path
# Outputs:   rewrites both files; leaves .bak2/.org/.bak3 copies next to them
#######################################
clean_trn_pair() {
    local ref=$1
    local hyp=$2

    if [ "${remove_blank}" = true ]; then
        sed -i.bak2 -r 's/<blank> //g' "${hyp}"
    fi
    if [ -n "${nlsyms}" ]; then
        # filt.py reads the saved .org copy and writes over the original name.
        cp "${ref}" "${ref}.org"
        cp "${hyp}" "${hyp}.org"
        filt.py -v "${nlsyms}" "${ref}.org" > "${ref}"
        filt.py -v "${nlsyms}" "${hyp}.org" > "${hyp}"
    fi
    if [ -n "${filter}" ]; then
        sed -i.bak3 -f "${filter}" "${hyp}"
        sed -i.bak3 -f "${filter}" "${ref}"
    fi
}

#######################################
# Write the word-level version of a token-level .trn file.
# Globals:   bpe, bpemodel (read)
# Arguments: $1 - input .trn path, $2 - output .wrd.trn path
#######################################
trn_to_words() {
    local src=$1
    local dst=$2

    if [ -n "${bpe}" ]; then
        spm_decode --model="${bpemodel}" --input_format=piece < "${src}" \
            | sed -e "s/▁/ /g" > "${dst}"
    else
        # Drop the separators between tokens, put a space back before the
        # first "(" on each line, then turn <space> tokens into real spaces.
        sed -e "s/ //g" -e "s/(/ (/" -e "s/<space>/ /g" "${src}" > "${dst}"
    fi
}

#######################################
# Run sclite on one ref/hyp pair and store its stdout report.
# Arguments: $1 - reference .trn, $2 - hypothesis .trn, $3 - report path
#######################################
run_sclite() {
    local ref=$1
    local hyp=$2
    local report=$3

    sclite -r "${ref}" trn -h "${hyp}" trn -i rm -o all stdout > "${report}"
}

#######################################
# Score every reference/hypothesis speaker pairing and feed the reports to
# eval_perm_free_error.py.
# Globals:   dir, num_spkrs (read)
# Arguments: $1 - file-name infix ("" or ".wrd")
#            $2 - metric label used in the progress message
# Outputs:   result_r<R>h<H><infix>.txt and min_perm_result<infix>.json in
#            ${dir}; prints lines 2-4 of the JSON file
#######################################
score_all_pairings() {
    local infix=$1
    local label=$2
    local -a reports=()
    local r h report

    # Reference index is the outer loop, hypothesis index the inner one, so
    # reports are passed on in the order r1h1, r1h2, ..., r2h1, ...
    for (( r = 1; r <= num_spkrs; r++ )); do
        for (( h = 1; h <= num_spkrs; h++ )); do
            report="${dir}/result_r${r}h${h}${infix}.txt"
            reports+=("${report}")
            run_sclite "${dir}/ref${r}${infix}.trn" "${dir}/hyp${h}${infix}.trn" "${report}"
        done
    done

    echo "write ${label} results in ${dir}/result_r*h*${infix}.txt"
    eval_perm_free_error.py --num-spkrs "${num_spkrs}" \
        "${reports[@]}" > "${dir}/min_perm_result${infix}.json"
    sed -n '2,4p' "${dir}/min_perm_result${infix}.json"
}

# The glob needs a component between "data." and ".json", so the merged
# data.json itself is never one of the inputs.
cat "${dir}"/data.*.json > "${dir}/data.json"

if [ "${num_spkrs}" -eq 1 ]; then
    json2trn.py "${dir}/data.json" "${dic}" --num-spkrs "${num_spkrs}" \
        --refs "${dir}/ref.trn" --hyps "${dir}/hyp.trn"

    clean_trn_pair "${dir}/ref.trn" "${dir}/hyp.trn"

    run_sclite "${dir}/ref.trn" "${dir}/hyp.trn" "${dir}/result.txt"

    echo "write a CER (or TER) result in ${dir}/result.txt"
    grep -e Avg -e SPKR -m 2 "${dir}/result.txt"

    if [ "${wer}" = true ]; then
        trn_to_words "${dir}/ref.trn" "${dir}/ref.wrd.trn"
        trn_to_words "${dir}/hyp.trn" "${dir}/hyp.wrd.trn"

        run_sclite "${dir}/ref.wrd.trn" "${dir}/hyp.wrd.trn" "${dir}/result.wrd.txt"

        echo "write a WER result in ${dir}/result.wrd.txt"
        grep -e Avg -e SPKR -m 2 "${dir}/result.wrd.txt"
    fi
else
    # Multi-speaker case (2 or 3 speakers; other values were rejected above).
    ref_trns=()
    hyp_trns=()
    for (( n = 1; n <= num_spkrs; n++ )); do
        ref_trns+=("${dir}/ref${n}.trn")
        hyp_trns+=("${dir}/hyp${n}.trn")
    done

    json2trn.py "${dir}/data.json" "${dic}" --num-spkrs "${num_spkrs}" \
        --refs "${ref_trns[@]}" --hyps "${hyp_trns[@]}"

    for (( n = 1; n <= num_spkrs; n++ )); do
        clean_trn_pair "${dir}/ref${n}.trn" "${dir}/hyp${n}.trn"
    done

    score_all_pairings "" "CER (or TER)"

    if [ "${wer}" = true ]; then
        for (( n = 1; n <= num_spkrs; n++ )); do
            trn_to_words "${dir}/ref${n}.trn" "${dir}/ref${n}.wrd.trn"
            trn_to_words "${dir}/hyp${n}.trn" "${dir}/hyp${n}.wrd.trn"
        done

        score_all_pairings ".wrd" "WER"
    fi
fi