You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
PaddleSpeech/examples/librispeech/asr2/local/cacu_perplexity.sh

54 lines
1.1 KiB

#!/bin/bash
# Lowercases the BPE unit vocabulary and the test-set text, then runs
# ${LM_BIN_DIR}/cacu_perplexity.py on the lowercased files.
#
# Required environment:
#   LM_BIN_DIR  directory that contains cacu_perplexity.py
#
# All paths configured below are relative to the current working directory.
set -euo pipefail

# Fail early with a clear message instead of invoking "/cacu_perplexity.py".
: "${LM_BIN_DIR:?LM_BIN_DIR must point to the directory containing cacu_perplexity.py}"

# NOTE(review): stage/stop_stage are not referenced anywhere in this script;
# kept in case an outer recipe expects them to exist - confirm before removing.
stage=-1
stop_stage=100
expdir=exp
datadir=data
ngpu=0

# lm params
rnnlm_config_path=conf/lm/transformer.yaml
lmexpdir=exp/lm/transformer
lang_model=transformerLM.pdparams

# data path
test_set="${datadir}/test_clean/text"
test_set_lower="${datadir}/test_clean/text_lower"
train_set=train_960

# bpemode (unigram or bpe)
nbpe=5000
bpemode=unigram
bpeprefix="${datadir}/lang_char/${train_set}_${bpemode}${nbpe}"
bpemodel="${bpeprefix}.model"  # not used below; kept alongside the other bpe paths
vocabfile="${bpeprefix}_units.txt"
vocabfile_lower="${bpeprefix}_units_lower.txt"

output_dir="${expdir}/lm/transformer/perplexity"
mkdir -p -- "${output_dir}"

#######################################
# Write an ASCII-lowercased copy of a file if the source exists.
# A missing source is not fatal (an existing lowercased copy is left
# untouched), but a warning is printed so the skip is visible.
# Arguments: $1 - source file, $2 - destination file
# Outputs:   warning on stderr when the source is missing
#######################################
lowercase_if_present() {
  local src=$1
  local dst=$2
  if [[ -f "${src}" ]]; then
    tr A-Z a-z < "${src}" > "${dst}"
  else
    printf 'warning: %s not found; not regenerating %s\n' "${src}" "${dst}" >&2
  fi
}

# Transform the data upper case to lower
lowercase_if_present "${vocabfile}" "${vocabfile_lower}"
lowercase_if_present "${test_set}" "${test_set_lower}"

python "${LM_BIN_DIR}/cacu_perplexity.py" \
  --rnnlm "${lmexpdir}/${lang_model}" \
  --rnnlm-conf "${rnnlm_config_path}" \
  --vocab_path "${vocabfile_lower}" \
  --bpeprefix "${bpeprefix}" \
  --text_path "${test_set_lower}" \
  --output_dir "${output_dir}" \
  --ngpu "${ngpu}"