You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
PaddleSpeech/utils/ngram_train.sh

29 lines
573 B

#!/bin/bash
set -e
order=5
mem=80%
prune=0
a=22
q=8
b=8
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
if [ $# != 2 ]; then
echo "$0 exp/text exp/text.arpa"
exit 1
fi
text=${1}
arpa=${2}
lmbin=${2}.klm.bin
# https://kheafield.com/code/kenlm/estimation/
echo "build arpa lm."
lmplz -o ${order} -S ${mem} --prune ${prune} < ${text} > ${arpa} || { echo "train kenlm error!"; exit -1; }
# https://kheafield.com/code/kenlm/
echo "build binary lm."
build_binary -a ${a} -q ${q} -b ${b} trie ${arpa} ${lmbin} || { echo "build kenlm binary error!"; exit -1; }