You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
37 lines
675 B
37 lines
675 B
#!/bin/bash
# Normalize a text corpus and train an n-gram language model from it.
#
# Usage: $0 [options] token_type exp/text exp/text.arpa
#
# Environment:
#   MAIN_ROOT  directory that contains utils/ (parse_options.sh, zh_tn.py,
#              ngram_train.sh).

set -e

# Only the stages whose number lies in [stage, stop_stage] are executed.
stage=0
stop_stage=100

# Settings forwarded to utils/ngram_train.sh as --order / --mem / --prune.
order=5
mem=80%
prune=0

# NOTE(review): a, q and b are not referenced anywhere else in this script.
# They are kept because they are defined before parse_options.sh is sourced,
# presumably so that it accepts them as command-line options - confirm
# before removing.
a=22
q=8
b=8

# Fail with a clear message instead of silently trying /utils/parse_options.sh
# when MAIN_ROOT is missing.
: "${MAIN_ROOT:?MAIN_ROOT must be set to the directory containing utils/}"

# presumably lets "--name value" arguments override the defaults above;
# verify against utils/parse_options.sh
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1;

# Exactly three positional arguments are required; otherwise print the usage
# line followed by the arguments that were actually received, then abort.
if [ "$#" -ne 3 ]; then
    echo "$0 token_type exp/text exp/text.arpa"
    # Quoted so the arguments are shown as given, without word splitting or
    # glob expansion.
    echo "$@"
    exit 1
fi

# Token type: char or word.
type=$1
# Input text file.
text=$2
# ARPA file path handed to the n-gram training step.
arpa=$3

# Stage 0: text normalization (tn) and word segmentation preprocessing.
# zh_tn.py is given the token type, the input text and "${text}.${type}.tn";
# that last path is what stage 1 feeds to ngram_train.sh.
if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    echo "process text."
    # Paths are quoted so that whitespace or glob characters in them do not
    # split or expand the arguments.
    python3 "${MAIN_ROOT}/utils/zh_tn.py" "${type}" "${text}" "${text}.${type}.tn"
fi

# Stage 1: train the n-gram language model.
# ngram_train.sh receives the order, memory and prune settings plus the
# "${text}.${type}.tn" file and the target ARPA path.
if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    echo "build lm."
    # Every expansion is quoted so that each value reaches ngram_train.sh as
    # exactly one argument.
    bash "${MAIN_ROOT}/utils/ngram_train.sh" \
        --order "${order}" \
        --mem "${mem}" \
        --prune "${prune}" \
        "${text}.${type}.tn" "${arpa}"
fi