fix utils for ngram and wfst

3 years ago · 37d9c08da5
parent cf1a395e15
commit 37d9c08da5
17 changed files with 374 additions and 195 deletions
--- a/.gitignore
+++ b/.gitignore
@ -33,6 +33,12 @@ tools/Miniconda3-latest-Linux-x86_64.sh
 tools/activate_python.sh
 tools/miniconda.sh
 tools/CRF++-0.58/
+tools/liblbfgs-1.10/
+tools/srilm/
+tools/env.sh
+tools/openfst-1.8.1/
+tools/libsndfile/
+tools/python-soundfile/

 speechx/fc_patch/

--- a/speechx/examples/build_wfst/path.sh
+++ b/speechx/examples/build_wfst/path.sh
@ -0,0 +1,27 @@
+# This contains the locations of binarys build required for running the examples.
+
+SPEECHX_ROOT=$PWD/../../../
+MAIN_ROOT=$SPEECHX_ROOT/../
+SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
+
+SPEECHX_TOOLS=$SPEECHX_ROOT/tools
+TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
+
+[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }
+
+export LC_AL=C
+
+export PATH=$PATH:$TOOLS_BIN
+
+# srilm
+export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs
+export SRILM=${MAIN_ROOT}/tools/srilm
+export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64
+
+# Kaldi
+export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi
+[ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
+export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
+[ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present, can not using Kaldi!"
+[ -f $KALDI_ROOT/tools/config/common_path.sh ] && . $KALDI_ROOT/tools/config/common_path.sh
--- a/speechx/examples/build_wfst/run.sh
+++ b/speechx/examples/build_wfst/run.sh
@ -0,0 +1,64 @@
+#!/bin/bash
+set -eo pipefail
+
+. path.sh
+
+stage=-1
+stop_stage=100
+corpus=aishell
+lmtype=srilm
+
+lexicon=  # aishell/resource_aishell/lexicon.txt
+text=     # aishell/data_aishell/transcript/aishell_transcript_v0.8.txt
+
+source parse_options.sh
+
+if [ ! which ngram-count ]; then
+    pushd $MAIN_ROOT/tools
+    make srilm.done
+    popd
+fi
+
+if [ ! which fstprint ]; then
+    pushd $MAIN_ROOT/tools
+    make kaldi.done
+    popd
+fi
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # 7.1 Prepare dict
+    unit_file=data/vocab.txt
+    mkdir -p data/local/dict
+    cp $unit_file data/local/dict/units.txt
+    utils/fst/prepare_dict.py \
+        --unit_file $unit_file \
+        --in_lexicon ${lexicon} \
+        --out_lexicon data/local/dict/lexicon.txt
+fi
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # 7.2 Train lm
+    lm=data/local/lm
+    mkdir -p data/train
+    mkdir -p $lm
+    utils/manifest_key_value.py \
+        --manifest_path data/manifest.train \
+        --output_path data/train
+    utils/filter_scp.pl data/train/text \
+        $text > $lm/text
+    if [ $lmtype == 'srilm' ];then
+        local/aishell_train_lms.sh
+    else
+        utils/ngram_train.sh --order 3 $lm/text $lm/lm.arpa
+    fi
+fi
+
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then 
+    # 7.3 Build decoding TLG
+    utils/fst/compile_lexicon_token_fst.sh \
+        data/local/dict data/local/tmp data/local/lang
+    utils/fst/make_tlg.sh data/local/lm data/local/lang data/lang_test || exit 1;
+fi
+
+echo "Aishell build TLG done."
+exit 0
--- a/speechx/examples/ds2_ol/aishell/README.md
+++ b/speechx/examples/ds2_ol/aishell/README.md
@ -10,12 +10,18 @@ Other -> 0.00 % N=0 C=0 S=0 D=0 I=0

 ## CTC Prefix Beam Search w LM

+LM: zh_giga.no_cna_cmn.prune01244.klm
 ```
-
+Overall -> 7.86 % N=104768 C=96865 S=7573 D=330 I=327
+Mandarin -> 7.86 % N=104768 C=96865 S=7573 D=330 I=327
+Other -> 0.00 % N=0 C=0 S=0 D=0 I=0
 ```

 ## CTC WFST

+LM: aishell train
 ```
-
+Overall -> 11.14 % N=103017 C=93363 S=9583 D=71 I=1819
+Mandarin -> 11.14 % N=103017 C=93363 S=9583 D=71 I=1818
+Other -> 0.00 % N=0 C=0 S=0 D=0 I=1
 ```
--- a/speechx/examples/ds2_ol/aishell/path.sh
+++ b/speechx/examples/ds2_ol/aishell/path.sh
@ -11,4 +11,4 @@ TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
 export LC_AL=C

 SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/decoder:$SPEECHX_EXAMPLES/ds2_ol/feat
-export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
+export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
--- a/speechx/examples/ds2_ol/aishell/run.sh
+++ b/speechx/examples/ds2_ol/aishell/run.sh
@ -5,7 +5,10 @@ set -e
 . path.sh

 nj=40
+stage=0
+stop_stage=100

+. utils/parse_options.sh

 # 1. compile
 if [ ! -d ${SPEECHX_EXAMPLES} ]; then
@ -26,102 +29,112 @@ vocb_dir=$ckpt_dir/data/lang_char/
 mkdir -p exp
 exp=$PWD/exp

-aishell_wav_scp=aishell_test.scp
-if [ ! -d $data/test ]; then
-    pushd $data
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
-    unzip  aishell_test.zip
-    popd
-
-    realpath $data/test/*/*.wav > $data/wavlist
-    awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
-    paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
-fi
-
-
-if [ ! -d $ckpt_dir ]; then
-    mkdir -p $ckpt_dir
-    wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
-    tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
-fi
-
-lm=$data/zh_giga.no_cna_cmn.prune01244.klm
-if [ ! -f $lm ]; then
-    pushd $data
-    wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
-    popd
+if [ $stage -le 0 ] && [ $stop_stage -ge 0 ];then
+    aishell_wav_scp=aishell_test.scp
+    if [ ! -d $data/test ]; then
+        pushd $data
+        wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
+        unzip  aishell_test.zip
+        popd
+
+        realpath $data/test/*/*.wav > $data/wavlist
+        awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
+        paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
+    fi
+
+
+    if [ ! -d $ckpt_dir ]; then
+        mkdir -p $ckpt_dir
+        wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+        tar xzfv $model_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
+    fi
+
+    lm=$data/zh_giga.no_cna_cmn.prune01244.klm
+    if [ ! -f $lm ]; then
+        pushd $data
+        wget -c https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm
+        popd
+    fi
 fi

 # 3. make feature
+text=$data/test/text
 label_file=./aishell_result
 wer=./aishell_wer

 export GLOG_logtostderr=1

-# 3. gen linear feat
-cmvn=$PWD/cmvn.ark
-cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn

+if [ $stage -le 1 ] && [ $stop_stage -ge 1 ]; then
+    # 3. gen linear feat
+    cmvn=$data/cmvn.ark
+    cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn

-./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj
+    ./local/split_data.sh $data $data/$aishell_wav_scp $aishell_wav_scp $nj

-utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat.log \
-linear-spectrogram-wo-db-norm-ol \
-    --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
-    --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \
-    --cmvn_file=$cmvn \
-    --streaming_chunk=0.36
-
-text=$data/test/text
+    utils/run.pl JOB=1:$nj $data/split${nj}/JOB/feat.log \
+    linear-spectrogram-wo-db-norm-ol \
+        --wav_rspecifier=scp:$data/split${nj}/JOB/${aishell_wav_scp} \
+        --feature_wspecifier=ark,scp:$data/split${nj}/JOB/feat.ark,$data/split${nj}/JOB/feat.scp \
+        --cmvn_file=$cmvn \
+        --streaming_chunk=0.36
+fi

-# 4. recognizer
-utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wolm.log \
-  ctc-prefix-beam-search-decoder-ol \
-    --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$model_dir/avg_1.jit.pdiparams \
-    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-    --dict_file=$vocb_dir/vocab.txt \
-    --result_wspecifier=ark,t:$data/split${nj}/JOB/result
-
-cat $data/split${nj}/*/result > ${label_file}
-utils/compute-wer.py --char=1 --v=1 ${label_file} $text > ${wer}
-
-# 4. decode with lm
-utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.lm.log \
-  ctc-prefix-beam-search-decoder-ol \
-    --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$model_dir/avg_1.jit.pdiparams \
-    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-    --dict_file=$vocb_dir/vocab.txt \
-    --lm_path=$lm \
-    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_lm
-
-
-cat $data/split${nj}/*/result_lm > ${label_file}_lm
-utils/compute-wer.py --char=1 --v=1 ${label_file}_lm $text > ${wer}_lm
-
-
-graph_dir=./aishell_graph
-if [ ! -d $ ]; then
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
-    unzip -d aishell_graph.zip
+if [ $stage -le 2 ] && [ $stop_stage -ge 2 ];then
+    # Stage 2: decode each of the $nj feature splits with ctc-prefix-beam-search-decoder-ol (no --lm_path), then merge the per-split results and score them against $text.
+    utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wolm.log \
+    ctc-prefix-beam-search-decoder-ol \
+        --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
+        --model_path=$model_dir/avg_1.jit.pdmodel \
+        --param_path=$model_dir/avg_1.jit.pdiparams \
+        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
+        --dict_file=$vocb_dir/vocab.txt \
+        --result_wspecifier=ark,t:$data/split${nj}/JOB/result
+
+    cat $data/split${nj}/*/result > $exp/${label_file}
+    utils/compute-wer.py --char=1 --v=1 $exp/${label_file} $text > $exp/${wer}
 fi

+if [ $stage -le 3 ] && [ $stop_stage -ge 3 ];then
+    # Stage 3: same decoder as stage 2 but with --lm_path=$lm; results go to result_lm per split and are merged and scored into the *_lm files under $exp.
+    utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.lm.log \
+    ctc-prefix-beam-search-decoder-ol \
+        --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
+        --model_path=$model_dir/avg_1.jit.pdmodel \
+        --param_path=$model_dir/avg_1.jit.pdiparams \
+        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
+        --dict_file=$vocb_dir/vocab.txt \
+        --lm_path=$lm \
+        --result_wspecifier=ark,t:$data/split${nj}/JOB/result_lm
+ 
+    cat $data/split${nj}/*/result_lm > $exp/${label_file}_lm
+    utils/compute-wer.py --char=1 --v=1 $exp/${label_file}_lm $text > $exp/${wer}_lm
+fi

-# 5. test TLG decoder
-utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wfst.log \
-  wfst-decoder-ol \
-    --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
-    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --param_path=$model_dir/avg_1.jit.pdiparams \
-    --word_symbol_table=$graph_dir/words.txt \
-    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-     --graph_path=$graph_dir/TLG.fst --max_active=7500 \
-    --acoustic_scale=1.2 \
-    --result_wspecifier=ark,t:$data/split${nj}/JOB/result_tlg

+wfst=$data/wfst/  # download dir for the prebuilt aishell graph; not stage-guarded, fetched only when the zip is absent
+mkdir -p $wfst
+if [ ! -f $wfst/aishell_graph.zip ]; then
+    pushd $wfst
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
+    unzip aishell_graph.zip
+    popd
+fi

-cat $data/split${nj}/*/result_tlg > ${label_file}_tlg
-utils/compute-wer.py --char=1 --v=1 ${label_file}_tlg $text > ${wer}_tlg
+graph_dir=$wfst/aishell_graph
+if [ $stage -le 4 ] && [ $stop_stage -ge 4 ]; then
+    # Stage 4: decode each split with wfst-decoder-ol using TLG.fst and words.txt from $graph_dir, then merge the result_tlg files and score them into the *_tlg files under $exp.
+    utils/run.pl JOB=1:$nj $data/split${nj}/JOB/recog.wfst.log \
+    wfst-decoder-ol \
+        --feature_rspecifier=scp:$data/split${nj}/JOB/feat.scp \
+        --model_path=$model_dir/avg_1.jit.pdmodel \
+        --param_path=$model_dir/avg_1.jit.pdiparams \
+        --word_symbol_table=$graph_dir/words.txt \
+        --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
+        --graph_path=$graph_dir/TLG.fst --max_active=7500 \
+        --acoustic_scale=1.2 \
+        --result_wspecifier=ark,t:$data/split${nj}/JOB/result_tlg
+
+    cat $data/split${nj}/*/result_tlg > $exp/${label_file}_tlg
+    utils/compute-wer.py --char=1 --v=1 $exp/${label_file}_tlg $text > $exp/${wer}_tlg
+fi
--- a/speechx/examples/ngram/local/aishell_train_lms.sh
+++ b/speechx/examples/ngram/local/aishell_train_lms.sh
@ -0,0 +1,57 @@
+#!/bin/bash
+
+# Trains a 3-gram SRILM language model from data/local/lm/text. To be run from one directory above this script.
+. ./path.sh
+
+text=data/local/lm/text
+lexicon=data/local/dict/lexicon.txt
+
+for f in "$text" "$lexicon"; do
+  [ ! -f "$f" ] && echo "$0: No such file $f" && exit 1;
+done
+
+# Check SRILM tools
+if ! command -v ngram-count > /dev/null 2>&1; then
+    echo "srilm tools are not found, please download it and install it from: "
+    echo "http://www.speech.sri.com/projects/srilm/download.html"
+    echo "Then add the tools to your PATH"
+    exit 1
+fi
+
+# This script takes no arguments.  It assumes you have already run
+# aishell_data_prep.sh.
+# It takes as input the files
+# data/local/lm/text
+# data/local/dict/lexicon.txt
+dir=data/local/lm
+mkdir -p $dir
+
+cleantext=$dir/text.no_oov
+# Replace every field whose value is not a first-column entry of the lexicon with <SPOKEN_NOISE>.
+cat $text | awk -v lex=$lexicon 'BEGIN{while((getline<lex) >0){ seen[$1]=1; } }
+  {for(n=1; n<=NF;n++) {  if (seen[$n]) { printf("%s ", $n); } else {printf("<SPOKEN_NOISE> ");} } printf("\n");}' \
+  > $cleantext || exit 1;
+# Word frequencies over fields 2..NF (field 1 is skipped), most frequent first.
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | sort | uniq -c | \
+   sort -nr > $dir/word.counts || exit 1;
+
+# Get counts from acoustic training transcripts, and add  one-count
+# for each word in the lexicon (but not silence, we don't want it
+# in the LM-- we'll add it optionally later).
+cat $cleantext | awk '{for(n=2;n<=NF;n++) print $n; }' | \
+  cat - <(grep -w -v '!SIL' $lexicon | awk '{print $1}') | \
+   sort | uniq -c | sort -nr > $dir/unigram.counts || exit 1;
+
+cat $dir/unigram.counts | awk '{print $2}' | cat - <(echo "<s>"; echo "</s>" ) > $dir/wordlist
+
+heldout_sent=10000 # Don't change this if you want result to be comparable with
+    # kaldi_lm results
+# NOTE: `tail -n +N` starts AT line N, so line $heldout_sent lands in both heldout and train; left as is for comparability.
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  head -$heldout_sent > $dir/heldout
+cat $cleantext | awk '{for(n=2;n<=NF;n++){ printf $n; if(n<NF) printf " "; else print ""; }}' | \
+  tail -n +$heldout_sent > $dir/train
+
+ngram-count -text $dir/train -order 3 -limit-vocab -vocab $dir/wordlist -unk \
+  -map-unk "<UNK>" -kndiscount -interpolate -lm $dir/lm.arpa
+ngram -lm $dir/lm.arpa -ppl $dir/heldout
--- a/speechx/examples/ngram/path.sh
+++ b/speechx/examples/ngram/path.sh
@ -0,0 +1,20 @@
+# Environment for the ngram example: locates the speechx build tree and puts the
+# valgrind and SRILM tool directories on PATH. Meant to be sourced from the example dir.
+SPEECHX_ROOT=$PWD/../../../
+MAIN_ROOT=$SPEECHX_ROOT/../
+SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
+
+SPEECHX_TOOLS=$SPEECHX_ROOT/tools
+TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
+
+[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }
+
+export LC_ALL=C  # the locale override variable is LC_ALL; "LC_AL" is read by nothing
+
+export PATH=$PATH:$TOOLS_BIN
+
+# srilm
+export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs
+export SRILM=${MAIN_ROOT}/tools/srilm
+export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64
--- a/speechx/examples/ngram/run.sh
+++ b/speechx/examples/ngram/run.sh
@ -0,0 +1,61 @@
+#!/bin/bash
+set -eo pipefail
+# Builds the aishell n-gram LM in two stages: 0 = dict, 1 = LM training with SRILM.
+. path.sh
+
+stage=-1
+stop_stage=100
+corpus=aishell
+
+unit=data/vocab.txt     # vocab
+lexicon=  # aishell/resource_aishell/lexicon.txt
+text=     # aishell/data_aishell/transcript/aishell_transcript_v0.8.txt
+
+. utils/parse_options.sh  # explicit path: this example's path.sh does not put utils/ on PATH
+
+data=$PWD/data
+mkdir -p $data
+
+if [ ! -f $unit ]; then
+    echo "$0: No such file $unit"
+    exit 1;
+fi
+# Install SRILM / Kaldi on demand; `command -v` yields a real exit status (`[ ! which X ]` is a test syntax error).
+if ! command -v ngram-count >/dev/null 2>&1; then
+    pushd $MAIN_ROOT/tools
+    make srilm.done
+    popd
+fi
+
+if ! command -v fstaddselfloops >/dev/null 2>&1; then
+    pushd $MAIN_ROOT/tools
+    make kaldi.done
+    popd
+fi
+
+mkdir -p data/local/dict
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # 7.1 Prepare dict: copy the unit list and derive the lexicon from ${lexicon}
+    cp $unit data/local/dict/units.txt
+    utils/fst/prepare_dict.py \
+        --unit_file $unit \
+        --in_lexicon ${lexicon} \
+        --out_lexicon data/local/dict/lexicon.txt
+fi
+
+lm=data/local/lm
+mkdir -p data/train
+mkdir -p $lm
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # 7.2 Train lm on the transcript lines selected by data/train/text
+    utils/manifest_key_value.py \
+        --manifest_path data/manifest.train \
+        --output_path data/train
+    utils/filter_scp.pl data/train/text \
+        $text > $lm/text
+
+    local/aishell_train_lms.sh
+fi
+
+echo "build LM done."
+exit 0
--- a/speechx/examples/ngram/utils
+++ b/speechx/examples/ngram/utils
@ -0,0 +1 @@
+../../../utils/
--- a/speechx/tools/install_srilm.sh
+++ b/speechx/tools/install_srilm.sh
@ -1,97 +0,0 @@
-#!/usr/bin/env bash
-
-current_path=`pwd`
-current_dir=`basename "$current_path"`
-
-if [ "tools" != "$current_dir" ]; then
-    echo "You should run this script in tools/ directory!!"
-    exit 1
-fi
-
-if [ ! -d liblbfgs-1.10 ]; then
-    echo Installing libLBFGS library to support MaxEnt LMs
-    bash extras/install_liblbfgs.sh || exit 1
-fi
-
-! command -v gawk > /dev/null && \
-   echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1;
-
-if [ $# -ne 3 ]; then
-    echo "SRILM download requires some information about you"
-    echo
-    echo "Usage: $0 <name> <organization> <email>"
-    exit 1
-fi
-
-srilm_url="http://www.speech.sri.com/projects/srilm/srilm_download.php"
-post_data="WWW_file=srilm-1.7.3.tar.gz&WWW_name=$1&WWW_org=$2&WWW_email=$3"
-
-if ! wget --post-data "$post_data" -O ./srilm.tar.gz "$srilm_url"; then
-    echo 'There was a problem downloading the file.'
-    echo 'Check you internet connection and try again.'
-    exit 1
-fi
-
-mkdir -p srilm
-cd srilm
-
-
-if [ -f ../srilm.tgz ]; then
-    tar -xvzf ../srilm.tgz # Old SRILM format
-elif [  -f ../srilm.tar.gz ]; then
-    tar -xvzf ../srilm.tar.gz # Changed format type from tgz to tar.gz
-fi
-
-major=`gawk -F. '{ print $1 }' RELEASE`
-minor=`gawk -F. '{ print $2 }' RELEASE`
-micro=`gawk -F. '{ print $3 }' RELEASE`
-
-if [ $major -le 1 ] && [ $minor -le 7 ] && [ $micro -le 1 ]; then
-  echo "Detected version 1.7.1 or earlier. Applying patch."
-  patch -p0 < ../extras/srilm.patch
-fi
-
-# set the SRILM variable in the top-level Makefile to this directory.
-cp Makefile tmpf
-
-cat tmpf | gawk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \
-  > Makefile || exit 1
-rm tmpf
-
-mtype=`sbin/machine-type`
-
-echo HAVE_LIBLBFGS=1 >> common/Makefile.machine.$mtype
-grep ADDITIONAL_INCLUDES common/Makefile.machine.$mtype | \
-    sed 's|$| -I$(SRILM)/../liblbfgs-1.10/include|' \
-    >> common/Makefile.machine.$mtype
-
-grep ADDITIONAL_LDFLAGS common/Makefile.machine.$mtype | \
-    sed 's|$| -L$(SRILM)/../liblbfgs-1.10/lib/ -Wl,-rpath -Wl,$(SRILM)/../liblbfgs-1.10/lib/|' \
-    >> common/Makefile.machine.$mtype
-
-make || exit
-
-cd ..
-(
-  [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
-    unset SRILM
-
-  [ -f ./env.sh ] && . ./env.sh
-
-  [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM config is already in env.sh" && exit
-
-  wd=`pwd`
-  wd=`readlink -f $wd || pwd`
-
-  echo "export SRILM=$wd/srilm"
-  dirs="\${PATH}"
-  for directory in $(cd srilm && find bin -type d ) ; do
-    dirs="$dirs:\${SRILM}/$directory"
-  done
-  echo "export PATH=$dirs"
-) >> env.sh
-
-echo >&2 "Installation of SRILM finished successfully"
-echo >&2 "Please source the tools/env.sh in your path.sh to enable it"
--- a/tools/Makefile
+++ b/tools/Makefile
@ -25,7 +25,7 @@ clean:

 apt.done:
 	apt update -y
-	apt install -y bc flac jq vim tig tree pkg-config libsndfile1 libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev 
+	apt install -y bc flac jq vim tig tree sox pkg-config libsndfile1 libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev 
 	echo "check_certificate = off" >> ~/.wgetrc
 	touch apt.done

@ -50,7 +50,7 @@ openblas.done:
 	bash extras/install_openblas.sh
 	touch openblas.done

-kaldi.done: openblas.done
+kaldi.done: apt.done openblas.done
 	bash extras/install_kaldi.sh
 	touch kaldi.done

@ -58,6 +58,11 @@ sctk.done:
 	./extras/install_sclite.sh
 	touch sctk.done

+srilm.done:  # stamp target: runs the liblbfgs and SRILM install scripts from extras/, then touches srilm.done
+	./extras/install_liblbfgs.sh
+	extras/install_srilm.sh
+	touch srilm.done
+
 ######################
 dev: python conda_packages.done sctk.done

@ -96,4 +101,4 @@ conda_packages.done: bc.done cmake.done flac.done ffmpeg.done sox.done sndfile.d
 else
 conda_packages.done:
 endif
-	touch conda_packages.done
+	touch conda_packages.done
--- a/tools/extras/install_openfst.sh
+++ b/tools/extras/install_openfst.sh
@ -7,8 +7,9 @@ set -x
 # openfst
 openfst=openfst-1.8.1
 shared=true
+WGET="wget -c --no-check-certificate"

-test -e ${openfst}.tar.gz || wget http://www.openfst.org/twiki/pub/FST/FstDownload/${openfst}.tar.gz
+test -e ${openfst}.tar.gz || $WGET http://www.openfst.org/twiki/pub/FST/FstDownload/${openfst}.tar.gz
 test -d ${openfst} || tar -xvf ${openfst}.tar.gz && chown -R root:root ${openfst}


--- a/utils/espnet_json_to_manifest.py
+++ b/utils/espnet_json_to_manifest.py
--- a/utils/generate_infer_yaml.py
+++ b/utils/generate_infer_yaml.py
--- a/utils/link_wav.py
+++ b/utils/link_wav.py
--- a/utils/manifest_key_value.py
+++ b/utils/manifest_key_value.py
@ -26,23 +26,38 @@ def main(args):
    with wav_scp.open('w') as fwav, dur_scp.open('w') as fdur, text_scp.open(
            'w') as ftxt:
        for line_json in manifest_jsons:
+            # utt:str
+            # utt2spk:str
+            # input: [{name:str, shape:[dur_in_sec, feat_dim], feat:str, filetype:str}, ]
+            # output: [{name:str, shape:[tokenlen, vocab_dim], text:str, token:str, tokenid:str}, ] 
            utt = line_json['utt']
-            feat = line_json['feat']
+            utt2spk = line_json['utt2spk']
+
+            # input
+            assert(len(line_json['input']) == 1), "only support one input now"
+            input_json = line_json['input'][0]
+            feat = input_json['feat']
+            feat_shape = input_json['shape']
+            file_type = input_json['filetype']
+
            file_ext = Path(feat).suffix  # .wav
-            text = line_json['text']
-            feat_shape = line_json['feat_shape']
            dur = feat_shape[0]
            feat_dim = feat_shape[1]
-            if 'token' in line_json:
-                tokens = line_json['token']
-                tokenids = line_json['token_id']
-                token_shape = line_json['token_shape']
-                token_len = token_shape[0]
-                vocab_dim = token_shape[1]

            if file_ext == '.wav':
                fwav.write(f"{utt} {feat}\n")
            fdur.write(f"{utt} {dur}\n")
+
+            # output
+            assert(len(line_json['output']) == 1), "only support one output now"
+            output_json = line_json['output'][0]
+            text = output_json['text']
+            if 'token' in output_json:
+                tokens = output_json['token']
+                tokenids = output_json['tokenid']
+                token_shape = output_json['shape']
+                token_len = token_shape[0]
+                vocab_dim = token_shape[1]
            ftxt.write(f"{utt} {text}\n")

            count += 1