From 40dcbbde262e53679e1dda5fb3633cf4f24d975b Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Mon, 26 Jul 2021 12:08:11 +0000
Subject: [PATCH] add srilm install

Move the installer scripts from tools/install to tools/extras, add
installers for KenLM, libLBFGS and SRILM (together with the Trie.cc patch
that the SRILM installer applies to releases 1.7.1 and earlier), and make
examples/aishell/s1/path.sh locate liblbfgs, SRILM and Kaldi under
${MAIN_ROOT}/tools instead of absolute workspace paths.

The installers stop at the first failing extract/configure/build step
instead of continuing in the wrong directory.
---
 examples/aishell/s1/path.sh                   |  7 +-
 tools/{install => extras}/README.md           |  0
 tools/{install => extras}/install_gcc.sh      |  0
 tools/{install => extras}/install_kaldi.sh    |  0
 tools/extras/install_kenlm.sh                 |  9 ++
 tools/extras/install_liblbfgs.sh              | 40 ++++++++
 tools/{install => extras}/install_mfa.sh      |  0
 .../{install => extras}/install_miniconda.sh  |  0
 tools/{install => extras}/install_mkl.sh      |  0
 tools/{install => extras}/install_ngram.sh    |  0
 tools/{install => extras}/install_openblas.sh |  0
 tools/{install => extras}/install_openfst.sh  |  0
 tools/{install => extras}/install_pynini.sh   |  0
 tools/extras/install_srilm.sh                 | 91 +++++++++++++++++++
 tools/extras/srilm.patch                      | 17 ++++
 15 files changed, 161 insertions(+), 3 deletions(-)
 rename tools/{install => extras}/README.md (100%)
 rename tools/{install => extras}/install_gcc.sh (100%)
 rename tools/{install => extras}/install_kaldi.sh (100%)
 create mode 100755 tools/extras/install_kenlm.sh
 create mode 100755 tools/extras/install_liblbfgs.sh
 rename tools/{install => extras}/install_mfa.sh (100%)
 rename tools/{install => extras}/install_miniconda.sh (100%)
 rename tools/{install => extras}/install_mkl.sh (100%)
 rename tools/{install => extras}/install_ngram.sh (100%)
 rename tools/{install => extras}/install_openblas.sh (100%)
 rename tools/{install => extras}/install_openfst.sh (100%)
 rename tools/{install => extras}/install_pynini.sh (100%)
 create mode 100755 tools/extras/install_srilm.sh
 create mode 100644 tools/extras/srilm.patch

diff --git a/examples/aishell/s1/path.sh b/examples/aishell/s1/path.sh
index 512b3c9f..4776c757 100644
--- a/examples/aishell/s1/path.sh
+++ b/examples/aishell/s1/path.sh
@@ -14,13 +14,14 @@ MODEL=u2
 export BIN_DIR=${MAIN_ROOT}/deepspeech/exps/${MODEL}/bin
 
 
-export LIBLBFGS=/workspace/zhanghui/asr/wenet-210713/tools/liblbfgs-1.10
+# srilm
+export LIBLBFGS=${MAIN_ROOT}/tools/liblbfgs-1.10
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:-}:${LIBLBFGS}/lib/.libs
-export SRILM=/workspace/zhanghui/asr/wenet-210713/tools/srilm
+export SRILM=${MAIN_ROOT}/tools/srilm
 export PATH=${PATH}:${SRILM}/bin:${SRILM}/bin/i686-m64
 
 # Kaldi
-export KALDI_ROOT=/workspace/zhanghui/asr/kaldi
+export KALDI_ROOT=${MAIN_ROOT}/tools/kaldi
 [ -f $KALDI_ROOT/tools/env.sh ] && . $KALDI_ROOT/tools/env.sh
 export PATH=$PWD/utils/:$KALDI_ROOT/tools/openfst/bin:$PWD:$PATH
 [ ! -f $KALDI_ROOT/tools/config/common_path.sh ] && echo >&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1
diff --git a/tools/install/README.md b/tools/extras/README.md
similarity index 100%
rename from tools/install/README.md
rename to tools/extras/README.md
diff --git a/tools/install/install_gcc.sh b/tools/extras/install_gcc.sh
similarity index 100%
rename from tools/install/install_gcc.sh
rename to tools/extras/install_gcc.sh
diff --git a/tools/install/install_kaldi.sh b/tools/extras/install_kaldi.sh
similarity index 100%
rename from tools/install/install_kaldi.sh
rename to tools/extras/install_kaldi.sh
diff --git a/tools/extras/install_kenlm.sh b/tools/extras/install_kenlm.sh
new file mode 100755
index 00000000..100225bf
--- /dev/null
+++ b/tools/extras/install_kenlm.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+apt-get install -y build-essential cmake libboost-system-dev libboost-thread-dev libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev liblzma-dev
+
+apt-get install -y gcc-5 g++-5 && update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-5 50 && update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-5 50
+
+test -d kenlm || wget -O - https://kheafield.com/code/kenlm.tar.gz | tar xz
+
+rm -rf kenlm/build && mkdir -p kenlm/build && cd kenlm/build && cmake .. && make -j4 && make install
diff --git a/tools/extras/install_liblbfgs.sh b/tools/extras/install_liblbfgs.sh
new file mode 100755
index 00000000..8d6ae4ab
--- /dev/null
+++ b/tools/extras/install_liblbfgs.sh
@@ -0,0 +1,40 @@
+#!/usr/bin/env bash
+
+VER=1.10
+
+WGET=${WGET:-wget}
+
+if [ ! -f liblbfgs-$VER.tar.gz ]; then
+  if [ -d "$DOWNLOAD_DIR" ]; then
+    cp -p "$DOWNLOAD_DIR/liblbfgs-$VER.tar.gz" . || exit 1
+  else
+    $WGET https://github.com/downloads/chokkan/liblbfgs/liblbfgs-$VER.tar.gz || exit 1
+  fi
+fi
+
+tar -xzf liblbfgs-$VER.tar.gz || exit 1
+cd liblbfgs-$VER || exit 1
+./configure --prefix="$(pwd)" || exit 1
+make || exit 1
+# due to the liblbfgs project directory structure, we have to use -i
+# but the errors are completely harmless
+make -i install
+cd ..
+
+(
+  [ ! -z "${LIBLBFGS}" ] && \
+    echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \
+    unset LIBLBFGS
+
+  [ -f ./env.sh ] && . ./env.sh
+
+  [ ! -z "${LIBLBFGS}" ] && \
+    echo >&2 "libLBFGS config is already in env.sh" && exit
+
+  wd=`pwd`
+  wd=`readlink -f $wd || pwd`
+
+  echo "export LIBLBFGS=$wd/liblbfgs-$VER"
+  echo export LD_LIBRARY_PATH='${LD_LIBRARY_PATH:-}':'${LIBLBFGS}'/lib/.libs
+) >> env.sh
+
diff --git a/tools/install/install_mfa.sh b/tools/extras/install_mfa.sh
similarity index 100%
rename from tools/install/install_mfa.sh
rename to tools/extras/install_mfa.sh
diff --git a/tools/install/install_miniconda.sh b/tools/extras/install_miniconda.sh
similarity index 100%
rename from tools/install/install_miniconda.sh
rename to tools/extras/install_miniconda.sh
diff --git a/tools/install/install_mkl.sh b/tools/extras/install_mkl.sh
similarity index 100%
rename from tools/install/install_mkl.sh
rename to tools/extras/install_mkl.sh
diff --git a/tools/install/install_ngram.sh b/tools/extras/install_ngram.sh
similarity index 100%
rename from tools/install/install_ngram.sh
rename to tools/extras/install_ngram.sh
diff --git a/tools/install/install_openblas.sh b/tools/extras/install_openblas.sh
similarity index 100%
rename from tools/install/install_openblas.sh
rename to tools/extras/install_openblas.sh
diff --git a/tools/install/install_openfst.sh b/tools/extras/install_openfst.sh
similarity index 100%
rename from tools/install/install_openfst.sh
rename to tools/extras/install_openfst.sh
diff --git a/tools/install/install_pynini.sh b/tools/extras/install_pynini.sh
similarity index 100%
rename from tools/install/install_pynini.sh
rename to tools/extras/install_pynini.sh
diff --git a/tools/extras/install_srilm.sh b/tools/extras/install_srilm.sh
new file mode 100755
index 00000000..f359e70c
--- /dev/null
+++ b/tools/extras/install_srilm.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+current_path=`pwd`
+current_dir=`basename "$current_path"`
+
+if [ "tools" != "$current_dir" ]; then
+  echo "You should run this script in tools/ directory!!"
+  exit 1
+fi
+
+if [ ! -d liblbfgs-1.10 ]; then
+  echo Installing libLBFGS library to support MaxEnt LMs
+  bash extras/install_liblbfgs.sh || exit 1
+fi
+
+# http://www.speech.sri.com/projects/srilm/download.html
+if [ ! -f srilm.tgz ] && [ ! -f srilm.tar.gz ]; then # Changed format type from tgz to tar.gz as the srilm v1.7.3 downloads as tar.gz
+  echo This script cannot install SRILM in a completely automatic
+  echo way because you need to put your address in a download form.
+  echo Please download SRILM from http://www.speech.sri.com/projects/srilm/download.html
+  echo put it in ./srilm.tar.gz , then run this script.
+  echo Note: You may have to rename the downloaded file to remove version name from filename eg: mv srilm-1.7.3.tar.gz srilm.tar.gz
+  exit 1
+fi
+
+! which gawk 2>/dev/null && \
+  echo "GNU awk is not installed so SRILM will probably not work correctly: refusing to install" && exit 1;
+
+mkdir -p srilm
+cd srilm || exit 1
+
+
+if [ -f ../srilm.tgz ]; then
+  tar -xvzf ../srilm.tgz || exit 1 # Old SRILM format
+elif [ -f ../srilm.tar.gz ]; then
+  tar -xvzf ../srilm.tar.gz || exit 1 # Changed format type from tgz to tar.gz
+fi
+
+major=`awk -F. '{ print $1 }' RELEASE`
+minor=`awk -F. '{ print $2 }' RELEASE`
+micro=`awk -F. '{ print $3 }' RELEASE`
+
+if [ $(( ${major:-0} * 10000 + ${minor:-0} * 100 + ${micro:-0} )) -le 10701 ]; then # i.e. version <= 1.7.1
+  echo "Detected version 1.7.1 or earlier. Applying patch."
+  patch -p0 < ../extras/srilm.patch
+fi
+
+# set the SRILM variable in the top-level Makefile to this directory.
+cp Makefile tmpf
+
+cat tmpf | awk -v pwd=`pwd` '/SRILM =/{printf("SRILM = %s\n", pwd); next;} {print;}' \
+  > Makefile || exit 1
+rm tmpf
+
+mtype=`sbin/machine-type`
+
+echo HAVE_LIBLBFGS=1 >> common/Makefile.machine.$mtype
+grep ADDITIONAL_INCLUDES common/Makefile.machine.$mtype | \
+    sed 's|$| -I$(SRILM)/../liblbfgs-1.10/include|' \
+    >> common/Makefile.machine.$mtype
+
+grep ADDITIONAL_LDFLAGS common/Makefile.machine.$mtype | \
+    sed 's|$| -L$(SRILM)/../liblbfgs-1.10/lib/ -Wl,-rpath -Wl,$(SRILM)/../liblbfgs-1.10/lib/|' \
+    >> common/Makefile.machine.$mtype
+
+make || exit 1
+
+cd ..
+(
+  [ ! -z "${SRILM}" ] && \
+    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
+    unset SRILM
+
+  [ -f ./env.sh ] && . ./env.sh
+
+  [ ! -z "${SRILM}" ] && \
+    echo >&2 "SRILM config is already in env.sh" && exit
+
+  wd=`pwd`
+  wd=`readlink -f $wd || pwd`
+
+  echo "export SRILM=$wd/srilm"
+  dirs="\${PATH}"
+  for directory in $(cd srilm && find bin -type d ) ; do
+    dirs="$dirs:\${SRILM}/$directory"
+  done
+  echo "export PATH=$dirs"
+) >> env.sh
+
+echo >&2 "Installation of SRILM finished successfully"
+echo >&2 "Please source the tools/env.sh in your path.sh to enable it"
diff --git a/tools/extras/srilm.patch b/tools/extras/srilm.patch
new file mode 100644
index 00000000..c54ad21a
--- /dev/null
+++ b/tools/extras/srilm.patch
@@ -0,0 +1,17 @@
+--- dstruct/src/Trie.orig 2016-11-08 19:53:40.524000000 +0000
++++ dstruct/src/Trie.cc 2016-11-08 19:53:59.088000000 +0000
+@@ -200,11 +200,14 @@
+     if (removedData == 0) {
+         Trie node;
+         if (sub.remove(keys[0], &node)) {
++#if !defined(__GNUC__) || !(__GNUC__ >= 4 && __GNUC_MINOR__ >= 9 || __GNUC__ > 4)
+             /*
+              * XXX: Call subtrie destructor explicitly since we're not
+              * passing the removed node to the caller.
++             * !!! Triggers bug with gcc >= 4.9 optimization !!!
+              */
+             node.~Trie();
++#endif
+             return true;
+         } else {
+             return false;