opt to compile asr,cls,vad; add vad; format code (#2968)

2 years ago · b35fc01a3a
parent 78e29c8ec4
commit b35fc01a3a
30 changed files with 3811 additions and 2531 deletions
--- a/runtime/.gitignore
+++ b/runtime/.gitignore
@ -1,3 +1,6 @@
+engine/common/base/flags.h
+engine/common/base/log.h
+
 tools/valgrind*
 *log
 fc_patch/*
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@ -20,8 +20,7 @@ project(paddlespeech VERSION 0.1)

 set(CMAKE_VERBOSE_MAKEFILE on)

-# set std-14
-set(CMAKE_CXX_STANDARD 14)
+

 include(FetchContent)
 include(ExternalProject)
@ -31,15 +30,28 @@ set(FETCHCONTENT_QUIET off)
 get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR}")
 set(FETCHCONTENT_BASE_DIR ${fc_patch})

+set(CMAKE_CXX_FLAGS)
+set(CMAKE_CXX_FLAGS_DEBUG)
+set(CMAKE_CXX_FLAGS_RELEASE)
+
+# set std-14
+set(CMAKE_CXX_STANDARD 14)
+
 # compiler option
 # Keep the same with openfst, -fPIC or -fpic
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
 SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
 SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")

+
+add_compile_options(-fPIC)
 ###############################################################################
 # Option Configurations
 ###############################################################################
+option(WITH_ASR "build asr" ON)
+option(WITH_CLS "build cls" ON)
+option(WITH_VAD "build vad" ON)
+
 option(TEST_DEBUG "option for debug" OFF)
 option(USE_PROFILING "enable c++ profling" OFF)
 option(WITH_TESTING "unit test" ON)
@ -47,102 +59,117 @@ option(WITH_TESTING "unit test" ON)
 option(USING_GPU "u2 compute on GPU." OFF)

 ###############################################################################
-# Include third party
+# Include Third Party
 ###############################################################################
 include(gflags)

 include(glog)

-# openfst
-include(openfst)
-add_dependencies(openfst gflags glog)
-
-# paddle lib
-include(paddleinference)
-
 # gtest
 if(WITH_TESTING)
    include(gtest) # download, build, install gtest
 endif()

+
+# fastdeploy
+include(fastdeploy)
+
+if(WITH_ASR)
+    # openfst
+    include(openfst)
+    add_dependencies(openfst gflags glog)
+endif()
+
+###############################################################################
+# Find Package
+###############################################################################
+
 # python/pybind11/threads
 find_package(Threads REQUIRED)
 # https://cmake.org/cmake/help/latest/module/FindPython3.html#module:FindPython3
 find_package(Python3 COMPONENTS Interpreter Development)
 find_package(pybind11 CONFIG)

-if(Python3_FOUND)
-  message(STATUS "Python3_FOUND = ${Python3_FOUND}")
-  message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
-  message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
-  message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}")
-  message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
-  set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
-  set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
-endif()
-
-message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
-message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")

-if(pybind11_FOUND)
-  message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
-  message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
-  message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
+if(WITH_ASR)
+    if(Python3_FOUND)
+    message(STATUS "Python3_FOUND = ${Python3_FOUND}")
+    message(STATUS "Python3_EXECUTABLE = ${Python3_EXECUTABLE}")
+    message(STATUS "Python3_LIBRARIES = ${Python3_LIBRARIES}")
+    message(STATUS "Python3_INCLUDE_DIRS = ${Python3_INCLUDE_DIRS}")
+    message(STATUS "Python3_LINK_OPTIONS = ${Python3_LINK_OPTIONS}")
+    set(PYTHON_LIBRARIES ${Python3_LIBRARIES} CACHE STRING "python lib" FORCE)
+    set(PYTHON_INCLUDE_DIR ${Python3_INCLUDE_DIRS} CACHE STRING "python inc" FORCE)
+    endif()
+
+    message(STATUS "PYTHON_LIBRARIES = ${PYTHON_LIBRARIES}")
+    message(STATUS "PYTHON_INCLUDE_DIR = ${PYTHON_INCLUDE_DIR}")
+
+    if(pybind11_FOUND)
+    message(STATUS "pybind11_INCLUDES = ${pybind11_INCLUDE_DIRS}")
+    message(STATUS "pybind11_LIBRARIES=${pybind11_LIBRARIES}")
+    message(STATUS "pybind11_DEFINITIONS=${pybind11_DEFINITIONS}")
+    endif()
+
+
+    # paddle libpaddle.so
+    # paddle include and link option
+    # -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
+    #
+    # NOTE: the Python snippets below must start at column 0. A backslash-newline
+    # inside a quoted CMake argument only removes the line break; the leading
+    # spaces of the next line stay in the string, and `python -c` rejects code
+    # that begins with indentation (IndentationError) on Python < 3.14.
+    execute_process(
+        COMMAND python -c "\
+import os;\
+import paddle;\
+include_dir=paddle.sysconfig.get_include();\
+paddle_dir=os.path.split(include_dir)[0];\
+libs_dir=os.path.join(paddle_dir, 'libs');\
+fluid_dir=os.path.join(paddle_dir, 'fluid');\
+out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
+out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
+"
+        OUTPUT_VARIABLE PADDLE_LINK_FLAGS
+        RESULT_VARIABLE SUCESS)
+
+    # Fail with an actionable message instead of the obscure
+    # "string sub-command STRIP requires two arguments" error that an empty
+    # result would otherwise trigger below.
+    if(NOT "${SUCESS}" STREQUAL "0")
+        message(FATAL_ERROR
+            "WITH_ASR=ON: querying paddle link flags via `python` failed "
+            "(result: ${SUCESS}). Is paddlepaddle installed for the `python` "
+            "found on PATH?")
+    endif()
+
+    message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
+    # Quoted so an empty or semicolon-containing value is still one argument.
+    string(STRIP "${PADDLE_LINK_FLAGS}" PADDLE_LINK_FLAGS)
+
+    # paddle compile option
+    # -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
+    execute_process(
+        COMMAND python -c "\
+import paddle; \
+include_dir = paddle.sysconfig.get_include(); \
+print(f\"-I{include_dir}\"); \
+"
+        OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS)
+    message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
+    string(STRIP "${PADDLE_COMPILE_FLAGS}" PADDLE_COMPILE_FLAGS)
+
+
+    # for LD_LIBRARY_PATH
+    # set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
+    execute_process(
+        COMMAND python -c "\
+import os; \
+import paddle; \
+include_dir=paddle.sysconfig.get_include(); \
+paddle_dir=os.path.split(include_dir)[0]; \
+libs_dir=os.path.join(paddle_dir, 'libs'); \
+fluid_dir=os.path.join(paddle_dir, 'fluid'); \
+out=':'.join([libs_dir, fluid_dir]); print(out); \
+"
+        OUTPUT_VARIABLE PADDLE_LIB_DIRS)
+    message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
 endif()


-# paddle libpaddle.so
-# paddle include and link option
-# -L/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/libs -L/workspace/DeepSpeech-2.x/speechx/venv/lib/python3.7/site-packages/paddle/fluid -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so
-execute_process(
-    COMMAND python -c "\
-import os;\
-import paddle;\
-include_dir=paddle.sysconfig.get_include();\
-paddle_dir=os.path.split(include_dir)[0];\
-libs_dir=os.path.join(paddle_dir, 'libs');\
-fluid_dir=os.path.join(paddle_dir, 'fluid');\
-out=' '.join([\"-L\" + libs_dir, \"-L\" + fluid_dir]);\
-out += \" -l:libpaddle.so -l:libdnnl.so.2 -l:libiomp5.so\"; print(out);\
-    "
-    OUTPUT_VARIABLE PADDLE_LINK_FLAGS
-    RESULT_VARIABLE SUCESS)
-
-message(STATUS PADDLE_LINK_FLAGS= ${PADDLE_LINK_FLAGS})
-string(STRIP ${PADDLE_LINK_FLAGS} PADDLE_LINK_FLAGS)
-
-# paddle compile option
-# -I/workspace/DeepSpeech-2.x/engine/venv/lib/python3.7/site-packages/paddle/include
-execute_process(
-    COMMAND python -c "\
-import paddle; \
-include_dir = paddle.sysconfig.get_include(); \
-print(f\"-I{include_dir}\"); \
-    "
-    OUTPUT_VARIABLE PADDLE_COMPILE_FLAGS)
-message(STATUS PADDLE_COMPILE_FLAGS= ${PADDLE_COMPILE_FLAGS})
-string(STRIP ${PADDLE_COMPILE_FLAGS} PADDLE_COMPILE_FLAGS)
-
-
-# for LD_LIBRARY_PATH
-# set(PADDLE_LIB_DIRS /workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/fluid:/workspace/DeepSpeech-2.x/tools/venv/lib/python3.7/site-packages/paddle/libs/)
-execute_process(
-    COMMAND python -c "\
-import os; \
-import paddle; \
-include_dir=paddle.sysconfig.get_include(); \
-paddle_dir=os.path.split(include_dir)[0]; \
-libs_dir=os.path.join(paddle_dir, 'libs'); \
-fluid_dir=os.path.join(paddle_dir, 'fluid'); \
-out=':'.join([libs_dir, fluid_dir]); print(out); \
-    "
-    OUTPUT_VARIABLE PADDLE_LIB_DIRS)
-message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
-
-add_compile_options(-fPIC)
 ###############################################################################
 # Add local library
 ###############################################################################
 set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)

+message(STATUS "CMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}")
+message(STATUS "CMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
+message(STATUS "CMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}")
+
+
 add_subdirectory(engine)
--- a/runtime/build.sh
+++ b/runtime/build.sh
@ -4,5 +4,5 @@ set -xe
 # the build script had verified in the paddlepaddle docker image.
 # please follow the instruction below to install PaddlePaddle image.
 # https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html 
-cmake -B build
+cmake -B build -DWITH_ASR=OFF -DWITH_CLS=OFF
 cmake --build build -j
--- a/runtime/cmake/fastdeploy.cmake
+++ b/runtime/cmake/fastdeploy.cmake
@ -8,11 +8,11 @@ windows_x86")
 set(CMAKE_VERBOSE_MAKEFILE ON)

 set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
-if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
+if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
    exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
-    wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} &&
-    tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} &&
-    mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64")
+    wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
+    tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
+    mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
 endif()

 if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
@ -36,4 +36,9 @@ elseif (ARCH STREQUAL "android_armv7")
 endif()

 include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
-include_directories(${FASTDEPLOY_INCS})
+
+# Re-assert C++14 here to fix a compiler-flag conflict: FastDeploy uses C++11 for its project.
+set(CMAKE_CXX_STANDARD 14)
+
+include_directories(${FASTDEPLOY_INCS})
+message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")
--- a/runtime/engine/CMakeLists.txt
+++ b/runtime/engine/CMakeLists.txt
@ -6,8 +6,19 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/kaldi)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/common)

-add_subdirectory(asr)
-add_subdirectory(common)
 add_subdirectory(kaldi)
-add_subdirectory(codelab)
-add_subdirectory(cls)
+add_subdirectory(common)
+
+if(WITH_ASR)
+    add_subdirectory(asr)
+endif()
+
+if(WITH_CLS)
+    add_subdirectory(cls)
+endif()
+
+if(WITH_VAD)
+    add_subdirectory(vad)
+endif()
+
+add_subdirectory(codelab)
--- a/runtime/engine/asr/recognizer/u2_recognizer.cc
+++ b/runtime/engine/asr/recognizer/u2_recognizer.cc
@ -38,7 +38,8 @@ U2Recognizer::U2Recognizer(const U2RecognizerResource& resource)
        decoder_ = std::make_unique<CTCPrefixBeamSearch>(
            resource.vocab_path, resource.decoder_opts.ctc_prefix_search_opts);
    } else {
-        decoder_ = std::make_unique<TLGDecoder>(resource.decoder_opts.tlg_decoder_opts);
+        decoder_ = std::make_unique<TLGDecoder>(
+            resource.decoder_opts.tlg_decoder_opts);
    }

    symbol_table_ = decoder_->WordSymbolTable();
--- a/runtime/engine/common/CMakeLists.txt
+++ b/runtime/engine/common/CMakeLists.txt
@ -3,7 +3,7 @@ ${CMAKE_CURRENT_SOURCE_DIR}
 ${CMAKE_CURRENT_SOURCE_DIR}/../
 )
 add_subdirectory(utils)
-
+add_subdirectory(base)
 add_subdirectory(matrix)

 include_directories(
--- a/runtime/engine/common/base/CMakeLists.txt
+++ b/runtime/engine/common/base/CMakeLists.txt
@ -0,0 +1,20 @@
+# Choose which third-party headers the generated base/flags.h and base/log.h
+# forward to: the openfst-provided ones when ASR is enabled, otherwise plain
+# gflags/glog. The WITH_ASR compile flag is added only in the ASR case.
+if(NOT WITH_ASR)
+  set(PPS_FLAGS_LIB "gflags/gflags.h")
+  set(PPS_GLOB_LIB "glog/logging.h")
+else()
+  add_compile_options(-DWITH_ASR)
+  set(PPS_FLAGS_LIB "fst/flags.h")
+  set(PPS_GLOB_LIB "fst/log.h")
+endif()
+
+# Instantiate flags.h and log.h from their .in templates, next to the
+# templates in the source directory. @ONLY restricts substitution to
+# @VAR@ placeholders.
+foreach(pps_header flags log)
+  configure_file(
+      ${CMAKE_CURRENT_SOURCE_DIR}/${pps_header}.h.in
+      ${CMAKE_CURRENT_SOURCE_DIR}/${pps_header}.h @ONLY
+    )
+  message(STATUS "Generated ${CMAKE_CURRENT_SOURCE_DIR}/${pps_header}.h")
+endforeach()
--- a/runtime/engine/common/base/flags.h.in
+++ b/runtime/engine/common/base/flags.h.in
@ -14,4 +14,4 @@

 #pragma once

-#include "fst/flags.h"
+#include "@PPS_FLAGS_LIB@"
--- a/runtime/engine/common/base/log.h.in
+++ b/runtime/engine/common/base/log.h.in
@ -14,4 +14,4 @@

 #pragma once

-#include "fst/log.h"
+#include "@PPS_GLOB_LIB@"
--- a/runtime/engine/common/frontend/cmvn.cc
+++ b/runtime/engine/common/frontend/cmvn.cc
@ -33,7 +33,7 @@ CMVN::CMVN(std::string cmvn_file, unique_ptr<FrontendInterface> base_extractor)
    dim_ = mean_stats_.size() - 1;
 }

-void CMVN::ReadCMVNFromJson(string cmvn_file) {
+void CMVN::ReadCMVNFromJson(std::string cmvn_file) {
    std::string json_str = ppspeech::ReadFile2String(cmvn_file);
    picojson::value value;
    std::string err;
--- a/runtime/engine/common/frontend/feature-fbank.h
+++ b/runtime/engine/common/frontend/feature-fbank.h
@ -21,6 +21,7 @@
 #ifndef KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_
 #define KALDI_NATIVE_FBANK_CSRC_FEATURE_FBANK_H_

+#include <limits>
 #include <map>

 #include "frontend/feature-window.h"
--- a/runtime/engine/common/frontend/feature-window.cc
+++ b/runtime/engine/common/frontend/feature-window.cc
@ -7,6 +7,7 @@
 #include "frontend/feature-window.h"

 #include <cmath>
+#include <limits>
 #include <vector>

 #ifndef M_2PI
--- a/runtime/engine/common/frontend/rfft.cc
+++ b/runtime/engine/common/frontend/rfft.cc
@ -17,12 +17,12 @@
 */

 #include "frontend/rfft.h"
+#include "base/log.h"

 #include <cmath>
+#include <memory>
 #include <vector>

-#include "base/log.h"
-
 // see fftsg.c
 #ifdef __cplusplus
 extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);
--- a/runtime/engine/common/matrix/kaldi-matrix-inl.h
+++ b/runtime/engine/common/matrix/kaldi-matrix-inl.h
@ -25,40 +25,41 @@
 namespace kaldi {

 /// Empty constructor
-template<typename Real>
-Matrix<Real>::Matrix(): MatrixBase<Real>(NULL, 0, 0, 0) { }
+template <typename Real>
+Matrix<Real>::Matrix() : MatrixBase<Real>(NULL, 0, 0, 0) {}

 /*
 template<>
 template<>
-void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float> &ra, const VectorBase<float> &rb);
+void MatrixBase<float>::AddVecVec(const float alpha, const VectorBase<float>
+&ra, const VectorBase<float> &rb);

 template<>
 template<>
-void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double> &ra, const VectorBase<double> &rb);
+void MatrixBase<double>::AddVecVec(const double alpha, const VectorBase<double>
+&ra, const VectorBase<double> &rb);
 */

-template<typename Real>
-inline std::ostream & operator << (std::ostream & os, const MatrixBase<Real> & M) {
-  M.Write(os, false);
-  return os;
+template <typename Real>
+inline std::ostream& operator<<(std::ostream& os, const MatrixBase<Real>& M) {
+    M.Write(os, false);
+    return os;
 }

-template<typename Real>
-inline std::istream & operator >> (std::istream & is, Matrix<Real> & M) {
-  M.Read(is, false);
-  return is;
+template <typename Real>
+inline std::istream& operator>>(std::istream& is, Matrix<Real>& M) {
+    M.Read(is, false);
+    return is;
 }


-template<typename Real>
-inline std::istream & operator >> (std::istream & is, MatrixBase<Real> & M) {
-  M.Read(is, false);
-  return is;
+template <typename Real>
+inline std::istream& operator>>(std::istream& is, MatrixBase<Real>& M) {
+    M.Read(is, false);
+    return is;
 }

-}// namespace kaldi
+}  // namespace kaldi


 #endif  // KALDI_MATRIX_KALDI_MATRIX_INL_H_
-
--- a/runtime/engine/common/matrix/kaldi-matrix.cc
+++ b/runtime/engine/common/matrix/kaldi-matrix.cc
--- a/runtime/engine/common/matrix/kaldi-matrix.h
+++ b/runtime/engine/common/matrix/kaldi-matrix.h
--- a/runtime/engine/common/matrix/kaldi-vector-inl.h
+++ b/runtime/engine/common/matrix/kaldi-vector-inl.h
@ -26,32 +26,33 @@

 namespace kaldi {

-template<typename Real>
-std::ostream & operator << (std::ostream &os, const VectorBase<Real> &rv) {
-  rv.Write(os, false);
-  return os;
+template <typename Real>
+std::ostream &operator<<(std::ostream &os, const VectorBase<Real> &rv) {
+    rv.Write(os, false);
+    return os;
 }

-template<typename Real>
-std::istream &operator >> (std::istream &is, VectorBase<Real> &rv) {
-  rv.Read(is, false);
-  return is;
+template <typename Real>
+std::istream &operator>>(std::istream &is, VectorBase<Real> &rv) {
+    rv.Read(is, false);
+    return is;
 }

-template<typename Real>
-std::istream &operator >> (std::istream &is, Vector<Real> &rv) {
-  rv.Read(is, false);
-  return is;
+template <typename Real>
+std::istream &operator>>(std::istream &is, Vector<Real> &rv) {
+    rv.Read(is, false);
+    return is;
 }

-//template<>
-//template<>
-//void VectorBase<float>::AddVec(const float alpha, const VectorBase<float> &rv);
+// template<>
+// template<>
+// void VectorBase<float>::AddVec(const float alpha, const VectorBase<float>
+// &rv);

-//template<>
-//template<>
-//void VectorBase<double>::AddVec<double>(const double alpha,
-                                        //const VectorBase<double> &rv);
+// template<>
+// template<>
+// void VectorBase<double>::AddVec<double>(const double alpha,
+// const VectorBase<double> &rv);

 }  // namespace kaldi

--- a/runtime/engine/common/matrix/kaldi-vector.cc
+++ b/runtime/engine/common/matrix/kaldi-vector.cc
--- a/runtime/engine/common/matrix/kaldi-vector.h
+++ b/runtime/engine/common/matrix/kaldi-vector.h
@ -37,265 +37,274 @@ namespace kaldi {
 ///  Provides a vector abstraction class.
 ///  This class provides a way to work with vectors in kaldi.
 ///  It encapsulates basic operations and memory optimizations.
-template<typename Real>
+template <typename Real>
 class VectorBase {
- public:
-  /// Set vector to all zeros.
-  void SetZero();
-
-  /// Returns true if matrix is all zeros.
-  bool IsZero(Real cutoff = 1.0e-06) const;     // replace magic number
-
-  /// Set all members of a vector to a specified value.
-  void Set(Real f);
-
-  /// Returns the  dimension of the vector.
-  inline MatrixIndexT Dim() const { return dim_; }
-
-  /// Returns the size in memory of the vector, in bytes.
-  inline MatrixIndexT SizeInBytes() const { return (dim_*sizeof(Real)); }
-
-  /// Returns a pointer to the start of the vector's data.
-  inline Real* Data() { return data_; }
-
-  /// Returns a pointer to the start of the vector's data (const).
-  inline const Real* Data() const { return data_; }
-
-  /// Indexing  operator (const).
-  inline Real operator() (MatrixIndexT i) const {
-    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
-                 static_cast<UnsignedMatrixIndexT>(dim_));
-    return *(data_ + i);
-  }
-
-  /// Indexing operator (non-const).
-  inline Real & operator() (MatrixIndexT i) {
-    KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
-                 static_cast<UnsignedMatrixIndexT>(dim_));
-    return *(data_ + i);
-  }
-
-  /** @brief Returns a sub-vector of a vector (a range of elements).
-   *  @param o [in] Origin, 0 < o < Dim()
-   *  @param l [in] Length 0 < l < Dim()-o
-   *  @return A SubVector object that aliases the data of the Vector object.
-   *  See @c SubVector class for details   */
-  SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
-    return SubVector<Real>(*this, o, l);
-  }
-
-  /** @brief Returns a const sub-vector of a vector (a range of elements).
-   *  @param o [in] Origin, 0 < o < Dim()
-   *  @param l [in] Length 0 < l < Dim()-o
-   *  @return A SubVector object that aliases the data of the Vector object.
-   *  See @c SubVector class for details   */
-  const SubVector<Real> Range(const MatrixIndexT o,
-                              const MatrixIndexT l) const {
-    return SubVector<Real>(*this, o, l);
-  }
-
-  /// Copy data from another vector (must match own size).
-  void CopyFromVec(const VectorBase<Real> &v);
-
-  /// Copy data from another vector of different type (double vs. float)
-  template<typename OtherReal>
-  void CopyFromVec(const VectorBase<OtherReal> &v);
-
-  /// Performs a row stack of the matrix M
-  void CopyRowsFromMat(const MatrixBase<Real> &M);
-  template<typename OtherReal>
-  void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
-
-  /// Performs a column stack of the matrix M
-  void CopyColsFromMat(const MatrixBase<Real> &M);
-
-  /// Extracts a row of the matrix M.  Could also do this with
-  /// this->Copy(M[row]).
-  void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
-  /// Extracts a row of the matrix M with type conversion.
-  template<typename OtherReal>
-  void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
-
-  /// Extracts a column of the matrix M.
-  template<typename OtherReal>
-  void CopyColFromMat(const MatrixBase<OtherReal> &M , MatrixIndexT col);
-
-  /// Reads from C++ stream (option to add to existing contents).
-  /// Throws exception on failure
-  void Read(std::istream &in, bool binary);
-
-  /// Writes to C++ stream (option to write in binary).
-  void Write(std::ostream &Out, bool binary) const;
-
-  friend class VectorBase<double>;
-  friend class VectorBase<float>;
- protected:
-  /// Destructor;  does not deallocate memory, this is handled by child classes.
-  /// This destructor is protected so this object can only be
-  /// deleted via a child.
-  ~VectorBase() {}
-
-  /// Empty initializer, corresponds to vector of zero size.
-  explicit VectorBase(): data_(NULL), dim_(0) {
-    KALDI_ASSERT_IS_FLOATING_TYPE(Real);
-  }
-
-  /// data memory area
-  Real* data_;
-  /// dimension of vector
-  MatrixIndexT dim_;
-  KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
-}; // class VectorBase
+  public:
+    /// Set vector to all zeros.
+    void SetZero();
+
+    /// Returns true if matrix is all zeros.
+    bool IsZero(Real cutoff = 1.0e-06) const;  // replace magic number
+
+    /// Set all members of a vector to a specified value.
+    void Set(Real f);
+
+    /// Returns the  dimension of the vector.
+    inline MatrixIndexT Dim() const { return dim_; }
+
+    /// Returns the size in memory of the vector, in bytes.
+    inline MatrixIndexT SizeInBytes() const { return (dim_ * sizeof(Real)); }
+
+    /// Returns a pointer to the start of the vector's data.
+    inline Real *Data() { return data_; }
+
+    /// Returns a pointer to the start of the vector's data (const).
+    inline const Real *Data() const { return data_; }
+
+    /// Indexing  operator (const).
+    inline Real operator()(MatrixIndexT i) const {
+        KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                              static_cast<UnsignedMatrixIndexT>(dim_));
+        return *(data_ + i);
+    }
+
+    /// Indexing operator (non-const).
+    inline Real &operator()(MatrixIndexT i) {
+        KALDI_PARANOID_ASSERT(static_cast<UnsignedMatrixIndexT>(i) <
+                              static_cast<UnsignedMatrixIndexT>(dim_));
+        return *(data_ + i);
+    }
+
+    /** @brief Returns a sub-vector of a vector (a range of elements).
+     *  @param o [in] Origin, 0 < o < Dim()
+     *  @param l [in] Length 0 < l < Dim()-o
+     *  @return A SubVector object that aliases the data of the Vector object.
+     *  See @c SubVector class for details   */
+    SubVector<Real> Range(const MatrixIndexT o, const MatrixIndexT l) {
+        return SubVector<Real>(*this, o, l);
+    }
+
+    /** @brief Returns a const sub-vector of a vector (a range of elements).
+     *  @param o [in] Origin, 0 < o < Dim()
+     *  @param l [in] Length 0 < l < Dim()-o
+     *  @return A SubVector object that aliases the data of the Vector object.
+     *  See @c SubVector class for details   */
+    const SubVector<Real> Range(const MatrixIndexT o,
+                                const MatrixIndexT l) const {
+        return SubVector<Real>(*this, o, l);
+    }
+
+    /// Copy data from another vector (must match own size).
+    void CopyFromVec(const VectorBase<Real> &v);
+
+    /// Copy data from another vector of different type (double vs. float)
+    template <typename OtherReal>
+    void CopyFromVec(const VectorBase<OtherReal> &v);
+
+    /// Performs a row stack of the matrix M
+    void CopyRowsFromMat(const MatrixBase<Real> &M);
+    template <typename OtherReal>
+    void CopyRowsFromMat(const MatrixBase<OtherReal> &M);
+
+    /// Performs a column stack of the matrix M
+    void CopyColsFromMat(const MatrixBase<Real> &M);
+
+    /// Extracts a row of the matrix M.  Could also do this with
+    /// this->Copy(M[row]).
+    void CopyRowFromMat(const MatrixBase<Real> &M, MatrixIndexT row);
+    /// Extracts a row of the matrix M with type conversion.
+    template <typename OtherReal>
+    void CopyRowFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT row);
+
+    /// Extracts a column of the matrix M.
+    template <typename OtherReal>
+    void CopyColFromMat(const MatrixBase<OtherReal> &M, MatrixIndexT col);
+
+    /// Reads from C++ stream (option to add to existing contents).
+    /// Throws exception on failure
+    void Read(std::istream &in, bool binary);
+
+    /// Writes to C++ stream (option to write in binary).
+    void Write(std::ostream &Out, bool binary) const;
+
+    friend class VectorBase<double>;
+    friend class VectorBase<float>;
+
+  protected:
+    /// Destructor;  does not deallocate memory, this is handled by child
+    /// classes.
+    /// This destructor is protected so this object can only be
+    /// deleted via a child.
+    ~VectorBase() {}
+
+    /// Empty initializer, corresponds to vector of zero size.
+    explicit VectorBase() : data_(NULL), dim_(0) {
+        KALDI_ASSERT_IS_FLOATING_TYPE(Real);
+    }
+
+    /// data memory area
+    Real *data_;
+    /// dimension of vector
+    MatrixIndexT dim_;
+    KALDI_DISALLOW_COPY_AND_ASSIGN(VectorBase);
+};  // class VectorBase

 /** @brief A class representing a vector.
 *
 *  This class provides a way to work with vectors in kaldi.
 *  It encapsulates basic operations and memory optimizations.  */
-template<typename Real>
-class Vector: public VectorBase<Real> {
- public:
-  /// Constructor that takes no arguments.  Initializes to empty.
-  Vector(): VectorBase<Real>() {}
-
-  /// Constructor with specific size.  Sets to all-zero by default
-  /// if set_zero == false, memory contents are undefined.
-  explicit Vector(const MatrixIndexT s,
-                  MatrixResizeType resize_type = kSetZero)
-      : VectorBase<Real>() {  Resize(s, resize_type);  }
-
-  /// Copy constructor from CUDA vector
-  /// This is defined in ../cudamatrix/cu-vector.h
-  //template<typename OtherReal>
-  //explicit Vector(const CuVectorBase<OtherReal> &cu);
-
-  /// Copy constructor.  The need for this is controversial.
-  Vector(const Vector<Real> &v) : VectorBase<Real>()  { //  (cannot be explicit)
-    Resize(v.Dim(), kUndefined);
-    this->CopyFromVec(v);
-  }
-
-  /// Copy-constructor from base-class, needed to copy from SubVector.
-  explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
-    Resize(v.Dim(), kUndefined);
-    this->CopyFromVec(v);
-  }
-
-  /// Type conversion constructor.
-  template<typename OtherReal>
-  explicit Vector(const VectorBase<OtherReal> &v): VectorBase<Real>() {
-    Resize(v.Dim(), kUndefined);
-    this->CopyFromVec(v);
-  }
-
-// Took this out since it is unsafe : Arnab
-//  /// Constructor from a pointer and a size; copies the data to a location
-//  /// it owns.
-//  Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
-//    Resize(s);
-  //    CopyFromPtr(Data, s);
-//  }
-
-
-  /// Swaps the contents of *this and *other.  Shallow swap.
-  void Swap(Vector<Real> *other);
-
-  /// Destructor.  Deallocates memory.
-  ~Vector() { Destroy(); }
-
-  /// Read function using C++ streams.  Can also add to existing contents
-  /// of matrix.
-  void Read(std::istream &in, bool binary);
-
-  /// Set vector to a specified size (can be zero).
-  /// The value of the new data depends on resize_type:
-  ///   -if kSetZero, the new data will be zero
-  ///   -if kUndefined, the new data will be undefined
-  ///   -if kCopyData, the new data will be the same as the old data in any
-  ///      shared positions, and zero elsewhere.
-  /// This function takes time proportional to the number of data elements.
-  void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
-
-  /// Remove one element and shifts later elements down.
-  void RemoveElement(MatrixIndexT i);
-
-  /// Assignment operator.
-  Vector<Real> &operator = (const Vector<Real> &other) {
-    Resize(other.Dim(), kUndefined);
-    this->CopyFromVec(other);
-    return *this;
-  }
-
-  /// Assignment operator that takes VectorBase.
-  Vector<Real> &operator = (const VectorBase<Real> &other) {
-    Resize(other.Dim(), kUndefined);
-    this->CopyFromVec(other);
-    return *this;
-  }
- private:
-  /// Init assumes the current contents of the class are invalid (i.e. junk or
-  /// has already been freed), and it sets the vector to newly allocated memory
-  /// with the specified dimension.  dim == 0 is acceptable.  The memory contents
-  /// pointed to by data_ will be undefined.
-  void Init(const MatrixIndexT dim);
-
-  /// Destroy function, called internally.
-  void Destroy();
-
+template <typename Real>
+class Vector : public VectorBase<Real> {
+  public:
+    /// Constructor that takes no arguments.  Initializes to empty.
+    Vector() : VectorBase<Real>() {}
+
+    /// Constructor with specific size.  Zero-filled by default (kSetZero);
+    /// with resize_type == kUndefined the memory contents are undefined.
+    explicit Vector(const MatrixIndexT s,
+                    MatrixResizeType resize_type = kSetZero)
+        : VectorBase<Real>() {
+        Resize(s, resize_type);
+    }
+
+    /// Copy constructor from CUDA vector
+    /// This is defined in ../cudamatrix/cu-vector.h
+    // template<typename OtherReal>
+    // explicit Vector(const CuVectorBase<OtherReal> &cu);
+
+    /// Copy constructor.  The need for this is controversial.
+    Vector(const Vector<Real> &v)
+        : VectorBase<Real>() {  //  (cannot be explicit)
+        Resize(v.Dim(), kUndefined);
+        this->CopyFromVec(v);
+    }
+
+    /// Copy-constructor from base-class, needed to copy from SubVector.
+    explicit Vector(const VectorBase<Real> &v) : VectorBase<Real>() {
+        Resize(v.Dim(), kUndefined);
+        this->CopyFromVec(v);
+    }
+
+    /// Type conversion constructor.
+    template <typename OtherReal>
+    explicit Vector(const VectorBase<OtherReal> &v) : VectorBase<Real>() {
+        Resize(v.Dim(), kUndefined);
+        this->CopyFromVec(v);
+    }
+
+    // Took this out since it is unsafe : Arnab
+    //  /// Constructor from a pointer and a size; copies the data to a location
+    //  /// it owns.
+    //  Vector(const Real* Data, const MatrixIndexT s): VectorBase<Real>() {
+    //    Resize(s);
+    //    CopyFromPtr(Data, s);
+    //  }
+
+
+    /// Swaps the contents of *this and *other.  Shallow swap.
+    void Swap(Vector<Real> *other);
+
+    /// Destructor.  Deallocates memory.
+    ~Vector() { Destroy(); }
+
+    /// Read function using C++ streams.  NOTE(review): this overload takes no
+    /// `add` flag, so the old "can also add to existing contents" remark looks stale.
+    void Read(std::istream &in, bool binary);
+
+    /// Set vector to a specified size (can be zero).
+    /// The value of the new data depends on resize_type:
+    ///   -if kSetZero, the new data will be zero
+    ///   -if kUndefined, the new data will be undefined
+    ///   -if kCopyData, the new data will be the same as the old data in any
+    ///      shared positions, and zero elsewhere.
+    /// This function takes time proportional to the number of data elements.
+    void Resize(MatrixIndexT length, MatrixResizeType resize_type = kSetZero);
+
+    /// Remove one element and shifts later elements down.
+    void RemoveElement(MatrixIndexT i);
+
+    /// Assignment operator.  Resizes to other's dimension, then copies it.
+    Vector<Real> &operator=(const Vector<Real> &other) {
+        Resize(other.Dim(), kUndefined);
+        this->CopyFromVec(other);
+        return *this;
+    }
+
+    /// Assignment operator that takes VectorBase.
+    Vector<Real> &operator=(const VectorBase<Real> &other) {
+        Resize(other.Dim(), kUndefined);
+        this->CopyFromVec(other);
+        return *this;
+    }
+
+  private:
+    /// Init assumes the current contents of the class are invalid (i.e. junk
+    /// or already freed) and points the vector at newly allocated memory of
+    /// the specified dimension.
+    /// dim == 0 is acceptable.
+    /// The memory contents pointed to by data_ will be undefined after the
+    /// call.
+    void Init(const MatrixIndexT dim);
+
+    /// Destroy function, called internally.
+    void Destroy();
 };


 /// Represents a non-allocating general vector which can be defined
 /// as a sub-vector of higher-level vector [or as the row of a matrix].
-template<typename Real>
+template <typename Real>
 class SubVector : public VectorBase<Real> {
- public:
-  /// Constructor from a Vector or SubVector.
-  /// SubVectors are not const-safe and it's very hard to make them
-  /// so for now we just give up.  This function contains const_cast.
-  SubVector(const VectorBase<Real> &t, const MatrixIndexT origin,
-            const MatrixIndexT length) : VectorBase<Real>() {
-    // following assert equiv to origin>=0 && length>=0 &&
-    // origin+length <= rt.dim_
-    KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin)+
-                 static_cast<UnsignedMatrixIndexT>(length) <=
-                 static_cast<UnsignedMatrixIndexT>(t.Dim()));
-    VectorBase<Real>::data_ = const_cast<Real*> (t.Data()+origin);
-    VectorBase<Real>::dim_   = length;
-  }
-
-  /// This constructor initializes the vector to point at the contents
-  /// of this packed matrix (SpMatrix or TpMatrix).
- // SubVector(const PackedMatrix<Real> &M) {
-    //VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
-    //VectorBase<Real>::dim_   = (M.NumRows()*(M.NumRows()+1))/2;
-  //}
-
-  /// Copy constructor
-  SubVector(const SubVector &other) : VectorBase<Real> () {
-    // this copy constructor needed for Range() to work in base class.
-    VectorBase<Real>::data_ = other.data_;
-    VectorBase<Real>::dim_ = other.dim_;
-  }
-
-  /// Constructor from a pointer to memory and a length.  Keeps a pointer
-  /// to the data but does not take ownership (will never delete).
-  /// Caution: this constructor enables you to evade const constraints.
-  SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real> () {
-    VectorBase<Real>::data_ = const_cast<Real*>(data);
-    VectorBase<Real>::dim_   = length;
-  }
-
-  /// This operation does not preserve const-ness, so be careful.
-  SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
-    VectorBase<Real>::data_ = const_cast<Real*>(matrix.RowData(row));
-    VectorBase<Real>::dim_   = matrix.NumCols();
-  }
-
-  ~SubVector() {}  ///< Destructor (does nothing; no pointers are owned here).
-
- private:
-  /// Disallow assignment operator.
-  SubVector & operator = (const SubVector &other) {}
+  public:
+    /// Constructor from a Vector or SubVector.
+    /// SubVectors are not const-safe and it's very hard to make them
+    /// so for now we just give up.  This function contains const_cast.
+    SubVector(const VectorBase<Real> &t,
+              const MatrixIndexT origin,
+              const MatrixIndexT length)
+        : VectorBase<Real>() {
+        // following assert equiv to origin>=0 && length>=0 &&
+        // origin+length <= rt.dim_
+        KALDI_ASSERT(static_cast<UnsignedMatrixIndexT>(origin) +
+                         static_cast<UnsignedMatrixIndexT>(length) <=
+                     static_cast<UnsignedMatrixIndexT>(t.Dim()));
+        VectorBase<Real>::data_ = const_cast<Real *>(t.Data() + origin);
+        VectorBase<Real>::dim_ = length;
+    }
+
+    /// This constructor initializes the vector to point at the contents
+    /// of this packed matrix (SpMatrix or TpMatrix).
+    // SubVector(const PackedMatrix<Real> &M) {
+    // VectorBase<Real>::data_ = const_cast<Real*> (M.Data());
+    // VectorBase<Real>::dim_   = (M.NumRows()*(M.NumRows()+1))/2;
+    //}
+
+    /// Copy constructor
+    SubVector(const SubVector &other) : VectorBase<Real>() {
+        // this copy constructor needed for Range() to work in base class.
+        VectorBase<Real>::data_ = other.data_;
+        VectorBase<Real>::dim_ = other.dim_;
+    }
+
+    /// Constructor from a pointer to memory and a length.  Keeps a pointer
+    /// to the data but does not take ownership (will never delete).
+    /// Caution: this constructor enables you to evade const constraints.
+    SubVector(const Real *data, MatrixIndexT length) : VectorBase<Real>() {
+        VectorBase<Real>::data_ = const_cast<Real *>(data);
+        VectorBase<Real>::dim_ = length;
+    }
+
+    /// This operation does not preserve const-ness, so be careful.
+    SubVector(const MatrixBase<Real> &matrix, MatrixIndexT row) {
+        VectorBase<Real>::data_ = const_cast<Real *>(matrix.RowData(row));
+        VectorBase<Real>::dim_ = matrix.NumCols();
+    }
+
+    ~SubVector() {}  ///< Destructor (does nothing; no pointers are owned here).
+
+  private:
+    /// Assignment is disallowed: a SubVector only aliases memory it does not
+    /// own.  The operator is deleted rather than given an empty body, because
+    /// flowing off the end of a non-void function is undefined behavior.
+    SubVector &operator=(const SubVector &other) = delete;
 };

 /// @} end of "addtogroup matrix_group"
@ -303,43 +312,41 @@ class SubVector : public VectorBase<Real> {
 /// @{
 /// Output to a C++ stream.  Non-binary by default (use Write for
 /// binary output).
-template<typename Real>
-std::ostream & operator << (std::ostream & out, const VectorBase<Real> & v);
+template <typename Real>
+std::ostream &operator<<(std::ostream &out, const VectorBase<Real> &v);

 /// Input from a C++ stream.  Will automatically read text or
 /// binary data from the stream.
-template<typename Real>
-std::istream & operator >> (std::istream & in, VectorBase<Real> & v);
+template <typename Real>
+std::istream &operator>>(std::istream &in, VectorBase<Real> &v);

 /// Input from a C++ stream. Will automatically read text or
 /// binary data from the stream.
-template<typename Real>
-std::istream & operator >> (std::istream & in, Vector<Real> & v);
+template <typename Real>
+std::istream &operator>>(std::istream &in, Vector<Real> &v);
 /// @} end of \addtogroup matrix_funcs_io

 /// \addtogroup matrix_funcs_scalar
 /// @{


-//template<typename Real>
-//bool ApproxEqual(const VectorBase<Real> &a,
-                 //const VectorBase<Real> &b, Real tol = 0.01) {
-  //return a.ApproxEqual(b, tol);
+// template<typename Real>
+// bool ApproxEqual(const VectorBase<Real> &a,
+// const VectorBase<Real> &b, Real tol = 0.01) {
+// return a.ApproxEqual(b, tol);
 //}

-//template<typename Real>
-//inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
-                        //float tol = 0.01) {
-  //KALDI_ASSERT(a.ApproxEqual(b, tol));
+// template<typename Real>
+// inline void AssertEqual(VectorBase<Real> &a, VectorBase<Real> &b,
+// float tol = 0.01) {
+// KALDI_ASSERT(a.ApproxEqual(b, tol));
 //}


-
 }  // namespace kaldi

 // we need to include the implementation
 #include "matrix/kaldi-vector-inl.h"


-
 #endif  // KALDI_MATRIX_KALDI_VECTOR_H_
--- a/runtime/engine/common/matrix/matrix-common.h
+++ b/runtime/engine/common/matrix/matrix-common.h
@ -27,52 +27,58 @@

 namespace kaldi {
 // this enums equal to CblasTrans and CblasNoTrans constants from CBLAS library
-// we are writing them as literals because we don't want to include here matrix/kaldi-blas.h,
-// which puts many symbols into global scope (like "real") via the header f2c.h 
+// we are writing them as literals because we don't want to include here
+// matrix/kaldi-blas.h,
+// which puts many symbols into global scope (like "real") via the header f2c.h
 typedef enum {
-  kTrans    = 112, // = CblasTrans
-  kNoTrans  = 111  // = CblasNoTrans
+    kTrans = 112,   // = CblasTrans
+    kNoTrans = 111  // = CblasNoTrans
 } MatrixTransposeType;

-typedef enum {
-  kSetZero,
-  kUndefined,
-  kCopyData
-} MatrixResizeType;
+typedef enum { kSetZero, kUndefined, kCopyData } MatrixResizeType;


 typedef enum {
-  kDefaultStride,
-  kStrideEqualNumCols,
+    kDefaultStride,
+    kStrideEqualNumCols,
 } MatrixStrideType;

 typedef enum {
-  kTakeLower,
-  kTakeUpper,
-  kTakeMean,
-  kTakeMeanAndCheck
+    kTakeLower,
+    kTakeUpper,
+    kTakeMean,
+    kTakeMeanAndCheck
 } SpCopyType;

-template<typename Real> class VectorBase;
-template<typename Real> class Vector;
-template<typename Real> class SubVector;
-template<typename Real> class MatrixBase;
-template<typename Real> class SubMatrix;
-template<typename Real> class Matrix;
+template <typename Real>
+class VectorBase;
+template <typename Real>
+class Vector;
+template <typename Real>
+class SubVector;
+template <typename Real>
+class MatrixBase;
+template <typename Real>
+class SubMatrix;
+template <typename Real>
+class Matrix;


 /// This class provides a way for switching between double and float types.
-template<typename T> class OtherReal { };  // useful in reading+writing routines
-                                           // to switch double and float.
+template <typename T>
+class OtherReal {};  // useful in reading+writing routines
+                     // to switch double and float.
 /// A specialized class for switching from float to double.
-template<> class OtherReal<float> {
- public:
-  typedef double Real;
+template <>
+class OtherReal<float> {
+  public:
+    typedef double Real;
 };
 /// A specialized class for switching from double to float.
-template<> class OtherReal<double> {
- public:
-  typedef float Real;
+template <>
+class OtherReal<double> {
+  public:
+    typedef float Real;
 };


@ -81,12 +87,10 @@ typedef int32 SignedMatrixIndexT;
 typedef uint32 UnsignedMatrixIndexT;

 // If you want to use size_t for the index type, do as follows instead:
-//typedef size_t MatrixIndexT;
-//typedef ssize_t SignedMatrixIndexT;
-//typedef size_t UnsignedMatrixIndexT;
-
+// typedef size_t MatrixIndexT;
+// typedef ssize_t SignedMatrixIndexT;
+// typedef size_t UnsignedMatrixIndexT;
 }


-
 #endif  // KALDI_MATRIX_MATRIX_COMMON_H_
--- a/runtime/engine/kaldi/CMakeLists.txt
+++ b/runtime/engine/kaldi/CMakeLists.txt
@ -1,14 +1,15 @@
-project(kaldi)
 include_directories(
 ${CMAKE_CURRENT_SOURCE_DIR}
 )

 add_subdirectory(base)
 add_subdirectory(util)
-add_subdirectory(lat)
-add_subdirectory(fstext)
-add_subdirectory(decoder)
-add_subdirectory(lm)
+if(WITH_ASR)  # the subdirectories below are only added when WITH_ASR is ON
+    add_subdirectory(lat)
+    add_subdirectory(fstext)
+    add_subdirectory(decoder)
+    add_subdirectory(lm)
+
+    add_subdirectory(fstbin)
+    add_subdirectory(lmbin)
+endif()  # WITH_ASR
--- a/runtime/engine/kaldi/base/kaldi-types.h
+++ b/runtime/engine/kaldi/base/kaldi-types.h
@ -44,7 +44,19 @@ typedef float   BaseFloat;

 #ifndef COMPILE_WITHOUT_OPENFST

+#ifdef WITH_ASR  // NOTE(review): needs WITH_ASR passed as a compile definition, not only as a CMake option - confirm
 #include <fst/types.h>
+#else  // no <fst/types.h>: declare the short integer aliases here. NOTE(review): relies on <cstdint> being included earlier - confirm
+using int8 = int8_t;
+using int16 = int16_t;
+using int32 = int32_t;
+using int64 = int64_t;
+
+using uint8 = uint8_t;
+using uint16 = uint16_t;
+using uint32 = uint32_t;
+using uint64 = uint64_t;
+#endif  // WITH_ASR

 namespace kaldi {
  using ::int16;
--- a/runtime/engine/vad/CMakeLists.txt
+++ b/runtime/engine/vad/CMakeLists.txt
@ -0,0 +1,18 @@
+# set(CMAKE_CXX_STANDARD 11)
+
+# # Path to the downloaded and extracted FastDeploy library
+# set(FASTDEPLOY_INSTALL_DIR "fdlib/fastdeploy-linux-x64-1.0.4" CACHE STRING force)
+
+# if(NOT EXISTS ${FASTDEPLOY_INSTALL_DIR})
+#     message(FATAL_ERROR "Please using cmake -B build -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR}")
+# endif()
+
+# include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
+
+# # Add the FastDeploy header search paths
+# include_directories(${FASTDEPLOY_INCS})
+
+# Silero VAD demo executable, built from the sources in this directory.
+add_executable(infer_onnx_silero_vad
+    ${CMAKE_CURRENT_SOURCE_DIR}/infer_onnx_silero_vad.cc
+    wav.h
+    vad.cc
+    vad.h
+)
+
+# Link the FastDeploy libraries. PRIVATE: nothing links against this executable.
+# NOTE(review): FASTDEPLOY_LIBS is not set in this file; presumably it is
+# provided by an enclosing scope - confirm.
+target_link_libraries(infer_onnx_silero_vad PRIVATE ${FASTDEPLOY_LIBS})
--- a/runtime/engine/vad/README.md
+++ b/runtime/engine/vad/README.md
@ -0,0 +1,121 @@
+English | [简体中文](README_CN.md)
+
+# Silero VAD Deployment Example
+
+This directory provides the `infer_onnx_silero_vad` example, which quickly deploys the Silero VAD model on CPU/GPU.
+
+Before deployment, two steps require confirmation.
+
+- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).  
+- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
+
+Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
+
+```bash
+mkdir build
+cd build
+# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory
+wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
+
+# inference
+./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
+```
+
+- The above command works for Linux or MacOS. Refer to:
+  - [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md)  for SDK use-pattern in Windows
+
+## VAD C++ Interface
+
+### Vad Class
+
+```c++
+Vad::Vad(const std::string& model_file,
+    const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
+```
+
+**Parameter**
+
+> * **model_file**(str): Model file path
+> * **custom_option**(RuntimeOption): Backend inference configuration. Defaults to a default-constructed `fastdeploy::RuntimeOption` (i.e. the default configuration)
+
+### setAudioCofig function
+
+**Must be called before the `init` function**
+
+```c++
+void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
+```
+
+**Parameter**
+
+> * **sr**(int): sampling rate
+> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size
+> * **threshold**(float): Result probability judgment threshold
+> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence
+> * **speech_pad_ms**(int): Used to calculate the end time of the speech
+
+### init function
+
+Used to initialize audio-related parameters.
+
+```c++
+void Vad::init();
+```
+
+### loadAudio function
+
+Load audio.
+
+```c++
+void Vad::loadAudio(const std::string& wavPath)
+```
+
+**Parameter**
+
+> * **wavPath**(str): Audio file path
+
+### Predict function
+
+Used to start model reasoning.
+
+```c++
+bool Vad::Predict();
+```
+
+### getResult function
+
+**Used to obtain reasoning results**
+
+```c++
+std::vector<std::map<std::string, float>> Vad::getResult(
+            float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
+            float mergeThreshold = 0.3);
+```
+
+**Parameter**
+
+> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold
+> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly
+> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly
+> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly
+
+**The output result format is**`std::vector<std::map<std::string, float>>`
+
+> Output a list, each element is a speech fragment
+>
+> Each clip can use 'start' to get the start time and 'end' to get the end time
+
+### Tips
+
+1. The `setAudioCofig` function must be called before the `init` function
+2. The sampling rate of the input audio file must be consistent with that set in the code
+
+- [Model Description](../)
+- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)
--- a/runtime/engine/vad/README_CN.md
+++ b/runtime/engine/vad/README_CN.md
@ -0,0 +1,119 @@
+[English](README.md) | 简体中文
+# Silero VAD 部署示例
+
+本目录下提供 `infer_onnx_silero_vad` 示例，用于在 CPU/GPU 上快速完成 Silero VAD 模型的部署。
+
+在部署前，需确认以下两个步骤
+
+- 1. 软硬件环境满足要求，参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+- 2. 根据开发环境，下载预编译部署库和samples代码，参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
+
+以Linux上 VAD 推理为例，在本目录执行如下命令即可完成编译测试。
+
+```bash
+mkdir build
+cd build
+# 下载FastDeploy预编译库，用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用
+wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
+tar xvf fastdeploy-linux-x64-x.x.x.tgz
+cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
+make -j
+
+# 下载 VAD 模型文件和测试音频，解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下
+wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
+wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
+
+# 推理
+./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
+```
+
+以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考:
+- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md)
+
+## VAD C++ 接口
+### Vad 类
+
+```c++
+Vad::Vad(const std::string& model_file,
+    const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
+```
+
+**参数**
+
+> * **model_file**(str): 模型文件路径
+> * **custom_option**(RuntimeOption): 后端推理配置，默认为 `fastdeploy::RuntimeOption()`，即采用默认配置
+
+### setAudioCofig 函数
+
+**必须在`init`函数前调用**
+
+```c++
+void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
+```
+
+**参数**
+
+> * **sr**(int): 采样率
+> * **frame_ms**(int): 每次检测帧长，用于计算检测窗口大小
+> * **threshold**(float): 结果概率判断阈值
+> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值
+> * **speech_pad_ms**(int): 用于计算 speech 结束时刻
+
+### init 函数
+
+用于初始化音频相关参数
+
+```c++
+void Vad::init();
+```
+
+### loadAudio 函数
+
+加载音频
+
+```c++
+void Vad::loadAudio(const std::string& wavPath)
+```
+
+**参数**
+
+> * **wavPath**(str): 音频文件路径
+
+### Predict 函数
+
+用于开始模型推理
+
+```c++
+bool Vad::Predict();
+```
+
+### getResult 函数
+
+**用于获取推理结果**
+
+```c++
+std::vector<std::map<std::string, float>> Vad::getResult(
+            float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
+            float mergeThreshold = 0.3);
+```
+
+**参数**
+
+> * **removeThreshold**(float): 丢弃结果片段阈值；部分识别结果太短则根据此阈值丢弃
+> * **expandHeadThreshold**(float): 结果片段开始时刻偏移；识别到的开始时刻可能过于贴近发声部分，因此据此前移开始时刻
+> * **expandTailThreshold**(float): 结果片段结束时刻偏移；识别到的结束时刻可能过于贴近发声部分，因此据此后移结束时刻
+> * **mergeThreshold**(float): 有的结果片段十分靠近，可以合并成一个，据此合并发声片段
+
+**输出结果格式为**`std::vector<std::map<std::string, float>>`
+
+> 输出一个列表，每个元素是一个讲话片段
+>
+> 每个片段可以用 'start' 获取到开始时刻，用 'end' 获取到结束时刻
+
+### 提示
+
+1. `setAudioCofig`函数必须在`init`函数前调用
+2. 输入的音频文件的采样率必须与代码中设置的保持一致
+
+- [模型介绍](../)
+- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md)
--- a/runtime/engine/vad/infer_onnx_silero_vad.cc
+++ b/runtime/engine/vad/infer_onnx_silero_vad.cc
@ -0,0 +1,65 @@
+
+#include "vad.h"
+
+// Demo driver: feeds a wav file to the Silero VAD model window by window,
+// printing the state after each window and the detected speech segments at
+// the end.
+// Returns 0 on success, -1 on bad usage, sample-rate mismatch or inference
+// failure.
+int main(int argc, char* argv[]) {
+    if (argc < 3) {
+        std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio, "
+                     "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
+                  << std::endl;
+        return -1;
+    }
+
+    std::string model_file = argv[1];
+    std::string audio_file = argv[2];
+
+    int sr = 16000;
+    Vad vad(model_file);
+    // custom config, but must be set before init
+    vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
+    vad.Init();
+
+    wav::WavReader wav_reader = wav::WavReader(audio_file);
+    // Checked at runtime rather than with assert(), which is compiled out
+    // when NDEBUG is defined.
+    if (wav_reader.sample_rate() != sr) {
+        std::cerr << "Sample rate of " << audio_file << " is "
+                  << wav_reader.sample_rate() << ", expected " << sr << "."
+                  << std::endl;
+        return -1;
+    }
+
+    // Samples scaled by 1/32768.
+    // NOTE(review): assumes wav_reader.data() yields floating-point samples in
+    // the int16 range; with an integer element type this division would
+    // truncate - confirm against wav.h.
+    std::vector<float> inputWav;
+    auto num_samples = wav_reader.num_samples();
+    inputWav.resize(num_samples);
+    for (decltype(num_samples) i = 0; i < num_samples; i++) {
+        inputWav[i] = wav_reader.data()[i] / 32768;
+    }
+
+    int window_size_samples = vad.WindowSizeSamples();
+    for (int64_t j = 0; j < num_samples; j += window_size_samples) {
+        auto start = j;
+        // the last window is clipped to the end of the audio
+        auto end = start + window_size_samples >= num_samples
+                       ? num_samples
+                       : start + window_size_samples;
+        auto current_chunk_size = end - start;
+
+        std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
+        assert(r.size() == current_chunk_size);
+
+        if (!vad.ForwardChunk(r)) {
+            std::cerr << "Failed to inference while using model:"
+                      << vad.ModelName() << "." << std::endl;
+            // was `return false`, which converts to exit status 0 (success)
+            return -1;
+        }
+
+        Vad::State s = vad.Postprocess();
+        std::cout << s << " ";
+    }
+    std::cout << std::endl;
+
+    std::vector<std::map<std::string, float>> result = vad.GetResult();
+    for (auto& res : result) {
+        std::cout << "speak start: " << res["start"]
+                  << " s, end: " << res["end"] << " s | ";
+    }
+    std::cout << "\b\b " << std::endl;
+
+    vad.Reset();
+
+    return 0;
+}
--- a/runtime/engine/vad/vad.cc
+++ b/runtime/engine/vad/vad.cc
@ -0,0 +1,306 @@
+// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#include "vad.h"
+#include <cstring>
+#include <iomanip>
+
+
+#ifdef NDEBUG  // NOTE(review): NDEBUG (release) builds pass `true` to FDLogger
+#define LOG_DEBUG                                                              \
+    ::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \
+                                            << ")::" << __FUNCTION__ << "\t"
+#else  // NOTE(review): debug builds pass `false`; if that flag means "enabled", the two branches look swapped - confirm
+#define LOG_DEBUG                            \
+    ::fastdeploy::FDLogger(false, "[DEBUG]") \
+        << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
+#endif  // NDEBUG
+
+Vad::Vad(const std::string& model_file,
+         const fastdeploy::RuntimeOption&
+             custom_option /* = fastdeploy::RuntimeOption() */) {
+    valid_cpu_backends = {fastdeploy::Backend::ORT,
+                          fastdeploy::Backend::OPENVINO};
+    valid_gpu_backends = {fastdeploy::Backend::ORT, fastdeploy::Backend::TRT};
+
+    runtime_option = custom_option;  // start from the caller's option
+    // CPU + ORT backend. NOTE(review): applied after the copy, so this presumably overrides custom_option's device/backend - confirm
+    runtime_option.UseCpu();
+    runtime_option.UseOrtBackend();
+    runtime_option.model_format = fastdeploy::ModelFormat::ONNX;
+    // graph optimization level
+    runtime_option.ort_option.graph_optimization_level = 99;
+    // single-threaded: one intra-op thread and one inter-op thread
+    runtime_option.ort_option.intra_op_num_threads = 1;
+    runtime_option.ort_option.inter_op_num_threads = 1;
+    // model path
+    runtime_option.model_file = model_file;
+}
+
+// Runs Initialize() at most once (guarded by init_) and records its outcome
+// in `initialized`.
+void Vad::Init() {
+    auto run_initialize = [this]() { initialized = Initialize(); };
+    std::call_once(init_, run_initialize);
+}
+
+std::string Vad::ModelName() const { return "VAD"; }
+
+// Stores the audio/VAD parameters and derives the sample-count quantities
+// from them. Logs an error and throws std::runtime_error when `initialized`
+// is already set, i.e. it must be called before Init().
+void Vad::SetConfig(int sr,
+                    int frame_ms,
+                    float threshold,
+                    int min_silence_duration_ms,
+                    int speech_pad_left_ms,
+                    int speech_pad_right_ms) {
+    if (initialized) {
+        const char* too_late = "SetConfig must be called before init";
+        fastdeploy::FDERROR << too_late << std::endl;
+        throw std::runtime_error(too_late);
+    }
+
+    // parameters stored as given
+    sample_rate_ = sr;
+    frame_ms_ = frame_ms;
+    threshold_ = threshold;
+
+    // samples per millisecond (integer division)
+    sr_per_ms_ = sr / 1000;
+
+    // millisecond settings converted to sample counts
+    window_size_samples_ = frame_ms * sr_per_ms_;
+    min_silence_samples_ = min_silence_duration_ms * sr_per_ms_;
+    speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_;
+    speech_pad_right_samples_ = speech_pad_right_ms * sr_per_ms_;
+
+    // until a shorter chunk arrives, a chunk is one full window
+    current_chunk_size_ = window_size_samples_;
+
+    fastdeploy::FDINFO << "sr=" << sr << " threshold=" << threshold
+                       << " frame_ms=" << frame_ms
+                       << " min_silence_duration_ms=" << min_silence_duration_ms
+                       << " speech_pad_left_ms=" << speech_pad_left_ms
+                       << " speech_pad_right_ms=" << speech_pad_right_ms;
+}
+
+// Returns the detector to its initial state: zeroes the h/c state buffers
+// (keeping their size) and clears the trigger flag, the sample counter and
+// all recorded segments and states.
+void Vad::Reset() {
+    // assign() zero-fills in place. It replaces std::memset(..., 0.0f, ...),
+    // which passed a float where memset expects an int byte value and only
+    // worked because 0.0f converts to 0.
+    h_.assign(h_.size(), 0.0f);
+    c_.assign(c_.size(), 0.0f);
+
+    triggerd_ = false;
+    temp_end_ = 0;
+    current_sample_ = 0;
+
+    speakStart_.clear();
+    speakEnd_.clear();
+
+    states_.clear();
+}
+
+bool Vad::Initialize() {
+    // 4 input tensors and 3 output tensors (see ForwardChunk / Postprocess)
+    inputTensors_.resize(4);
+    outputTensors_.resize(3);
+
+    // input shape: [1, window_size_samples_]
+    input_node_dims_.emplace_back(1);
+    input_node_dims_.emplace_back(window_size_samples_);
+    // single-element buffer holding the sample rate
+    sr_.resize(1);
+    sr_[0] = sample_rate_;
+    // h and c state buffers, size_hc_ elements each
+    h_.resize(size_hc_);
+    c_.resize(size_hc_);
+
+    Reset();  // zero h_/c_ and clear the tracking state
+
+    // bring up the inference runtime; on failure log and return false
+    if (!InitRuntime()) {
+        fastdeploy::FDERROR << "Failed to initialize fastdeploy backend."
+                            << std::endl;
+        return false;
+    }
+    fastdeploy::FDINFO << "init done.";
+    return true;
+}
+
+bool Vad::ForwardChunk(std::vector<float>& chunk) {
+    // the last chunk may be shorter than window_size_samples_, so resize the dim
+    input_node_dims_.back() = chunk.size();
+    assert(window_size_samples_ >= chunk.size());
+    current_chunk_size_ = chunk.size();
+
+    inputTensors_[0].name = "input";  // audio samples of this chunk
+    inputTensors_[0].SetExternalData(
+        input_node_dims_, fastdeploy::FDDataType::FP32, chunk.data());
+    inputTensors_[1].name = "sr";  // sample rate
+    inputTensors_[1].SetExternalData(
+        sr_node_dims_, fastdeploy::FDDataType::INT64, sr_.data());
+    inputTensors_[2].name = "h";  // state carried over from the previous chunk
+    inputTensors_[2].SetExternalData(
+        hc_node_dims_, fastdeploy::FDDataType::FP32, h_.data());
+    inputTensors_[3].name = "c";  // state carried over from the previous chunk
+    inputTensors_[3].SetExternalData(
+        hc_node_dims_, fastdeploy::FDDataType::FP32, c_.data());
+
+    if (!Infer(inputTensors_, &outputTensors_)) {  // outputs are consumed by Postprocess()
+        return false;
+    }
+
+    // advance the running sample index by this chunk's length
+    current_sample_ += current_chunk_size_;
+    return true;
+}
+
+const Vad::State& Vad::Postprocess() {
+    // copy the model outputs: speech probability, then the next h and c state
+    outputProb_ = *(float*)outputTensors_[0].Data();
+    auto* hn = static_cast<float*>(outputTensors_[1].MutableData());
+    std::memcpy(h_.data(), hn, h_.size() * sizeof(float));
+    auto* cn = static_cast<float*>(outputTensors_[2].MutableData());
+    std::memcpy(c_.data(), cn, c_.size() * sizeof(float));
+
+    if (outputProb_ < threshold_ && !triggerd_) {
+        // 1. Silence: below threshold and not inside a speech segment
+        LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_
+                  << " s; prob: " << outputProb_ << " }";
+        states_.emplace_back(Vad::State::SIL);
+    } else if (outputProb_ >= threshold_ && !triggerd_) {
+        // 2. Start: first chunk at/above threshold opens a segment
+        triggerd_ = true;
+        speech_start_ =
+            current_sample_ - current_chunk_size_ - speech_pad_left_samples_;  // start of this chunk minus left padding
+        float start_sec = 1.0 * speech_start_ / sample_rate_;
+        speakStart_.emplace_back(start_sec);
+        LOG_DEBUG << "{ speech start: " << start_sec
+                  << " s; prob: " << outputProb_ << " }";
+        states_.emplace_back(Vad::State::START);
+    } else if (outputProb_ >= threshold_ - 0.15 && triggerd_) {
+        // 3. Continue: inside a segment and prob >= threshold_ - 0.15
+
+        if (temp_end_ != 0) {
+            // speech prob relaxation, speech continues again
+            LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: "
+                      << 1.0 * current_sample_ / sample_rate_
+                      << " s; prob: " << outputProb_ << " }";
+            temp_end_ = 0;
+        } else {
+            // speech prob relaxation, keep tracking speech
+            LOG_DEBUG << "{ speech continue: "
+                      << 1.0 * current_sample_ / sample_rate_
+                      << " s; prob: " << outputProb_ << " }";
+        }
+
+        states_.emplace_back(Vad::State::SPEECH);
+    } else if (outputProb_ < threshold_ - 0.15 && triggerd_) {
+        // 4. End candidate: inside a segment and prob < threshold_ - 0.15
+        if (temp_end_ == 0) {
+            temp_end_ = current_sample_;
+        }
+
+        // check possible speech end
+        if (current_sample_ - temp_end_ < min_silence_samples_) {
+            // a. silence < min_silence_samples_, continue speaking
+            LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): "
+                      << 1.0 * current_sample_ / sample_rate_
+                      << " s; prob: " << outputProb_ << " }";
+            states_.emplace_back(Vad::State::SIL);
+        } else {
+            // b. silence >= min_silence_samples_, end speaking
+            speech_end_ = current_sample_ + speech_pad_right_samples_;
+            temp_end_ = 0;
+            triggerd_ = false;
+            auto end_sec = 1.0 * speech_end_ / sample_rate_;
+            speakEnd_.emplace_back(end_sec);
+            LOG_DEBUG << "{ speech end: " << end_sec
+                      << " s; prob: " << outputProb_ << " }";
+            states_.emplace_back(Vad::State::END);
+        }
+    }
+
+    return states_.back();  // NOTE(review): if no branch matched (e.g. NaN prob) nothing was appended - confirm states_ cannot be empty here
+}
+
+// Returns the detected speech segments as a list of {"start", "end"} maps, in
+// seconds. A segment that has started but not yet ended is reported as ending
+// at the current audio position.
+//
+// This getter no longer appends that provisional end to speakEnd_: doing so
+// in a const method left a stale entry behind, so a later real speech end
+// would be paired with the wrong start.
+//
+// The four threshold parameters are currently unused: the post-processing
+// that consumed them is disabled (kept below for reference).
+const std::vector<std::map<std::string, float>> Vad::GetResult(
+    float removeThreshold,
+    float expandHeadThreshold,
+    float expandTailThreshold,
+    float mergeThreshold) const {
+    float audioLength = 1.0 * current_sample_ / sample_rate_;
+    if (speakStart_.empty() && speakEnd_.empty()) {
+        return {};
+    }
+    // NOTE: the disabled code below edits speakStart_/speakEnd_ in place; if
+    // it is re-enabled it should operate on local copies instead.
+    // Remove too short segments
+    //  auto startIter = speakStart_.begin();
+    //  auto endIter = speakEnd_.begin();
+    //  while (startIter != speakStart_.end()) {
+    //      if (removeThreshold < audioLength &&
+    //          *endIter - *startIter < removeThreshold) {
+    //          startIter = speakStart_.erase(startIter);
+    //          endIter = speakEnd_.erase(endIter);
+    //      } else {
+    //          startIter++;
+    //          endIter++;
+    //      }
+    //  }
+    //  // Expand to avoid to tight cut.
+    //  startIter = speakStart_.begin();
+    //  endIter = speakEnd_.begin();
+    //  *startIter = std::fmax(0.f, *startIter - expandHeadThreshold);
+    //  *endIter = std::fmin(*endIter + expandTailThreshold, *(startIter + 1));
+    //  endIter = speakEnd_.end() - 1;
+    //  startIter = speakStart_.end() - 1;
+    //  *startIter = fmax(*startIter - expandHeadThreshold, *(endIter - 1));
+    //  *endIter = std::fmin(*endIter + expandTailThreshold, audioLength);
+    //  for (int i = 1; i < speakStart_.size() - 1; ++i) {
+    //      speakStart_[i] = std::fmax(speakStart_[i] - expandHeadThreshold,
+    //      speakEnd_[i - 1]);
+    //      speakEnd_[i] = std::fmin(speakEnd_[i] + expandTailThreshold,
+    //      speakStart_[i + 1]);
+    //  }
+    //  // Merge very closed segments
+    //  startIter = speakStart_.begin() + 1;
+    //  endIter = speakEnd_.begin();
+    //  while (startIter != speakStart_.end()) {
+    //      if (*startIter - *endIter < mergeThreshold) {
+    //          startIter = speakStart_.erase(startIter);
+    //          endIter = speakEnd_.erase(endIter);
+    //      } else {
+    //          startIter++;
+    //          endIter++;
+    //      }
+    //  }
+
+    std::vector<std::map<std::string, float>> result;
+    const std::size_t numSegments = speakStart_.size();
+    result.reserve(numSegments);
+    for (std::size_t i = 0; i < numSegments; ++i) {
+        // a start without a recorded end is still open: close it at the
+        // current audio length, locally
+        const float endSec = i < speakEnd_.size() ? speakEnd_[i] : audioLength;
+        result.emplace_back(std::map<std::string, float>(
+            {{"start", speakStart_[i]}, {"end", endSec}}));
+    }
+    return result;
+}
+
+// Streams the bracketed three-letter tag of a VAD state; any value outside
+// the four enumerators is written as "[ILL]".
+std::ostream& operator<<(std::ostream& os, const Vad::State& s) {
+    const char* tag = "[ILL]";  // illegal state unless matched below
+    if (s == Vad::State::SIL) {
+        tag = "[SIL]";
+    } else if (s == Vad::State::START) {
+        tag = "[STA]";
+    } else if (s == Vad::State::SPEECH) {
+        tag = "[SPE]";
+    } else if (s == Vad::State::END) {
+        tag = "[END]";
+    }
+    return os << tag;
+}
--- a/runtime/engine/vad/vad.h
+++ b/runtime/engine/vad/vad.h
@ -0,0 +1,124 @@
+// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <iostream>
+#include <map>
+#include <mutex>
+#include <string>
+#include <vector>
+#include "./wav.h"
+#include "fastdeploy/fastdeploy_model.h"
+#include "fastdeploy/runtime.h"
+
+// Voice-activity-detection model built on fastdeploy::FastDeployModel.
+//
+// This header only declares the interface and the state it keeps; the
+// member functions without a body here are defined in the matching source
+// file.
+class Vad : public fastdeploy::FastDeployModel {
+  public:
+    // Detection states. SIL is 0; the rest follow in declaration order.
+    enum class State { SIL = 0, START, SPEECH, END };
+    // Stream inserter for State values.
+    friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);
+
+    // model_file: model to load; custom_option: FastDeploy runtime options,
+    // default-constructed when omitted.
+    Vad(const std::string& model_file,
+        const fastdeploy::RuntimeOption& custom_option =
+            fastdeploy::RuntimeOption());
+
+    void Init();
+
+    void Reset();
+
+    // Configures the detector. Durations are given in milliseconds, `sr` in
+    // Hz (NOTE(review): units inferred from the parameter names - confirm
+    // against the definition).
+    void SetConfig(int sr,
+                   int frame_ms,
+                   float threshold,
+                   int min_silence_duration_ms,
+                   int speech_pad_left_ms,
+                   int speech_pad_right_ms);
+
+    bool ForwardChunk(std::vector<float>& chunk);
+
+    const State& Postprocess();
+
+    // Returns one map per detected segment holding the keys "start" and
+    // "end".
+    const std::vector<std::map<std::string, float>> GetResult(
+        float removeThreshold = 0.0,
+        float expandHeadThreshold = 0.0,
+        float expandTailThreshold = 0,
+        float mergeThreshold = 0.0) const;
+
+    // Returns a copy of the recorded states.
+    const std::vector<State> GetStates() const { return states_; }
+
+    int SampleRate() const { return sample_rate_; }
+
+    int FrameMs() const { return frame_ms_; }
+    int64_t WindowSizeSamples() const { return window_size_samples_; }
+
+    float Threshold() const { return threshold_; }
+
+    // The three *Ms getters convert the stored sample counts back to whole
+    // milliseconds (samples * 1000 / sample_rate_). Dividing by the sample
+    // rate alone would yield seconds.
+    int MinSilenceDurationMs() const {
+        return SamplesToMs(min_silence_samples_);
+    }
+    int SpeechPadLeftMs() const {
+        return SamplesToMs(speech_pad_left_samples_);
+    }
+    int SpeechPadRightMs() const {
+        return SamplesToMs(speech_pad_right_samples_);
+    }
+
+    int MinSilenceSamples() const { return min_silence_samples_; }
+    int SpeechPadLeftSamples() const { return speech_pad_left_samples_; }
+    int SpeechPadRightSamples() const { return speech_pad_right_samples_; }
+
+    std::string ModelName() const override;
+
+  private:
+    bool Initialize();
+
+    // Converts a sample count to whole milliseconds at sample_rate_.
+    // Returns 0 when the sample rate is not positive; the product is formed
+    // in 64 bits so large counts do not overflow.
+    int SamplesToMs(int samples) const {
+        if (sample_rate_ <= 0) {
+            return 0;
+        }
+        return static_cast<int>(static_cast<int64_t>(samples) * 1000 /
+                                sample_rate_);
+    }
+
+  private:
+    std::once_flag init_;
+    // input and output
+    std::vector<fastdeploy::FDTensor> inputTensors_;
+    std::vector<fastdeploy::FDTensor> outputTensors_;
+
+    // model states
+    bool triggerd_ = false;
+    // Sample counters are unsigned int:
+    // MAX 4294967295 samples / 8sample per ms / 1000 / 60 = 8947 minutes
+    unsigned int speech_start_ = 0;
+    unsigned int speech_end_ = 0;
+    unsigned int temp_end_ = 0;
+    unsigned int current_sample_ = 0;
+    unsigned int current_chunk_size_ = 0;
+    float outputProb_ = 0.0f;
+
+    std::vector<float> speakStart_;
+    mutable std::vector<float> speakEnd_;
+
+    std::vector<State> states_;
+
+    /* ========================================================================
+     */
+    int sample_rate_ = 16000;
+    int frame_ms_ = 32;  // 32, 64, 96 for 16k
+    float threshold_ = 0.5f;
+
+    // The members below are zero until configured, so the getters never
+    // read an indeterminate value.
+    int64_t window_size_samples_ = 0;  // support 256 512 768 for 8k; 512
+                                       // 1024 1536 for 16k.
+    int sr_per_ms_ = 0;                // support 8 or 16
+    int min_silence_samples_ = 0;      // sr_per_ms_ * frame_ms_
+    int speech_pad_left_samples_{0};   // usually 250ms
+    int speech_pad_right_samples_{0};  // usually 0
+
+    /* ========================================================================
+     */
+    std::vector<int64_t> sr_;
+    const size_t size_hc_ = 2 * 1 * 64;  // It's FIXED.
+    std::vector<float> h_;
+    std::vector<float> c_;
+
+    std::vector<int64_t> input_node_dims_;
+    const std::vector<int64_t> sr_node_dims_ = {1};
+    const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
+};
--- a/runtime/engine/vad/wav.h
+++ b/runtime/engine/vad/wav.h
@ -0,0 +1,197 @@
+// Copyright (c) 2016 Personal (Binbin Zhang)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include <assert.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <string>
+
+namespace wav {
+
+// In-memory image of a 44-byte RIFF/WAVE header whose "fmt " chunk is
+// 16 bytes long. The struct is read from and written to disk as raw bytes,
+// so every field uses a fixed-width type and the total size is checked at
+// compile time.
+struct WavHeader {
+    char riff[4];  // "RIFF"
+    uint32_t size;
+    char wav[4];  // "WAVE"
+    char fmt[4];  // "fmt "
+    uint32_t fmt_size;
+    uint16_t format;
+    uint16_t channels;
+    uint32_t sample_rate;
+    uint32_t bytes_per_second;
+    uint16_t block_size;
+    uint16_t bit;
+    char data[4];  // "data"
+    uint32_t data_size;
+};
+static_assert(sizeof(WavHeader) == 44,
+              "WavHeader must match the 44-byte on-disk wav header");
+
+// Loads a PCM wav file into a heap buffer of float samples.
+//
+// The reader owns the buffer returned by data(): it is released by the
+// destructor, replaced on every successful Open(), and deep-copied when the
+// reader itself is copied. After a failed Open() the reader keeps whatever
+// it held before the call (nothing for a fresh object).
+class WavReader {
+  public:
+    WavReader() {}
+    explicit WavReader(const std::string& filename) { Open(filename); }
+
+    WavReader(const WavReader& other) { CopyFrom(other); }
+    WavReader& operator=(const WavReader& other) {
+        if (this != &other) {
+            delete[] data_;
+            CopyFrom(other);
+        }
+        return *this;
+    }
+    ~WavReader() { delete[] data_; }
+
+    // Reads `filename` and converts every sample to float.
+    // Returns false (after printing a message to stderr) when the file
+    // cannot be opened, the header is truncated or malformed, no "data"
+    // chunk is found, or the bit depth is not 8, 16 or 32.
+    bool Open(const std::string& filename) {
+        FILE* fp = fopen(filename.c_str(), "rb");
+        if (NULL == fp) {
+            fprintf(stderr, "Error in read %s", filename.c_str());
+            return false;
+        }
+        // Single exit point so the FILE handle is closed on every path.
+        const bool ok = Load(fp);
+        fclose(fp);
+        return ok;
+    }
+
+    int num_channel() const { return num_channel_; }
+    int sample_rate() const { return sample_rate_; }
+    int bits_per_sample() const { return bits_per_sample_; }
+    int num_samples() const { return num_samples_; }
+    const float* data() const { return data_; }
+
+  private:
+    // Reads the 8 bytes of a sub-chunk header (4-byte tag + 4-byte size).
+    // This deliberately fills both header->data and the data_size field
+    // that directly follows it in WavHeader.
+    static bool ReadChunkHeader(FILE* fp, WavHeader* header) {
+        return fread(header->data, 8, 1, fp) == 1;
+    }
+
+    // Reads one sample of type T and widens it to float. A short read
+    // (truncated file) yields 0 instead of an indeterminate value.
+    template <typename T>
+    static float ReadSample(FILE* fp) {
+        T sample = 0;
+        if (fread(&sample, sizeof(T), 1, fp) != 1) {
+            sample = 0;
+        }
+        return static_cast<float>(sample);
+    }
+
+    // Parses the header and sample data from an already opened file.
+    // Members are only updated once everything has been read.
+    bool Load(FILE* fp) {
+        WavHeader header;
+        if (fread(&header, sizeof(header), 1, fp) != 1) {
+            fprintf(stderr, "WaveData: file is too short for a wav header.\n");
+            return false;
+        }
+        if (header.fmt_size < 16) {
+            fprintf(stderr,
+                    "WaveData: expect PCM format data "
+                    "to have fmt chunk of at least size 16.\n");
+            return false;
+        } else if (header.fmt_size > 16) {
+            // The chunk after "fmt " starts later than in the 44-byte
+            // layout; re-read its tag and size from the real position.
+            const long offset =
+                44 - 8 + static_cast<long>(header.fmt_size) - 16;
+            if (fseek(fp, offset, SEEK_SET) != 0 ||
+                !ReadChunkHeader(fp, &header)) {
+                fprintf(stderr, "WaveData: failed to locate the data chunk.\n");
+                return false;
+            }
+        }
+        // check "riff" "WAVE" "fmt " "data"
+
+        // Skip any sub-chunks between "fmt" and "data".  Usually there will
+        // be a single "fact" sub chunk, but on Windows there can also be a
+        // "list" sub chunk.
+        while (0 != strncmp(header.data, "data", 4)) {
+            // We will just ignore the data in these chunks. Stop at end of
+            // file instead of spinning forever when no "data" chunk exists.
+            if (fseek(fp, static_cast<long>(header.data_size), SEEK_CUR) !=
+                    0 ||
+                !ReadChunkHeader(fp, &header)) {
+                fprintf(stderr, "WaveData: failed to locate the data chunk.\n");
+                return false;
+            }
+        }
+
+        if (header.channels == 0) {
+            fprintf(stderr, "WaveData: channel count is zero.\n");
+            return false;
+        }
+        if (header.bit != 8 && header.bit != 16 && header.bit != 32) {
+            fprintf(stderr, "unsupported quantization bits");
+            return false;
+        }
+
+        const int num_data = header.data_size / (header.bit / 8);
+        float* samples = new float[num_data];  // Create 1-dim array
+        for (int i = 0; i < num_data; ++i) {
+            switch (header.bit) {
+                case 8:
+                    // NOTE(review): read as plain `char`, so the signedness
+                    // of 8-bit samples follows the platform.
+                    samples[i] = ReadSample<char>(fp);
+                    break;
+                case 16:
+                    samples[i] = ReadSample<int16_t>(fp);
+                    break;
+                default:  // 32, the only value left after the check above
+                    samples[i] = ReadSample<int32_t>(fp);
+                    break;
+            }
+        }
+
+        delete[] data_;  // drop the buffer of a previous Open()
+        data_ = samples;
+        num_data_ = num_data;
+        num_channel_ = header.channels;
+        sample_rate_ = header.sample_rate;
+        bits_per_sample_ = header.bit;
+        num_samples_ = num_data / num_channel_;
+        return true;
+    }
+
+    // Deep-copies `other`; any buffer held by *this must already be freed.
+    void CopyFrom(const WavReader& other) {
+        num_channel_ = other.num_channel_;
+        sample_rate_ = other.sample_rate_;
+        bits_per_sample_ = other.bits_per_sample_;
+        num_samples_ = other.num_samples_;
+        num_data_ = other.num_data_;
+        data_ = nullptr;
+        if (other.data_ != nullptr) {
+            data_ = new float[num_data_];
+            memcpy(data_, other.data_, sizeof(float) * num_data_);
+        }
+    }
+
+    int num_channel_ = 0;
+    int sample_rate_ = 0;
+    int bits_per_sample_ = 0;
+    int num_samples_ = 0;  // sample points per channel
+    int num_data_ = 0;     // number of floats stored in data_
+    float* data_ = nullptr;
+};
+
+// Writes interleaved float samples to a PCM wav file.
+//
+// The writer does not own `data`; the caller must keep it alive until
+// Write() returns. `data` is indexed as data[i * num_channel + j], so it
+// must hold num_samples * num_channel values.
+class WavWriter {
+  public:
+    WavWriter(const float* data,
+              int num_samples,
+              int num_channel,
+              int sample_rate,
+              int bits_per_sample)
+        : data_(data),
+          num_samples_(num_samples),
+          num_channel_(num_channel),
+          sample_rate_(sample_rate),
+          bits_per_sample_(bits_per_sample) {}
+
+    // Writes a 44-byte header followed by the samples converted to the
+    // configured bit depth (8, 16 or 32). Prints a message to stderr and
+    // writes nothing when the bit depth is unsupported or the file cannot
+    // be opened.
+    void Write(const std::string& filename) {
+        if (bits_per_sample_ != 8 && bits_per_sample_ != 16 &&
+            bits_per_sample_ != 32) {
+            // Bail out before creating a file that would hold a header but
+            // no sample data.
+            fprintf(stderr, "unsupported quantization bits");
+            return;
+        }
+        // "wb": the payload is binary; text mode would rewrite 0x0A bytes
+        // on platforms that translate line endings.
+        FILE* fp = fopen(filename.c_str(), "wb");
+        if (NULL == fp) {
+            fprintf(stderr, "Error in write %s\n", filename.c_str());
+            return;
+        }
+        // init char 'riff' 'WAVE' 'fmt ' 'data'
+        WavHeader header;
+        char wav_header[44] = {
+            0x52, 0x49, 0x46, 0x46, 0x00, 0x00, 0x00, 0x00, 0x57, 0x41, 0x56,
+            0x45, 0x66, 0x6d, 0x74, 0x20, 0x10, 0x00, 0x00, 0x00, 0x01, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x64, 0x61, 0x74, 0x61, 0x00, 0x00, 0x00, 0x00};
+        memcpy(&header, wav_header, sizeof(header));
+        const int bytes_per_sample = bits_per_sample_ / 8;
+        header.channels = num_channel_;
+        header.bit = bits_per_sample_;
+        header.sample_rate = sample_rate_;
+        header.data_size = num_samples_ * num_channel_ * bytes_per_sample;
+        header.size = sizeof(header) - 8 + header.data_size;
+        header.bytes_per_second =
+            sample_rate_ * num_channel_ * bytes_per_sample;
+        header.block_size = num_channel_ * bytes_per_sample;
+
+        fwrite(&header, 1, sizeof(header), fp);
+
+        for (int i = 0; i < num_samples_; ++i) {
+            for (int j = 0; j < num_channel_; ++j) {
+                const float value = data_[i * num_channel_ + j];
+                switch (bits_per_sample_) {
+                    case 8: {
+                        char sample = static_cast<char>(value);
+                        fwrite(&sample, 1, sizeof(sample), fp);
+                        break;
+                    }
+                    case 16: {
+                        int16_t sample = static_cast<int16_t>(value);
+                        fwrite(&sample, 1, sizeof(sample), fp);
+                        break;
+                    }
+                    case 32: {
+                        int sample = static_cast<int>(value);
+                        fwrite(&sample, 1, sizeof(sample), fp);
+                        break;
+                    }
+                }
+            }
+        }
+        fclose(fp);
+    }
+
+  private:
+    const float* data_;
+    int num_samples_;  // sample points per channel
+    int num_channel_;
+    int sample_rate_;
+    int bits_per_sample_;
+};
+
+}  // namespace wav