[runtime] optimize compilation and add vad interface ()

* vad recipe ok

* refactor vad, add vad conf, vad interface, vad recipe

* format

* install vad lib/bin/inc

* using cpack

* add vad doc, fix vad state name

* add comment

* refactor fastdeploy download

* add vad jni; format code

* add timer; compute vad rtf; vad add beam param

* android find_library

* fix log; add vad rtf

* fix glog

* fix BUILD_TYPE bug

* update doc

* rm jni
pull/3050/head
Hui Zhang 2 years ago committed by GitHub
parent 2beb7ffce0
commit bf914a9c8b

@ -1,4 +1,5 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
# >=3.17 support -DCMAKE_FIND_DEBUG_MODE=ON
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
set(CMAKE_PROJECT_INCLUDE_BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/EnableCMP0048.cmake")
@ -6,20 +7,12 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(system)
# Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE
"Release"
CACHE
STRING
"Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel"
FORCE)
endif()
project(paddlespeech VERSION 0.1)
include(FetchContent)
include(ExternalProject)
set(PPS_VERSION_MAJOR 1)
set(PPS_VERSION_MINOR 0)
set(PPS_VERSION_PATCH 0)
set(PPS_VERSION "${PPS_VERSION_MAJOR}.${PPS_VERSION_MINOR}.${PPS_VERSION_PATCH}")
# fc_patch dir
set(FETCHCONTENT_QUIET off)
@ -27,21 +20,36 @@ get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR
set(FETCHCONTENT_BASE_DIR ${fc_patch})
set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_FIND_DEBUG_MODE OFF)
set(PPS_CXX_STANDARD 14)
# set std-14
set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD})
add_compile_options(-fPIC)
# compiler option
# Keep the same with openfst, -fPIC or -fpic
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl")
SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb")
SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall")
# Ninja Generator will set CMAKE_BUILD_TYPE to Debug
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" FORCE)
endif()
# find_* e.g. find_library work when Cross-Compiling
if(ANDROID)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH)
endif()
# install dir into `build/install`
set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install)
include(FetchContent)
include(ExternalProject)
###############################################################################
# Option Configurations
###############################################################################
# https://github.com/google/brotli/pull/655
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(WITH_ASR "build asr" ON)
option(WITH_CLS "build cls" ON)
option(WITH_VAD "build vad" ON)
@ -77,6 +85,7 @@ endif()
###############################################################################
# Find Package
###############################################################################
# https://github.com/Kitware/CMake/blob/v3.1.0/Modules/FindThreads.cmake#L207
find_package(Threads REQUIRED)
if(WITH_ASR)
@ -157,6 +166,22 @@ include(summary)
###############################################################################
# Add local library
###############################################################################
set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine)
set(ENGINE_ROOT ${CMAKE_SOURCE_DIR}/engine)
add_subdirectory(engine)
add_subdirectory(engine)
###############################################################################
# CPack library
###############################################################################
# build a CPack driven installer package
include (InstallRequiredSystemLibraries)
set(CPACK_PACKAGE_NAME "paddlespeech_library")
set(CPACK_PACKAGE_VENDOR "paddlespeech")
set(CPACK_PACKAGE_VERSION_MAJOR 1)
set(CPACK_PACKAGE_VERSION_MINOR 0)
set(CPACK_PACKAGE_VERSION_PATCH 0)
set(CPACK_PACKAGE_DESCRIPTION "paddlespeech library")
set(CPACK_PACKAGE_CONTACT "paddlespeech@baidu.com")
set(CPACK_SOURCE_GENERATOR "TGZ")
include (CPack)

@ -1,8 +1,20 @@
#!/usr/bin/env bash
set -xe
BUILD_ROOT=build/Linux
BUILD_DIR=${BUILD_ROOT}/x86_64
mkdir -p ${BUILD_DIR}
# The build script has been verified in the PaddlePaddle docker image.
# Please follow the instructions below to install the PaddlePaddle docker image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html
cmake -B build -DWITH_ASR=ON -DWITH_CLS=OFF -DWITH_VAD=OFF
cmake --build build -j
#cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Android/arm64-v8a-api-21/install
cmake -B ${BUILD_DIR} \
-DCMAKE_BUILD_TYPE=Release \
-DBUILD_SHARED_LIBS=OFF \
-DWITH_ASR=OFF \
-DWITH_CLS=OFF \
-DWITH_VAD=ON \
-DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Linux/x86_64/install
cmake --build ${BUILD_DIR} -j

@ -14,8 +14,8 @@ TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake
# Create build directory
BUILD_ROOT=build/Android
BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-21
#FASDEPLOY_INSTALL_DIR="${BUILD_DIR}/install"
#mkdir build && mkdir ${BUILD_ROOT} && mkdir ${BUILD_DIR}
FASTDEPLOY_INSTALL_DIR="/workspace/zhanghui/paddle/FastDeploy/build/Android/arm64-v8a-api-21/install"
mkdir -p ${BUILD_DIR}
cd ${BUILD_DIR}
@ -27,10 +27,13 @@ cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \
-DANDROID_PLATFORM=${ANDROID_PLATFORM} \
-DANDROID_STL=${ANDROID_STL} \
-DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \
-DBUILD_SHARED_LIBS=OFF \
-DWITH_ASR=OFF \
-DWITH_CLS=OFF \
-DWITH_VAD=ON \
-DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} \
-DCMAKE_FIND_DEBUG_MODE=OFF \
-Wno-dev ../../..
#-DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} \
# Build FastDeploy Android C++ SDK
make

@ -1,42 +1,119 @@
set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture:
android_arm, android_armv7, android_armv8, android_x86, android_x86_64,
mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86,
windows_x86")
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget -c https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
endif()
include(FetchContent)
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared.tgz)
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget -c https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared ${FASTDEPLOY_DIR}/android-armv7v8")
endif()
set(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 1 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_PATCH 1
LOG_CONFIGURE 1# Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_INSTALL 1
LOG_TEST 1 # Wrap test in script to log output
LOG_MERGED_STDOUTERR 1
LOG_OUTPUT_ON_FAILURE 1
)
if(NOT FASTDEPLOY_INSTALL_DIR)
if(ANDROID)
FetchContent_Declare(
fastdeploy
URL https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz
URL_HASH MD5=2a15301158e9eb157a4f11283689e7ba
${EXTERNAL_PROJECT_LOG_ARGS}
)
add_definitions("-DUSE_PADDLE_LITE_BAKEND")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
else() # Linux
FetchContent_Declare(
fastdeploy
URL https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz
URL_HASH MD5=125df3bfce603521960cc5c8b47faab0
${EXTERNAL_PROJECT_LOG_ARGS}
)
add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
# add_definitions("-DUSE_ORT_BACKEND")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
endif()
if(ANDROID)
set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8)
add_definitions("-DUSE_PADDLE_LITE_BAKEND")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
elseif(UNIX)
set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64)
add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
# add_definitions("-DUSE_ORT_BACKEND")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
FetchContent_MakeAvailable(fastdeploy)
set(FASTDEPLOY_INSTALL_DIR ${fc_patch}/fastdeploy-src)
endif()
message(STATUS "FASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} ${UNIX}")
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
# fix compiler flags conflict, since fastdeploy using c++11 for project
# this line must after `include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)`
set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD})
include_directories(${FASTDEPLOY_INCS})
message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")
# install fastdeploy and dependents lib
# install_fastdeploy_libraries(${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
# No dynamic libs need to install while using
# FastDeploy static lib.
if(ANDROID AND WITH_ANDROID_STATIC_LIB)
return()
endif()
set(DYN_LIB_SUFFIX "*.so*")
if(WIN32)
set(DYN_LIB_SUFFIX "*.dll")
elseif(APPLE)
set(DYN_LIB_SUFFIX "*.dylib*")
endif()
if(FastDeploy_DIR)
set(DYN_SEARCH_DIR ${FastDeploy_DIR})
elseif(FASTDEPLOY_INSTALL_DIR)
set(DYN_SEARCH_DIR ${FASTDEPLOY_INSTALL_DIR})
else()
message(FATAL_ERROR "Please set FastDeploy_DIR/FASTDEPLOY_INSTALL_DIR before call install_fastdeploy_libraries.")
endif()
file(GLOB_RECURSE ALL_NEED_DYN_LIBS ${DYN_SEARCH_DIR}/lib/${DYN_LIB_SUFFIX})
file(GLOB_RECURSE ALL_DEPS_DYN_LIBS ${DYN_SEARCH_DIR}/third_libs/${DYN_LIB_SUFFIX})
if(ENABLE_VISION)
# OpenCV
if(ANDROID)
file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${DYN_LIB_SUFFIX})
else()
file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/../../${DYN_LIB_SUFFIX})
endif()
list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS})
if(WIN32)
file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX})
install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
elseif(ANDROID AND (NOT WITH_ANDROID_OPENCV_STATIC))
file(GLOB OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${ANDROID_ABI}/${DYN_LIB_SUFFIX})
install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
else() # linux/mac
file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/lib/${DYN_LIB_SUFFIX})
install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
endif()
# FlyCV
if(ENABLE_FLYCV)
file(GLOB_RECURSE ALL_FLYCV_DYN_LIBS ${FLYCV_LIB_DIR}/${DYN_LIB_SUFFIX})
list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_FLYCV_DYN_LIBS})
if(ANDROID AND (NOT WITH_ANDROID_FLYCV_STATIC))
install(FILES ${ALL_FLYCV_DYN_LIBS} DESTINATION lib)
endif()
endif()
endif()
if(ENABLE_OPENVINO_BACKEND)
# need plugins.xml for openvino backend
set(OPENVINO_RUNTIME_BIN_DIR ${OPENVINO_DIR}/bin)
file(GLOB OPENVINO_PLUGIN_XML ${OPENVINO_RUNTIME_BIN_DIR}/*.xml)
install(FILES ${OPENVINO_PLUGIN_XML} DESTINATION lib)
endif()
# Install other libraries
install(FILES ${ALL_NEED_DYN_LIBS} DESTINATION lib)
install(FILES ${ALL_DEPS_DYN_LIBS} DESTINATION lib)

@ -9,3 +9,5 @@ FetchContent_MakeAvailable(gflags)
# openfst need
include_directories(${gflags_BINARY_DIR}/include)
install(FILES ${gflags_BINARY_DIR}/libgflags_nothreads.a DESTINATION lib)

@ -7,6 +7,19 @@ else() # UNIX
glog
URL https://paddleaudio.bj.bcebos.com/build/glog-0.4.0.zip
URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DWITH_GFLAGS=OFF
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
${EXTERNAL_OPTIONAL_ARGS}
)
FetchContent_MakeAvailable(glog)
include_directories(${glog_BINARY_DIR} ${glog_SOURCE_DIR}/src)
@ -15,7 +28,8 @@ endif()
if(ANDROID)
add_library(extern_glog INTERFACE)
add_dependencies(extern_glog gflags)
else() # UNIX
add_dependencies(glog gflags)
add_library(extern_glog ALIAS glog)
add_dependencies(extern_glog gflags)
endif()

@ -10,9 +10,19 @@ include(FetchContent)
#Application of Automata, (CIAA 2007), volume 4783 of Lecture Notes in
#Computer Science, pages 11-23. Springer, 2007. http://www.openfst.org.
set(EXTERNAL_PROJECT_LOG_ARGS
LOG_DOWNLOAD 1 # Wrap download in script to log output
LOG_UPDATE 1 # Wrap update in script to log output
LOG_CONFIGURE 1# Wrap configure in script to log output
LOG_BUILD 1 # Wrap build in script to log output
LOG_TEST 1 # Wrap test in script to log output
LOG_INSTALL 1 # Wrap install in script to log output
)
ExternalProject_Add(openfst
URL https://paddleaudio.bj.bcebos.com/build/openfst_1.7.2.zip
URL_HASH SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6
${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${openfst_PREFIX_DIR}
SOURCE_DIR ${openfst_SOURCE_DIR}
BINARY_DIR ${openfst_BINARY_DIR}

@ -15,6 +15,7 @@
function(pps_summary)
message(STATUS "")
message(STATUS "*************PaddleSpeech Building Summary**********")
message(STATUS " PPS_VERSION : ${PPS_VERSION}")
message(STATUS " CMake version : ${CMAKE_VERSION}")
message(STATUS " CMake command : ${CMAKE_COMMAND}")
message(STATUS " UNIX : ${UNIX}")
@ -24,10 +25,13 @@ function(pps_summary)
message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}")
message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}")
message(STATUS " Build type : ${CMAKE_BUILD_TYPE}")
message(STATUS " BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}")
get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} COMPILE_DEFINITIONS)
message(STATUS " Compile definitions : ${tmp}")
message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}")
message(STATUS " CMAKE_CURRENT_BINARY_DIR : ${CMAKE_CURRENT_BINARY_DIR}")
message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}")
message(STATUS " CMAKE_INSTALL_LIBDIR : ${CMAKE_INSTALL_LIBDIR}")
message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}")
message(STATUS " CMAKE_SYSTEM_NAME : ${CMAKE_SYSTEM_NAME}")
message(STATUS "")
@ -39,6 +43,8 @@ function(pps_summary)
message(STATUS " WITH_TESTING : ${WITH_TESTING}")
message(STATUS " WITH_PROFILING : ${WITH_PROFILING}")
message(STATUS " FASTDEPLOY_INSTALL_DIR : ${FASTDEPLOY_INSTALL_DIR}")
message(STATUS " FASTDEPLOY_INCS : ${FASTDEPLOY_INCS}")
message(STATUS " FASTDEPLOY_LIBS : ${FASTDEPLOY_LIBS}")
if(WITH_GPU)
message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}")
endif()

@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "base/common.h"
#include "decoder/ctc_prefix_beam_search_decoder.h"
#include "frontend/data_cache.h"
#include "fst/symbol-table.h"
#include "kaldi/util/table-types.h"
@ -117,9 +117,9 @@ int main(int argc, char* argv[]) {
ori_feature_len - chunk_idx * chunk_stride, chunk_size);
}
if (this_chunk_size < receptive_field_length) {
LOG(WARNING) << "utt: " << utt << " skip last "
<< this_chunk_size << " frames, expect is "
<< receptive_field_length;
LOG(WARNING)
<< "utt: " << utt << " skip last " << this_chunk_size
<< " frames, expect is " << receptive_field_length;
break;
}

@ -14,8 +14,8 @@
// TODO: refactor, replace with gtest
#include "decoder/ctc_tlg_decoder.h"
#include "base/common.h"
#include "decoder/ctc_tlg_decoder.h"
#include "decoder/param.h"
#include "frontend/data_cache.h"
#include "kaldi/util/table-types.h"

@ -13,12 +13,13 @@
// limitations under the License.
#include "nnet/nnet_producer.h"
#include "matrix/kaldi-matrix.h"
namespace ppspeech {
using std::vector;
using kaldi::BaseFloat;
using std::vector;
NnetProducer::NnetProducer(std::shared_ptr<NnetBase> nnet,
std::shared_ptr<FrontendInterface> frontend)

@ -13,13 +13,13 @@
// limitations under the License.
#include "nnet/u2_nnet.h"
#include "base/common.h"
#include "decoder/param.h"
#include "frontend/assembler.h"
#include "frontend/data_cache.h"
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/u2_nnet.h"
DEFINE_string(feature_rspecifier, "", "test feature rspecifier");
@ -93,9 +93,9 @@ int main(int argc, char* argv[]) {
ori_feature_len - chunk_idx * chunk_stride, chunk_size);
}
if (this_chunk_size < receptive_field_length) {
LOG(WARNING) << "utt: " << utt << " skip last "
<< this_chunk_size << " frames, expect is "
<< receptive_field_length;
LOG(WARNING)
<< "utt: " << utt << " skip last " << this_chunk_size
<< " frames, expect is " << receptive_field_length;
break;
}

@ -13,7 +13,6 @@
// limitations under the License.
#include "nnet/u2_nnet.h"
#include "base/common.h"
#include "decoder/param.h"
#include "frontend/feature_pipeline.h"
@ -21,6 +20,7 @@
#include "kaldi/util/table-types.h"
#include "nnet/decodable.h"
#include "nnet/nnet_producer.h"
#include "nnet/u2_nnet.h"
DEFINE_string(wav_rspecifier, "", "test wav rspecifier");
DEFINE_string(nnet_prob_wspecifier, "", "nnet prob wspecifier");

@ -12,7 +12,6 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "recognizer/u2_recognizer.h"
#include "common/base/thread_pool.h"
#include "common/utils/file_utils.h"
#include "common/utils/strings.h"
@ -20,6 +19,7 @@
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "nnet/u2_nnet.h"
#include "recognizer/u2_recognizer.h"
DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
DEFINE_string(result_wspecifier, "", "test result wspecifier");

@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "recognizer/u2_recognizer.h"
#include "decoder/param.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "recognizer/u2_recognizer.h"
DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
DEFINE_string(result_wspecifier, "", "test result wspecifier");

@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "recognizer/u2_recognizer.h"
#include "decoder/param.h"
#include "frontend/wave-reader.h"
#include "kaldi/util/table-types.h"
#include "recognizer/u2_recognizer.h"
DEFINE_string(wav_rspecifier, "", "test feature rspecifier");
DEFINE_string(result_wspecifier, "", "test result wspecifier");

@ -12,10 +12,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "websocket/websocket_client.h"
#include "kaldi/feat/wave-reader.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
#include "websocket/websocket_client.h"
DEFINE_string(host, "127.0.0.1", "host of websocket server");
DEFINE_int32(port, 8082, "port of websocket server");

@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "websocket/websocket_server.h"
#include "decoder/param.h"
#include "websocket/websocket_server.h"
DEFINE_int32(port, 8082, "websocket listening port");

@ -3,7 +3,7 @@ set(srcs
panns_interface.cc
)
add_library(cls SHARED ${srcs})
add_library(cls ${srcs})
target_link_libraries(cls INTERFACE -static-libstdc++;-Wl,-Bsymbolic ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils )
set(bin_name panns_nnet_main)

@ -13,6 +13,7 @@
// limitations under the License.
#include "cls/nnet/panns_interface.h"
#include "cls/nnet/panns_nnet.h"
#include "common/base/config.h"

@ -14,6 +14,7 @@
#include <fstream>
#include <string>
#include "base/flags.h"
#include "cls/nnet/panns_interface.h"

@ -12,4 +12,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/frontend
add_subdirectory(frontend)
add_library(common INTERFACE)
add_definitions(common base utils kaldi-matrix frontend)
target_link_libraries(common INTERFACE base utils kaldi-matrix frontend)
install(TARGETS base DESTINATION lib)
install(TARGETS utils DESTINATION lib)
install(TARGETS kaldi-matrix DESTINATION lib)
install(TARGETS frontend DESTINATION lib)

@ -36,6 +36,7 @@ if(ANDROID)
glog_utils.cc
)
add_library(base ${csrc})
target_link_libraries(base gflags)
else() # UNIX
set(csrc)
add_library(base INTERFACE)

@ -28,7 +28,7 @@ typedef int int32; // NOLINT
#if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD)
typedef long int64; // NOLINT
#else
typedef long long int64; // NOLINT
typedef long long int64; // NOLINT
#endif
typedef unsigned char uint8; // NOLINT

@ -50,4 +50,5 @@
#include "base/log.h"
#include "base/macros.h"
#include "utils/file_utils.h"
#include "utils/math.h"
#include "utils/math.h"
#include "utils/timer.h"

@ -10,11 +10,14 @@ using namespace std;
#pragma once
#ifdef _MSC_VER
#pragma region ParseIniFile
#endif
/*
* \brief Generic configuration Class
*
*/
* \brief Generic configuration Class
*
*/
class Config {
// Data
protected:
@ -32,7 +35,7 @@ class Config {
std::string comment = "#");
Config();
template <class T>
T Read(const std::string& in_key) const; //!<Search for key and read value
T Read(const std::string& in_key) const; //!< Search for key and read value
//! or optional default value, call
//! as read<T>
template <class T>
@ -335,4 +338,6 @@ void Config::ReadFile(string filename, string delimiter, string comment) {
in >> (*this);
}
#ifdef _MSC_VER
#pragma endregion ParseIniFIle
#endif
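For reference, a minimal sketch of how the Config class above is consumed elsewhere in this change (see PPSVadCreateInstance), assuming an INI-style file of `key = value` lines with `#` comments; the path is a placeholder:
```c++
#include <string>
#include "common/base/config.h"

void LoadVadConf(const char* path /* e.g. "./vad.conf" */) {
    Config conf(path);
    // Read(key, default) falls back to the default when the key is absent.
    int sr = conf.Read("sr", 16000);
    float threshold = conf.Read("threshold", 0.45f);
    std::string model = conf.Read("model_path", std::string(""));
    (void)sr; (void)threshold; (void)model;
}
```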

@ -29,9 +29,9 @@ LogMessage::LogMessage(const char* file,
bool out_to_file /* = false */)
: level_(level), verbose_(verbose), out_to_file_(out_to_file) {
if (FLAGS_logtostderr == 0) {
stream_ = std::shared_ptr<std::ostream>(&std::cout);
stream_ = static_cast<std::ostream*>(&std::cout);
} else if (FLAGS_logtostderr == 1) {
stream_ = std::shared_ptr<std::ostream>(&std::cerr);
stream_ = static_cast<std::ostream*>(&std::cerr);
} else if (out_to_file_) {
// logfile
lock_.lock();
@ -46,11 +46,21 @@ LogMessage::~LogMessage() {
lock_.unlock();
}
if (level_ == FATAL) {
if (verbose_ && level_ == FATAL) {
std::abort();
}
}
std::ostream* LogMessage::nullstream() {
thread_local static std::ofstream os;
thread_local static bool flag_set = false;
if (!flag_set) {
os.setstate(std::ios_base::badbit);
flag_set = true;
}
return &os;
}
void LogMessage::init(const char* file, int line) {
time_t t = time(0);
char tmp[100];
@ -73,30 +83,20 @@ void LogMessage::init(const char* file, int line) {
std::string("log." + proc_name + ".log.FATAL." + tmp + "." + pid);
}
std::ofstream ofs;
thread_local static std::ofstream ofs;
if (level_ == DEBUG) {
stream_ = std::make_shared<std::ofstream>(
s_debug_logfile_.c_str(), std::ios::out | std::ios::app);
// ofs.open(s_debug_logfile_.c_str(), std::ios::out | std::ios::app);
ofs.open(s_debug_logfile_.c_str(), std::ios::out | std::ios::app);
} else if (level_ == INFO) {
// ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app);
stream_ = std::make_shared<std::ofstream>(
s_warning_logfile_.c_str(), std::ios::out | std::ios::app);
ofs.open(s_info_logfile_.c_str(), std::ios::out | std::ios::app);
} else if (level_ == WARNING) {
// ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app);
stream_ = std::make_shared<std::ofstream>(
s_warning_logfile_.c_str(), std::ios::out | std::ios::app);
ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app);
} else if (level_ == ERROR) {
// ofs.open(s_error_logfile_.c_str(), std::ios::out | std::ios::app);
stream_ = std::make_shared<std::ofstream>(
s_error_logfile_.c_str(), std::ios::out | std::ios::app);
ofs.open(s_error_logfile_.c_str(), std::ios::out | std::ios::app);
} else {
// ofs.open(s_fatal_logfile_.c_str(), std::ios::out | std::ios::app);
stream_ = std::make_shared<std::ofstream>(
s_fatal_logfile_.c_str(), std::ios::out | std::ios::app);
ofs.open(s_fatal_logfile_.c_str(), std::ios::out | std::ios::app);
}
// stream_ = &ofs;
stream_ = &ofs;
stream() << tmp << " " << file << " line " << line << "; ";
stream() << std::flush;

@ -18,6 +18,9 @@
#pragma once
#include <stdlib.h>
#include <unistd.h>
#include <fstream>
#include <iostream>
#include <mutex>
@ -25,9 +28,6 @@
#include <string>
#include <thread>
#include <stdlib.h>
#include <unistd.h>
#include "base/common.h"
#include "base/macros.h"
#ifndef WITH_GLOG
@ -61,13 +61,15 @@ class LogMessage {
~LogMessage();
std::ostream& stream() { return *stream_; }
std::ostream& stream() { return verbose_ ? *stream_ : *nullstream(); }
private:
void init(const char* file, int line);
std::ostream* nullstream();
private:
std::shared_ptr<std::ostream> stream_;
std::ostream* stream_;
std::ostream* null_stream_;
Severity level_;
bool verbose_;
bool out_to_file_;
@ -88,14 +90,16 @@ class LogMessage {
} // namespace ppspeech
#ifndef NDEBUG
#define DLOG_DEBUG \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::DEBUG, false)
#ifdef NDEBUG
#define DLOG_INFO \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, false)
#define DLOG_WARNING \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, false)
#define DLOG_ERROR \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, false)
#define DLOG_FATAL \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, false)
#else
#define DLOG_DEBUG \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::DEBUG, true)
#endif
#define DLOG_INFO \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, true)
#define DLOG_WARNING \
@ -104,17 +108,30 @@ class LogMessage {
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, true)
#define DLOG_FATAL \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, true)
#endif
#define DLOG_0 DLOG_DEBUG
#define DLOG_1 DLOG_INFO
#define DLOG_2 DLOG_WARNING
#define DLOG_3 DLOG_ERROR
#define DLOG_4 DLOG_FATAL
#define LOG(level) DLOG_##level.stream()
#define LOG_INFO \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, true)
#define LOG_WARNING \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, true)
#define LOG_ERROR \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, true)
#define LOG_FATAL \
ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, true)
#define VLOG(verboselevel) LOG(verboselevel)
#define LOG_0 LOG_DEBUG
#define LOG_1 LOG_INFO
#define LOG_2 LOG_WARNING
#define LOG_3 LOG_ERROR
#define LOG_4 LOG_FATAL
#define LOG(level) LOG_##level.stream()
#define DLOG(level) DLOG_##level.stream()
#define VLOG(verboselevel) LOG(verboselevel)
#define CHECK(exp) \
ppspeech::log::LogMessage( \
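A minimal usage sketch of the glog-free logging macros above (the non-`WITH_GLOG` build path); the stream-chaining on `CHECK` mirrors how it is used in vad_interface_main.cc below:
```c++
#include "base/log.h"

void LogDemo(int num_frames) {
    LOG(INFO) << "processed " << num_frames << " frames";
    DLOG(INFO) << "debug-only detail";  // compiled as a silent sink in NDEBUG builds
    CHECK(num_frames >= 0) << "frame count must be non-negative";
}
```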

@ -6,6 +6,7 @@ add_library(kaldi-native-fbank-core
mel-computations.cc
rfft.cc
)
target_link_libraries(kaldi-native-fbank-core PUBLIC utils base)
add_library(frontend STATIC
cmvn.cc
@ -15,7 +16,7 @@ add_library(frontend STATIC
assembler.cc
wave-reader.cc
)
target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils)
target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils base)
set(BINS
compute_fbank_main
@ -24,5 +25,6 @@ set(BINS
foreach(bin_name IN LISTS BINS)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi)
target_link_libraries(${bin_name} PUBLIC frontend base utils kaldi-util gflags extern_glog)
# https://github.com/Kitware/CMake/blob/v3.1.0/Modules/FindThreads.cmake#L207
target_link_libraries(${bin_name} PUBLIC frontend base utils kaldi-util gflags Threads::Threads extern_glog)
endforeach()

@ -17,9 +17,8 @@
namespace ppspeech {
using kaldi::BaseFloat;
using std::vector;
using std::vector;
using std::unique_ptr;
using std::vector;
Assembler::Assembler(AssemblerOptions opts,
unique_ptr<FrontendInterface> base_extractor) {

@ -821,12 +821,12 @@ void cftfsub(int n, double *a, int *ip, int nw, double *w) {
} else
#endif /* USE_CDFT_THREADS */
if (n > 512) {
cftrec4(n, a, nw, w);
} else if (n > 128) {
cftleaf(n, 1, a, nw, w);
} else {
cftfx41(n, a, nw, w);
}
cftrec4(n, a, nw, w);
} else if (n > 128) {
cftleaf(n, 1, a, nw, w);
} else {
cftfx41(n, a, nw, w);
}
bitrv2(n, ip, a);
} else if (n == 32) {
cftf161(a, &w[nw - 8]);
@ -868,12 +868,12 @@ void cftbsub(int n, double *a, int *ip, int nw, double *w) {
} else
#endif /* USE_CDFT_THREADS */
if (n > 512) {
cftrec4(n, a, nw, w);
} else if (n > 128) {
cftleaf(n, 1, a, nw, w);
} else {
cftfx41(n, a, nw, w);
}
cftrec4(n, a, nw, w);
} else if (n > 128) {
cftleaf(n, 1, a, nw, w);
} else {
cftfx41(n, a, nw, w);
}
bitrv2conj(n, ip, a);
} else if (n == 32) {
cftf161(a, &w[nw - 8]);

@ -17,12 +17,13 @@
*/
#include "frontend/rfft.h"
#include "base/log.h"
#include <cmath>
#include <memory>
#include <vector>
#include "base/log.h"
// see fftsg.c
#ifdef __cplusplus
extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w);

@ -19,6 +19,8 @@
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.
#include "frontend/wave-reader.h"
#include <algorithm>
#include <cstdio>
#include <limits>
@ -27,7 +29,6 @@
#include "base/kaldi-error.h"
#include "base/kaldi-utils.h"
#include "frontend/wave-reader.h"
namespace kaldi {
@ -243,10 +244,9 @@ void WaveInfo::Read(std::istream &is) {
<< ", data chunk size: " << data_chunk_size
<< ". Assume 'stream mode' (reading data to EOF).";
if (!is_stream_mode &&
std::abs(static_cast<int64>(riff_chunk_read) +
static_cast<int64>(data_chunk_size) -
static_cast<int64>(riff_chunk_size)) > 1) {
if (!is_stream_mode && std::abs(static_cast<int64>(riff_chunk_read) +
static_cast<int64>(data_chunk_size) -
static_cast<int64>(riff_chunk_size)) > 1) {
// We allow the size to be off by one without warning, because there is
// a
// weirdness in the format of RIFF files that means that the input may

@ -590,7 +590,7 @@ class MatrixBase {
* SpMatrix and use Eig() function there, which uses eigenvalue
* decomposition
* directly rather than SVD.
*/
*/
/// stream read.
/// Use instead of stream<<*this, if you want to add to existing contents.

@ -24,8 +24,10 @@
// limitations under the License.
#include "matrix/kaldi-vector.h"
#include <algorithm>
#include <string>
#include "matrix/kaldi-matrix.h"
namespace kaldi {

@ -90,7 +90,7 @@ typedef uint32 UnsignedMatrixIndexT;
// typedef size_t MatrixIndexT;
// typedef ssize_t SignedMatrixIndexT;
// typedef size_t UnsignedMatrixIndexT;
}
} // namespace kaldi
#endif // KALDI_MATRIX_MATRIX_COMMON_H_

@ -5,6 +5,7 @@ set(csrc
math.cc
strings.cc
audio_process.cc
timer.cc
)
add_library(utils ${csrc})

@ -0,0 +1,63 @@
// Copyright 2020 Xiaomi Corporation (authors: Haowen Qiu)
// Mobvoi Inc. (authors: Fangjun Kuang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <chrono>
#include "common/utils/timer.h"
namespace ppspeech{
struct TimerImpl{
TimerImpl() = default;
virtual ~TimerImpl() = default;
virtual void Reset() = 0;
// time in seconds
virtual double Elapsed() = 0;
};
class CpuTimerImpl : public TimerImpl {
public:
CpuTimerImpl() { Reset(); }
using high_resolution_clock = std::chrono::high_resolution_clock;
void Reset() override { begin_ = high_resolution_clock::now(); }
// time in seconds
double Elapsed() override {
auto end = high_resolution_clock::now();
auto dur =
std::chrono::duration_cast<std::chrono::microseconds>(end - begin_);
return dur.count() / 1000000.0;
}
private:
high_resolution_clock::time_point begin_;
};
Timer::Timer() {
impl_ = std::make_unique<CpuTimerImpl>();
}
Timer::~Timer() = default;
void Timer::Reset() const { impl_->Reset(); }
double Timer::Elapsed() const { return impl_->Elapsed(); }
} //namespace ppspeech

@ -0,0 +1,39 @@
// Copyright 2020 Xiaomi Corporation (authors: Haowen Qiu)
// Mobvoi Inc. (authors: Fangjun Kuang)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
namespace ppspeech {
struct TimerImpl;
class Timer {
public:
Timer();
~Timer();
void Reset() const;
// time in seconds
double Elapsed() const;
private:
std::unique_ptr<TimerImpl> impl_;
};
} //namespace ppspeech
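A short sketch of the Timer usage pattern the demo binaries rely on for RTF measurement (include path assumed to follow timer.cc):
```c++
#include "common/utils/timer.h"

double TimeWork() {
    ppspeech::Timer timer;   // starts counting on construction
    // ... run the workload to be measured ...
    return timer.Elapsed();  // wall-clock seconds since construction / last Reset()
}
```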

@ -1,5 +1,7 @@
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/../
)
add_subdirectory(nnet)
set(bin_name silero_vad_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc vad.cc)
target_link_libraries(${bin_name} ${FASTDEPLOY_LIBS} gflags extern_glog)
add_subdirectory(interface)

@ -17,6 +17,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <string>
namespace wav {

@ -0,0 +1,25 @@
set(srcs
vad_interface.cc
)
add_library(pps_vad_interface ${srcs})
target_link_libraries(pps_vad_interface PUBLIC pps_vad extern_glog)
set(bin_name vad_interface_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} pps_vad_interface)
# set_target_properties(${bin_name} PROPERTIES PUBLIC_HEADER "vad_interface.h;../frontend/wav.h")
file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS pps_vad_interface DESTINATION lib)
install(FILES vad_interface.h DESTINATION include/${DEST_DIR})
install(TARGETS vad_interface_main
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib
PUBLIC_HEADER DESTINATION include/${DEST_DIR}
)
install(FILES vad_interface_main.cc DESTINATION demo/${DEST_DIR})

@ -0,0 +1,94 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad/interface/vad_interface.h"
#include "common/base/config.h"
#include "vad/nnet/vad.h"
PPSHandle_t PPSVadCreateInstance(const char* conf_path) {
Config conf(conf_path);
ppspeech::VadNnetConf nnet_conf;
nnet_conf.sr = conf.Read("sr", 16000);
nnet_conf.frame_ms = conf.Read("frame_ms", 32);
nnet_conf.threshold = conf.Read("threshold", 0.45f);
nnet_conf.beam = conf.Read("beam", 0.15f);
nnet_conf.min_silence_duration_ms =
conf.Read("min_silence_duration_ms", 200);
nnet_conf.speech_pad_left_ms = conf.Read("speech_pad_left_ms", 0);
nnet_conf.speech_pad_right_ms = conf.Read("speech_pad_right_ms", 0);
nnet_conf.model_file_path = conf.Read("model_path", std::string(""));
nnet_conf.param_file_path = conf.Read("param_path", std::string(""));
nnet_conf.num_cpu_thread = conf.Read("num_cpu_thread", 1);
ppspeech::Vad* model = new ppspeech::Vad(nnet_conf.model_file_path);
// custom config, but must be set before init
model->SetConfig(nnet_conf);
model->Init();
return static_cast<PPSHandle_t>(model);
}
int PPSVadDestroyInstance(PPSHandle_t instance) {
ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
if (model != nullptr) {
delete model;
model = nullptr;
}
return 0;
}
int PPSVadChunkSizeSamples(PPSHandle_t instance) {
ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
if (model == nullptr) {
printf("instance is null\n");
return -1;
}
return model->WindowSizeSamples();
}
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
float* chunk,
int num_element) {
ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
if (model == nullptr) {
printf("instance is null\n");
return PPS_VAD_ILLEGAL;
}
std::vector<float> chunk_in(chunk, chunk + num_element);
if (!model->ForwardChunk(chunk_in)) {
printf("forward chunk failed\n");
return PPS_VAD_ILLEGAL;
}
ppspeech::Vad::State s = model->Postprocess();
PPSVadState_t ret = (PPSVadState_t)s;
return ret;
}
int PPSVadReset(PPSHandle_t instance) {
ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
if (model == nullptr) {
printf("instance is null\n");
return -1;
}
model->Reset();
return 0;
}
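PPSVadFeedForward above converts `ppspeech::Vad::State` to `PPSVadState_t` with a plain cast, so the two enums must keep the same ordering; a compile-time guard such as the following (not part of the original sources) would make that coupling explicit:
```c++
#include "vad/interface/vad_interface.h"
#include "vad/nnet/vad.h"

static_assert(static_cast<int>(ppspeech::Vad::State::ILLEGAL) == PPS_VAD_ILLEGAL,
              "vad_interface enum must mirror Vad::State");
static_assert(static_cast<int>(ppspeech::Vad::State::END) == PPS_VAD_END,
              "vad_interface enum must mirror Vad::State");
```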

@ -0,0 +1,46 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#ifdef __cplusplus
extern "C" {
#endif
typedef void* PPSHandle_t;
typedef enum {
PPS_VAD_ILLEGAL = 0, // error
PPS_VAD_SIL, // silence
PPS_VAD_START, // start speech
PPS_VAD_SPEECH, // in speech
PPS_VAD_END, // end speech
PPS_VAD_NUMSTATES, // number of states
} PPSVadState_t;
PPSHandle_t PPSVadCreateInstance(const char* conf_path);
int PPSVadDestroyInstance(PPSHandle_t instance);
int PPSVadReset(PPSHandle_t instance);
int PPSVadChunkSizeSamples(PPSHandle_t instance);
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
float* chunk,
int num_element);
#ifdef __cplusplus
}
#endif // __cplusplus
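A minimal round trip through this C interface; the `key = value` config syntax is an assumption about the Config parser, and `vad.conf` / `silero_vad.onnx` are placeholder paths (a fuller example over a wav file follows in vad_interface_main.cc):
```c++
#include <fstream>
#include <vector>
#include "vad/interface/vad_interface.h"

int main() {
    // Keys mirror those read in PPSVadCreateInstance.
    std::ofstream conf("vad.conf");
    conf << "sr = 16000\n"
            "frame_ms = 32\n"
            "threshold = 0.45\n"
            "beam = 0.15\n"
            "min_silence_duration_ms = 200\n"
            "speech_pad_left_ms = 0\n"
            "speech_pad_right_ms = 0\n"
            "model_path = ./silero_vad.onnx\n"
            "num_cpu_thread = 1\n";
    conf.close();

    PPSHandle_t handle = PPSVadCreateInstance("vad.conf");
    int chunk_samples = PPSVadChunkSizeSamples(handle);  // e.g. 512 for 16 kHz, 32 ms
    std::vector<float> chunk(chunk_samples, 0.0f);       // feed real audio here
    PPSVadState_t state =
        PPSVadFeedForward(handle, chunk.data(), static_cast<int>(chunk.size()));
    PPSVadReset(handle);
    PPSVadDestroyInstance(handle);
    return state == PPS_VAD_ILLEGAL ? 1 : 0;
}
```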

@ -0,0 +1,71 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <iostream>
#include <vector>
#include "common/base/common.h"
#include "vad/frontend/wav.h"
#include "vad/interface/vad_interface.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: vad_interface_main path/to/config path/to/audio "
"run_option, "
"e.g ./vad_interface_main config sample.wav"
<< std::endl;
return -1;
}
std::string config_path = argv[1];
std::string audio_file = argv[2];
PPSHandle_t handle = PPSVadCreateInstance(config_path.c_str());
std::vector<float> inputWav; // [0, 1]
wav::WavReader wav_reader = wav::WavReader(audio_file);
auto sr = wav_reader.sample_rate();
CHECK(sr == 16000) << " sr is " << sr << " expect 16000";
auto num_samples = wav_reader.num_samples();
inputWav.resize(num_samples);
for (int i = 0; i < num_samples; i++) {
inputWav[i] = wav_reader.data()[i] / 32768;
}
ppspeech::Timer timer;
int window_size_samples = PPSVadChunkSizeSamples(handle);
for (int64_t j = 0; j < num_samples; j += window_size_samples) {
auto start = j;
auto end = start + window_size_samples >= num_samples
? num_samples
: start + window_size_samples;
auto current_chunk_size = end - start;
std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
assert(r.size() == static_cast<size_t>(current_chunk_size));
PPSVadState_t s = PPSVadFeedForward(handle, r.data(), r.size());
std::cout << s << " ";
}
std::cout << std::endl;
std::cout << "RTF=" << timer.Elapsed() / double(num_samples / sr)
<< std::endl;
PPSVadReset(handle);
return 0;
}
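The `RTF` printed above is the ratio of processing time to audio duration, i.e. `RTF = timer.Elapsed() / (num_samples / sr)`; values below 1 mean faster-than-real-time processing. Note that if `num_samples` and `sr` are both integer types here, `num_samples / sr` truncates the audio duration to whole seconds before the division.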

@ -0,0 +1,16 @@
set(srcs
vad.cc
)
add_library(pps_vad ${srcs})
target_link_libraries(pps_vad PUBLIC ${FASTDEPLOY_LIBS} common extern_glog)
set(bin_name vad_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} pps_vad)
file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS pps_vad DESTINATION lib)
install(TARGETS extern_glog DESTINATION lib)

@ -1,4 +1,5 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -11,20 +12,15 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad.h"
#include "vad/nnet/vad.h"
#include <cstring>
#include <iomanip>
#include "common/base/common.h"
#ifdef NDEBUG
#define LOG_DEBUG \
::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \
<< ")::" << __FUNCTION__ << "\t"
#else
#define LOG_DEBUG \
::fastdeploy::FDLogger(false, "[DEBUG]") \
<< __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t"
#endif
namespace ppspeech {
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption&
@ -48,18 +44,30 @@ Vad::Vad(const std::string& model_file,
}
void Vad::Init() {
std::call_once(init_, [&]() { initialized = Initialize(); });
std::lock_guard<std::mutex> lock(init_lock_);
Initialize();
}
std::string Vad::ModelName() const { return "VAD"; }
void Vad::SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms) {
if (initialized) {
void Vad::SetConfig(const VadNnetConf conf) {
SetConfig(conf.sr,
conf.frame_ms,
conf.threshold,
conf.beam,
conf.min_silence_duration_ms,
conf.speech_pad_left_ms,
conf.speech_pad_right_ms);
}
void Vad::SetConfig(const int& sr,
const int& frame_ms,
const float& threshold,
const float& beam,
const int& min_silence_duration_ms,
const int& speech_pad_left_ms,
const int& speech_pad_right_ms) {
if (initialized_) {
fastdeploy::FDERROR << "SetConfig must be called before init"
<< std::endl;
throw std::runtime_error("SetConfig must be called before init");
@ -67,6 +75,7 @@ void Vad::SetConfig(int sr,
sample_rate_ = sr;
sr_per_ms_ = sr / 1000;
threshold_ = threshold;
beam_ = beam;
frame_ms_ = frame_ms;
min_silence_samples_ = min_silence_duration_ms * sr_per_ms_;
speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_;
@ -76,8 +85,8 @@ void Vad::SetConfig(int sr,
window_size_samples_ = frame_ms * sr_per_ms_;
current_chunk_size_ = window_size_samples_;
fastdeploy::FDINFO << "sr=" << sr << " threshold=" << threshold
<< " frame_ms=" << frame_ms
fastdeploy::FDINFO << "sr=" << sr_per_ms_ << " threshold=" << threshold_
<< " beam=" << beam_ << " frame_ms=" << frame_ms_
<< " min_silence_duration_ms=" << min_silence_duration_ms
<< " speech_pad_left_ms=" << speech_pad_left_ms
<< " speech_pad_right_ms=" << speech_pad_right_ms;
@ -114,12 +123,17 @@ bool Vad::Initialize() {
Reset();
// InitRuntime
if (!InitRuntime()) {
fastdeploy::FDERROR << "Failed to initialize fastdeploy backend."
<< std::endl;
return false;
}
initialized_ = true;
fastdeploy::FDINFO << "init done.";
return true;
}
@ -162,8 +176,8 @@ const Vad::State& Vad::Postprocess() {
if (outputProb_ < threshold_ && !triggerd_) {
// 1. Silence
LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
DLOG(INFO) << "{ silence: " << 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::SIL);
} else if (outputProb_ >= threshold_ && !triggerd_) {
// 2. Start
@ -172,27 +186,28 @@ const Vad::State& Vad::Postprocess() {
current_sample_ - current_chunk_size_ - speech_pad_left_samples_;
float start_sec = 1.0 * speech_start_ / sample_rate_;
speakStart_.emplace_back(start_sec);
LOG_DEBUG << "{ speech start: " << start_sec
<< " s; prob: " << outputProb_ << " }";
DLOG(INFO) << "{ speech start: " << start_sec
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::START);
} else if (outputProb_ >= threshold_ - 0.15 && triggerd_) {
} else if (outputProb_ >= threshold_ - beam_ && triggerd_) {
// 3. Continue
if (temp_end_ != 0) {
// speech prob relaxation, speech continues again
LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
DLOG(INFO)
<< "{ speech fake end(sil < min_silence_ms) to continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
temp_end_ = 0;
} else {
// speech prob relaxation, keep tracking speech
LOG_DEBUG << "{ speech continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
DLOG(INFO) << "{ speech continue: "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
}
states_.emplace_back(Vad::State::SPEECH);
} else if (outputProb_ < threshold_ - 0.15 && triggerd_) {
} else if (outputProb_ < threshold_ - beam_ && triggerd_) {
// 4. End
if (temp_end_ == 0) {
temp_end_ = current_sample_;
@ -201,9 +216,9 @@ const Vad::State& Vad::Postprocess() {
// check possible speech end
if (current_sample_ - temp_end_ < min_silence_samples_) {
// a. silence < min_slience_samples, continue speaking
LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
DLOG(INFO) << "{ speech fake end(sil < min_silence_ms): "
<< 1.0 * current_sample_ / sample_rate_
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::SIL);
} else {
// b. silence >= min_slience_samples, end speaking
@ -212,8 +227,8 @@ const Vad::State& Vad::Postprocess() {
triggerd_ = false;
auto end_sec = 1.0 * speech_end_ / sample_rate_;
speakEnd_.emplace_back(end_sec);
LOG_DEBUG << "{ speech end: " << end_sec
<< " s; prob: " << outputProb_ << " }";
DLOG(INFO) << "{ speech end: " << end_sec
<< " s; prob: " << outputProb_ << " }";
states_.emplace_back(Vad::State::END);
}
}
@ -303,4 +318,6 @@ std::ostream& operator<<(std::ostream& os, const Vad::State& s) {
break;
}
return os;
}
}
} // namespace ppspeech
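For clarity, a standalone sketch of the threshold/beam hysteresis implemented in Vad::Postprocess() above: speech is entered at `threshold` and only released once the probability drops below `threshold - beam` (the real code additionally waits `min_silence_duration_ms` before emitting END); names below are illustrative, not the class's members:
```c++
enum class SimpleState { SIL, START, SPEECH, END };

SimpleState Step(float prob, bool* in_speech,
                 float threshold = 0.45f, float beam = 0.15f) {
    if (!*in_speech) {
        if (prob < threshold) return SimpleState::SIL;  // e.g. prob = 0.30 stays silent
        *in_speech = true;
        return SimpleState::START;                      // e.g. prob = 0.50 starts speech
    }
    if (prob >= threshold - beam) {
        return SimpleState::SPEECH;                     // e.g. prob = 0.35 keeps speech alive
    }
    *in_speech = false;
    return SimpleState::END;                            // e.g. prob = 0.20 releases the trigger
}
```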

@ -1,4 +1,5 @@
// Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved.
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
@ -11,33 +12,59 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <iostream>
#include <mutex>
#include <vector>
#include "./wav.h"
#include "fastdeploy/fastdeploy_model.h"
#include "fastdeploy/runtime.h"
#include "vad/frontend/wav.h"
namespace ppspeech {
struct VadNnetConf {
// wav
int sr;
int frame_ms;
float threshold;
float beam;
int min_silence_duration_ms;
int speech_pad_left_ms;
int speech_pad_right_ms;
// model
std::string model_file_path;
std::string param_file_path;
std::string dict_file_path;
int num_cpu_thread; // default: 1 thread
std::string backend; // ort,lite, etc.
};
class Vad : public fastdeploy::FastDeployModel {
public:
enum class State { SIL = 0, START, SPEECH, END };
enum class State { ILLEGAL = 0, SIL, START, SPEECH, END };
friend std::ostream& operator<<(std::ostream& os, const Vad::State& s);
Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option =
fastdeploy::RuntimeOption());
virtual ~Vad() {}
void Init();
void Reset();
void SetConfig(int sr,
int frame_ms,
float threshold,
int min_silence_duration_ms,
int speech_pad_left_ms,
int speech_pad_right_ms);
void SetConfig(const int& sr,
const int& frame_ms,
const float& threshold,
const float& beam,
const int& min_silence_duration_ms,
const int& speech_pad_left_ms,
const int& speech_pad_right_ms);
void SetConfig(const VadNnetConf conf);
bool ForwardChunk(std::vector<float>& chunk);
@ -78,7 +105,9 @@ class Vad : public fastdeploy::FastDeployModel {
bool Initialize();
private:
std::once_flag init_;
std::mutex init_lock_;
bool initialized_{false};
// input and output
std::vector<fastdeploy::FDTensor> inputTensors_;
std::vector<fastdeploy::FDTensor> outputTensors_;
@ -103,6 +132,7 @@ class Vad : public fastdeploy::FastDeployModel {
int sample_rate_ = 16000;
int frame_ms_ = 32; // 32, 64, 96 for 16k
float threshold_ = 0.5f;
float beam_ = 0.15f;
int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536
// for 16k.
@ -122,3 +152,5 @@ class Vad : public fastdeploy::FastDeployModel {
const std::vector<int64_t> sr_node_dims_ = {1};
const std::vector<int64_t> hc_node_dims_ = {2, 1, 64};
};
} // namespace ppspeech

@ -1,11 +1,26 @@
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "vad.h"
#include "common/base/common.h"
#include "vad/nnet/vad.h"
int main(int argc, char* argv[]) {
if (argc < 3) {
std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio "
std::cout << "Usage: vad_nnet_main path/to/model path/to/audio "
"run_option, "
"e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav"
"e.g ./vad_nnet_main silero_vad.onnx sample.wav"
<< std::endl;
return -1;
}
@ -14,9 +29,9 @@ int main(int argc, char* argv[]) {
std::string audio_file = argv[2];
int sr = 16000;
Vad vad(model_file);
ppspeech::Vad vad(model_file);
// custom config, but must be set before init
vad.SetConfig(sr, 32, 0.45f, 200, 0, 0);
vad.SetConfig(sr, 32, 0.5f, 0.15, 200, 0, 0);
vad.Init();
std::vector<float> inputWav; // [0, 1]
@ -30,6 +45,7 @@ int main(int argc, char* argv[]) {
inputWav[i] = wav_reader.data()[i] / 32768;
}
ppspeech::Timer timer;
int window_size_samples = vad.WindowSizeSamples();
for (int64_t j = 0; j < num_samples; j += window_size_samples) {
auto start = j;
@ -39,7 +55,7 @@ int main(int argc, char* argv[]) {
auto current_chunk_size = end - start;
std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
assert(r.size() == current_chunk_size);
assert(r.size() == static_cast<size_t>(current_chunk_size));
if (!vad.ForwardChunk(r)) {
std::cerr << "Failed to inference while using model:"
@ -47,11 +63,14 @@ int main(int argc, char* argv[]) {
return false;
}
Vad::State s = vad.Postprocess();
ppspeech::Vad::State s = vad.Postprocess();
std::cout << s << " ";
}
std::cout << std::endl;
std::cout << "RTF=" << timer.Elapsed() / double(num_samples / sr)
<< std::endl;
std::vector<std::map<std::string, float>> result = vad.GetResult();
for (auto& res : result) {
std::cout << "speak start: " << res["start"]

@ -1,121 +0,0 @@
English | [简体中文](README_CN.md)
# Silero VAD Deployment Example
This directory provides an example in which `infer_onnx_silero_vad` quickly completes the deployment of VAD models on CPU/GPU.
Before deployment, two steps require confirmation.
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
- The above command works for Linux or MacOS. Refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration)
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): sampling rate
> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size
> * **threshold**(float): Result probability judgment threshold
> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Used to start model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold
> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly
> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly
> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly
**The output result format is**`std::vector<std::map<std::string, float>>`
> Output a list, each element is a speech fragment
>
> Each clip can use 'start' to get the start time and 'end' to get the end time
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must be consistent with that set in the code
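Putting the calls above together, a minimal usage sketch follows; the header name and parameter values are illustrative assumptions, not prescribed by this example.
```c++
#include <iostream>

#include "vad.h"  // assumed header that declares the Vad class

int main() {
    // The default RuntimeOption runs the ONNX model on CPU.
    Vad vad("./silero_vad.onnx");

    // setAudioCofig must be called before init(); values here are illustrative.
    vad.setAudioCofig(/*sr=*/16000, /*frame_ms=*/32, /*threshold=*/0.5f,
                      /*min_silence_duration_ms=*/200, /*speech_pad_ms=*/0);
    vad.init();

    // The sample rate of the wav must match the configured sr.
    vad.loadAudio("./silero_vad_sample.wav");
    vad.Predict();

    // Each element maps "start"/"end" to one speech segment's boundaries.
    for (const auto& segment : vad.getResult()) {
        std::cout << "speak start: " << segment.at("start")
                  << ", end: " << segment.at("end") << std::endl;
    }
    return 0;
}
```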
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)

@ -1,119 +0,0 @@
[English](README.md) | Simplified Chinese
# Silero VAD Deployment Example
This directory provides `infer_onnx_silero_vad` to quickly run Silero VAD model inference on CPU/GPU.
Before deployment, confirm the following two steps:
- 1. The software and hardware environment meets the requirements. Refer to [FastDeploy Environment Requirements](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
- 2. Download the precompiled deployment library and sample code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md)
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following commands in this directory.
```bash
mkdir build
cd build
# Download the FastDeploy precompiled library. Users can choose an appropriate version from the `FastDeploy Precompiled Library` mentioned above
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
tar xvf fastdeploy-linux-x64-x.x.x.tgz
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
make -j
# Download the VAD model file and test audio. After decompression, place the model and test audio in the same directory as infer_onnx_silero_vad.cc
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
# Inference
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
```
The above commands only work for Linux or MacOS. For how to use the SDK on Windows, refer to:
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/cn/faq/use_sdk_on_windows.md)
## VAD C++ Interface
### Vad Class
```c++
Vad::Vad(const std::string& model_file,
         const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
```
**Parameter**
> * **model_file**(str): Model file path
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default (the default configuration is used)
### setAudioCofig function
**Must be called before the `init` function**
```c++
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
```
**Parameter**
> * **sr**(int): Sampling rate
> * **frame_ms**(int): The length of each detection frame, used to calculate the detection window size
> * **threshold**(float): Probability threshold for judging the result
> * **min_silence_duration_ms**(int): Threshold used to decide whether a region is silence
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
### init function
Used to initialize audio-related parameters.
```c++
void Vad::init();
```
### loadAudio function
Load audio.
```c++
void Vad::loadAudio(const std::string& wavPath)
```
**Parameter**
> * **wavPath**(str): Audio file path
### Predict function
Used to run model inference.
```c++
bool Vad::Predict();
```
### getResult function
**Used to obtain the inference results**
```c++
std::vector<std::map<std::string, float>> Vad::getResult(
    float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
    float mergeThreshold = 0.3);
```
**Parameter**
> * **removeThreshold**(float): Threshold for discarding result segments; segments that are too short are discarded according to this threshold
> * **expandHeadThreshold**(float): Offset applied to the start of a segment; the detected start time may be too close to the speech, so the start time is moved forward accordingly
> * **expandTailThreshold**(float): Offset applied to the end of a segment; the detected end time may be too close to the speech, so the end time is moved back accordingly
> * **mergeThreshold**(float): Segments that are very close together are merged into one speech segment according to this threshold
**The output format is** `std::vector<std::map<std::string, float>>`
> The output is a list where each element is a speech segment
>
> For each segment, 'start' gives the start time and 'end' gives the end time
### Tips
1. The `setAudioCofig` function must be called before the `init` function
2. The sampling rate of the input audio file must match the one set in the code
- [Model Description](../)
- [How to switch the model inference backend engine](../../../../docs/cn/faq/how_to_change_backend.md)

@ -1,18 +0,0 @@
# This contains the locations of binaries built and required for running the examples.
unset GREP_OPTIONS
ENGINE_ROOT=$PWD/../../../
ENGINE_BUILD=$ENGINE_ROOT/build/engine/asr
ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin
[ -d $ENGINE_BUILD ] || { echo "Error: 'build/runtime' directory not found. Please ensure that the project builds successfully"; }
export LC_ALL=C
export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/decoder:$ENGINE_BUILD/../common/frontend/audio:$ENGINE_BUILD/recognizer
#PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);")
export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH

@ -3,7 +3,7 @@
unset GREP_OPTIONS
ENGINE_ROOT=$PWD/../../../
ENGINE_BUILD=$ENGINE_ROOT/build/engine/asr
ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/asr
ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin

@ -0,0 +1,261 @@
# Silero VAD - pre-trained enterprise-grade Voice Activity Detector
This directory provides VAD model deployment on CPU/GPU.
![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png)
## VAD Interface
For the VAD interface, see [the interface directory](../../engine/vad/interface/).
### Create Handle
```c++
PPSHandle_t PPSVadCreateInstance(const char* conf_path);
```
### Destroy Handle
```c++
int PPSVadDestroyInstance(PPSHandle_t instance);
```
### Reset Vad State
```c++
int PPSVadReset(PPSHandle_t instance);
```
Reset the VAD state before processing the next `wav`.
### Get Chunk Size
```c++
int PPSVadChunkSizeSamples(PPSHandle_t instance);
```
This API returns the chunk size in samples.
Each call to forward must be fed `chunk size` samples, except for the last chunk.
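For example, assuming the default configuration shown later in this document (`sr = 16000`, `frame_ms = 32`), the chunk size would work out to 16000 × 32 / 1000 = 512 samples per call.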
### Vad Forward
```c++
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
float* chunk,
int num_element);
```
The VAD has the following states:
```c++
typedef enum {
PPS_VAD_ILLEGAL = 0, // error
PPS_VAD_SIL, // silence
PPS_VAD_START, // start speech
PPS_VAD_SPEECH, // in speech
PPS_VAD_END, // end speech
PPS_VAD_NUMSTATES, // number of states
} PPSVadState_t;
```
If `PPSVadFeedForward` encounters an error, it returns the `PPS_VAD_ILLEGAL` state.
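A minimal end-to-end sketch built only from the calls above; the header name, the wav-reading helper, and the file paths are placeholder assumptions, not part of the documented API.
```c++
#include <algorithm>
#include <cstddef>
#include <vector>

#include "vad_interface.h"  // assumed header that declares the PPSVad* C API

// Placeholder: a real program would read 16 kHz float samples from the wav file.
static std::vector<float> ReadWav(const char* /*path*/) {
    return std::vector<float>(16000, 0.0f);  // one second of silence as a stand-in
}

int main() {
    PPSHandle_t handle = PPSVadCreateInstance("./conf/vad.ini");
    const int chunk_size = PPSVadChunkSizeSamples(handle);

    std::vector<float> samples = ReadWav("./data/silero_vad_sample.wav");

    // Feed chunk_size samples per call; only the last chunk may be shorter.
    for (std::size_t i = 0; i < samples.size(); i += chunk_size) {
        const int n = static_cast<int>(
            std::min<std::size_t>(chunk_size, samples.size() - i));
        PPSVadState_t state = PPSVadFeedForward(handle, samples.data() + i, n);
        if (state == PPS_VAD_ILLEGAL) break;  // error reported by the engine
    }

    PPSVadReset(handle);            // reset the state before the next wav
    PPSVadDestroyInstance(handle);
    return 0;
}
```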
## Linux
### Build Runtime
```bash
# cd /path/to/paddlespeech/runtime
cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON
cmake --build build
```
Since VAD uses the FastDeploy runtime, if you already have another FastDeploy library, you can build with:
```bash
# cd /path/to/paddlespeech/runtime
cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace//paddle/FastDeploy/build/Linux/x86_64/install
cmake --build build
```
`FASTDEPLOY_INSTALL_DIR` is the install directory of the FastDeploy library.
### Run Demo
After a successful build, run the demo from this example directory:
```bash
bash run.sh
```
The output looks like this:
```bash
/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(88)::SetConfig sr=16 threshold=0.5 beam=0.15 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend Runtime initialized with Backend::ORT in Device::CPU./workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(137)::Initialize init done.[SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL]
RTF=0.00774591
speak start: 0.32 s, end: 2.464 s | speak start: 3.296 s, end: 4.64 s | speak start: 5.408 s, end: 7.872 s | speak start: 8.192 s, end: 10.72 s
vad_nnet_main done!
sr = 16000
frame_ms = 32
threshold = 0.5
beam = 0.15
min_silence_duration_ms = 200
speech_pad_left_ms = 0
speech_pad_right_ms = 0
model_path = ./data/silero_vad/silero_vad.onnx
param_path = (default)
num_cpu_thread = 1 (default)
/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(88)::SetConfig sr=16 threshold=0.5 beam=0.15 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0
[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend Runtime initialized with Backend::ORT in Device::CPU.
/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(137)::Initialize init done.
1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1
RTF=0.00778218
vad_interface_main done!
```
## Android
To use it on Android, set up your `NDK` environment first, then run:
```bash
# cd /path/to/paddlespeech/runtime
bash build_android.sh
```
## Result
| Arch | RTF | Runtime Size |
|--|--|--|
| x86_64 | 0.00778218 | |
| arm64-v8a | 0.00744745 | ~10.532MB |
## Machine Information
#### x86_64
The environment is as follows:
```text
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 80
On-line CPU(s) list: 0-79
Thread(s) per core: 2
Core(s) per socket: 20
Socket(s): 2
NUMA node(s): 2
Vendor ID: GenuineIntel
CPU family: 6
Model: 85
Model name: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz
Stepping: 7
CPU MHz: 2599.998
BogoMIPS: 5199.99
Hypervisor vendor: KVM
Virtualization type: full
L1d cache: 32K
L1i cache: 32K
L2 cache: 1024K
L3 cache: 33792K
NUMA node0 CPU(s): 0-39
NUMA node1 CPU(s): 40-79
Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb ibrs_enhanced fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 arat umip pku ospke avx512_vnni spec_ctrl arch_capabilities
```
#### arm64-v8a
```text
Processor : AArch64 Processor rev 14 (aarch64)
processor : 0
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x805
CPU revision : 14
processor : 1
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x805
CPU revision : 14
processor : 2
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x805
CPU revision : 14
processor : 3
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x805
CPU revision : 14
processor : 4
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x804
CPU revision : 14
processor : 5
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x804
CPU revision : 14
processor : 6
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x804
CPU revision : 14
processor : 7
BogoMIPS : 38.40
Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop
CPU implementer : 0x51
CPU architecture: 8
CPU variant : 0xd
CPU part : 0x804
CPU revision : 14
Hardware : Qualcomm Technologies, Inc SM8150
```
## Download Pre-trained ONNX Model
For developers' testing, the model exported for VAD is provided below and can be downloaded directly.
| Model | Size | Notes |
| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- |
| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad), MIT License |
## FastDeploy Runtime
For FastDeploy software and hardware requirements, and the prebuilt libraries, please see [FastDeploy](https://github.com/PaddlePaddle/FastDeploy):
- 1. [FastDeploy Environment Requirements](https://github.com/PaddlePaddle/FastDeploy/docs/en/build_and_install/download_prebuilt_libraries.md).
- 2. [FastDeploy Precompiled Library](https://github.com/PaddlePaddle/FastDeploy/docs/en/build_and_install/download_prebuilt_libraries.md).
## Reference
* https://github.com/snakers4/silero-vad
* https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/audio/silero-vad/README.md

@ -0,0 +1,11 @@
[model]
model_path=./data/silero_vad/silero_vad.onnx
[vad]
sr = 16000 # 16k
frame_ms = 32 # 32, 64, 96 for 16k
threshold = 0.5
beam = 0.15
min_silence_duration_ms = 200
speech_pad_left_ms = 0
speech_pad_right_ms = 0

@ -0,0 +1,23 @@
#!/bin/bash
set -e
conf=conf
data=data
exp=exp
. utils/parse_options.sh
mkdir -p $exp
ckpt_dir=$data/silero_vad
model=$ckpt_dir/silero_vad.onnx
test_wav=$data/silero_vad_sample.wav
conf_file=$conf/vad.ini
vad_nnet_main $model $test_wav
echo "vad_nnet_main done!"
vad_interface_main $conf_file $test_wav
echo "vad_interface_main done!"

@ -0,0 +1,17 @@
# This contains the locations of binaries built and required for running the examples.
unset GREP_OPTIONS
ENGINE_ROOT=$PWD/../../
ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/vad
ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin
[ -d $ENGINE_BUILD ] || { echo "Error: 'build/runtime' directory not found. Please ensure that the project builds successfully"; }
export LC_ALL=C
export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/interface
export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH

@ -15,8 +15,8 @@ exp=exp
mkdir -p $exp $data
# 1. compile
if [ ! -d ${SPEECHX_BUILD} ]; then
pushd ${SPEECHX_ROOT}
if [ ! -d ${ENGINE_BUILD} ]; then
pushd ${ENGINE_ROOT}
bash build.sh
# build for android armv8/armv7
@ -24,8 +24,6 @@ if [ ! -d ${SPEECHX_BUILD} ]; then
popd
fi
ckpt_dir=$data/silero_vad
wav=$data/silero_vad_sample.wav
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then
./local/download.sh