From bf914a9c8b5b01c37932cfc63fae46ce3fa83928 Mon Sep 17 00:00:00 2001 From: Hui Zhang <zhtclz@foxmail.com> Date: Mon, 13 Mar 2023 14:45:22 +0800 Subject: [PATCH] [runtime] optimization compile and add vad interface (#3026) * vad recipe ok * refactor vad, add vad conf, vad inerface, vad recipe * format * install vad lib/bin/inc * using cpack * add vad doc, fix vad state name * add comment * refactor fastdeploy download * add vad jni; format code * add timer; compute vad rtf; vad add beam param * andorid find library * fix log; add vad rtf * fix glog * fix BUILD_TYPE bug * update doc * rm jni --- runtime/CMakeLists.txt | 67 +++-- runtime/build.sh | 16 +- runtime/build_android.sh | 9 +- runtime/cmake/fastdeploy.cmake | 139 +++++++--- runtime/cmake/gflags.cmake | 2 + runtime/cmake/glog.cmake | 16 +- runtime/cmake/openfst.cmake | 10 + runtime/cmake/summary.cmake | 6 + .../ctc_prefix_beam_search_decoder_main.cc | 8 +- .../asr/decoder/ctc_tlg_decoder_main.cc | 2 +- runtime/engine/asr/nnet/nnet_producer.cc | 3 +- runtime/engine/asr/nnet/u2_nnet_main.cc | 8 +- .../engine/asr/nnet/u2_nnet_thread_main.cc | 2 +- .../recognizer/u2_recognizer_batch_main.cc | 2 +- .../asr/recognizer/u2_recognizer_main.cc | 2 +- .../recognizer/u2_recognizer_thread_main.cc | 2 +- .../server/websocket/websocket_client_main.cc | 2 +- .../server/websocket/websocket_server_main.cc | 2 +- runtime/engine/cls/nnet/CMakeLists.txt | 2 +- runtime/engine/cls/nnet/panns_interface.cc | 1 + runtime/engine/cls/nnet/panns_nnet_main.cc | 1 + runtime/engine/common/CMakeLists.txt | 6 +- runtime/engine/common/base/CMakeLists.txt | 1 + runtime/engine/common/base/basic_types.h | 2 +- runtime/engine/common/base/common.h | 3 +- runtime/engine/common/base/config.h | 13 +- runtime/engine/common/base/log_impl.cc | 40 +-- runtime/engine/common/base/log_impl.h | 55 ++-- runtime/engine/common/frontend/CMakeLists.txt | 6 +- runtime/engine/common/frontend/assembler.cc | 3 +- runtime/engine/common/frontend/fftsg.c | 24 +- 
runtime/engine/common/frontend/rfft.cc | 3 +- runtime/engine/common/frontend/wave-reader.cc | 10 +- runtime/engine/common/matrix/kaldi-matrix.h | 2 +- runtime/engine/common/matrix/kaldi-vector.cc | 2 + runtime/engine/common/matrix/matrix-common.h | 2 +- runtime/engine/common/utils/CMakeLists.txt | 1 + runtime/engine/common/utils/timer.cc | 63 +++++ runtime/engine/common/utils/timer.h | 39 +++ runtime/engine/vad/CMakeLists.txt | 8 +- runtime/engine/vad/{ => frontend}/wav.h | 2 + runtime/engine/vad/interface/CMakeLists.txt | 25 ++ runtime/engine/vad/interface/vad_interface.cc | 94 +++++++ runtime/engine/vad/interface/vad_interface.h | 46 +++ .../vad/interface/vad_interface_main.cc | 71 +++++ runtime/engine/vad/nnet/CMakeLists.txt | 16 ++ runtime/engine/vad/{ => nnet}/vad.cc | 93 ++++--- runtime/engine/vad/{ => nnet}/vad.h | 50 +++- .../vad_nnet_main.cc} | 33 ++- runtime/examples/silero_vad/README.md | 121 -------- runtime/examples/silero_vad/README_CN.md | 119 -------- runtime/examples/silero_vad/local/decode.sh | 0 runtime/examples/silero_vad/path.sh | 18 -- runtime/examples/u2pp_ol/wenetspeech/path.sh | 2 +- .../examples/{silero_vad => vad}/.gitignore | 0 runtime/examples/vad/README.md | 261 ++++++++++++++++++ runtime/examples/vad/conf/vad.ini | 11 + .../{silero_vad => vad}/local/build.sh | 0 .../local/build_android.sh | 0 runtime/examples/vad/local/decode.sh | 23 ++ .../{silero_vad => vad}/local/download.sh | 0 runtime/examples/vad/path.sh | 17 ++ runtime/examples/{silero_vad => vad}/run.sh | 6 +- runtime/examples/{silero_vad => vad}/utils | 0 64 files changed, 1128 insertions(+), 465 deletions(-) create mode 100644 runtime/engine/common/utils/timer.cc create mode 100644 runtime/engine/common/utils/timer.h rename runtime/engine/vad/{ => frontend}/wav.h (99%) create mode 100644 runtime/engine/vad/interface/CMakeLists.txt create mode 100644 runtime/engine/vad/interface/vad_interface.cc create mode 100644 runtime/engine/vad/interface/vad_interface.h create mode 
100644 runtime/engine/vad/interface/vad_interface_main.cc create mode 100644 runtime/engine/vad/nnet/CMakeLists.txt rename runtime/engine/vad/{ => nnet}/vad.cc (80%) rename runtime/engine/vad/{ => nnet}/vad.h (78%) rename runtime/engine/vad/{silero_vad_main.cc => nnet/vad_nnet_main.cc} (58%) delete mode 100644 runtime/examples/silero_vad/README.md delete mode 100644 runtime/examples/silero_vad/README_CN.md delete mode 100755 runtime/examples/silero_vad/local/decode.sh delete mode 100644 runtime/examples/silero_vad/path.sh rename runtime/examples/{silero_vad => vad}/.gitignore (100%) create mode 100644 runtime/examples/vad/README.md create mode 100644 runtime/examples/vad/conf/vad.ini rename runtime/examples/{silero_vad => vad}/local/build.sh (100%) rename runtime/examples/{silero_vad => vad}/local/build_android.sh (100%) create mode 100755 runtime/examples/vad/local/decode.sh rename runtime/examples/{silero_vad => vad}/local/download.sh (100%) create mode 100644 runtime/examples/vad/path.sh rename runtime/examples/{silero_vad => vad}/run.sh (77%) mode change 100644 => 100755 rename runtime/examples/{silero_vad => vad}/utils (100%) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index af970526a..efeb62188 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -1,4 +1,5 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) +# >=3.17 support -DCMAKE_FIND_DEBUG_MODE=ON +cmake_minimum_required(VERSION 3.17 FATAL_ERROR) set(CMAKE_PROJECT_INCLUDE_BEFORE "${CMAKE_CURRENT_SOURCE_DIR}/cmake/EnableCMP0048.cmake") @@ -6,20 +7,12 @@ set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") include(system) -# Ninja Generator will set CMAKE_BUILD_TYPE to Debug -if(NOT CMAKE_BUILD_TYPE) - set(CMAKE_BUILD_TYPE - "Release" - CACHE - STRING - "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" - FORCE) -endif() - project(paddlespeech VERSION 0.1) -include(FetchContent) -include(ExternalProject) 
+set(PPS_VERSION_MAJOR 1) +set(PPS_VERSION_MINOR 0) +set(PPS_VERSION_PATCH 0) +set(PPS_VERSION "${PPS_VERSION_MAJOR}.${PPS_VERSION_MINOR}.${PPS_VERSION_PATCH}") # fc_patch dir set(FETCHCONTENT_QUIET off) @@ -27,21 +20,36 @@ get_filename_component(fc_patch "fc_patch" REALPATH BASE_DIR "${CMAKE_SOURCE_DIR set(FETCHCONTENT_BASE_DIR ${fc_patch}) set(CMAKE_VERBOSE_MAKEFILE ON) +set(CMAKE_FIND_DEBUG_MODE OFF) set(PPS_CXX_STANDARD 14) # set std-14 set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD}) -add_compile_options(-fPIC) -# compiler option -# Keep the same with openfst, -fPIC or -fpic -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ldl") -SET(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O0 -Wall -g -ggdb") -SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall") +# Ninja Generator will set CMAKE_BUILD_TYPE to Debug +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel" FORCE) +endif() +# find_* e.g. 
find_library work when Cross-Compiling +if(ANDROID) + set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) + set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE BOTH) +endif() + +# install dir into `build/install` +set(CMAKE_INSTALL_PREFIX ${CMAKE_CURRENT_BINARY_DIR}/install) + +include(FetchContent) +include(ExternalProject) ############################################################################### # Option Configurations ############################################################################### +# https://github.com/google/brotli/pull/655 +option(BUILD_SHARED_LIBS "Build shared libraries" ON) + option(WITH_ASR "build asr" ON) option(WITH_CLS "build cls" ON) option(WITH_VAD "build vad" ON) @@ -77,6 +85,7 @@ endif() ############################################################################### # Find Package ############################################################################### +# https://github.com/Kitware/CMake/blob/v3.1.0/Modules/FindThreads.cmake#L207 find_package(Threads REQUIRED) if(WITH_ASR) @@ -157,6 +166,22 @@ include(summary) ############################################################################### # Add local library ############################################################################### -set(ENGINE_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/engine) +set(ENGINE_ROOT ${CMAKE_SOURCE_DIR}/engine) -add_subdirectory(engine) \ No newline at end of file +add_subdirectory(engine) + + +############################################################################### +# CPack library +############################################################################### +# build a CPack driven installer package +include (InstallRequiredSystemLibraries) +set(CPACK_PACKAGE_NAME "paddlespeech_library") +set(CPACK_PACKAGE_VENDOR "paddlespeech") +set(CPACK_PACKAGE_VERSION_MAJOR 1) +set(CPACK_PACKAGE_VERSION_MINOR 0) +set(CPACK_PACKAGE_VERSION_PATCH 0) +set(CPACK_PACKAGE_DESCRIPTION 
"paddlespeech library") +set(CPACK_PACKAGE_CONTACT "paddlespeech@baidu.com") +set(CPACK_SOURCE_GENERATOR "TGZ") +include (CPack) \ No newline at end of file diff --git a/runtime/build.sh b/runtime/build.sh index f7d0a2b25..4a27766a9 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -1,8 +1,20 @@ #!/usr/bin/env bash set -xe +BUILD_ROOT=build/Linux +BUILD_DIR=${BUILD_ROOT}/x86_64 + +mkdir -p ${BUILD_DIR} + # the build script had verified in the paddlepaddle docker image. # please follow the instruction below to install PaddlePaddle image. # https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html -cmake -B build -DWITH_ASR=ON -DWITH_CLS=OFF -DWITH_VAD=OFF -cmake --build build -j +#cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Android/arm64-v8a-api-21/install +cmake -B ${BUILD_DIR} \ + -DCMAKE_BUILD_TYPE=Release \ + -DBUILD_SHARED_LIBS=OFF \ + -DWITH_ASR=OFF \ + -DWITH_CLS=OFF \ + -DWITH_VAD=ON \ + -DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Linux/x86_64/install +cmake --build ${BUILD_DIR} -j diff --git a/runtime/build_android.sh b/runtime/build_android.sh index 64a337627..ac3980a8f 100755 --- a/runtime/build_android.sh +++ b/runtime/build_android.sh @@ -14,8 +14,8 @@ TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake # Create build directory BUILD_ROOT=build/Android BUILD_DIR=${BUILD_ROOT}/${ANDROID_ABI}-api-21 -#FASDEPLOY_INSTALL_DIR="${BUILD_DIR}/install" -#mkdir build && mkdir ${BUILD_ROOT} && mkdir ${BUILD_DIR} +FASTDEPLOY_INSTALL_DIR="/workspace/zhanghui/paddle/FastDeploy/build/Android/arm64-v8a-api-21/install" + mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} @@ -27,10 +27,13 @@ cmake -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ -DANDROID_PLATFORM=${ANDROID_PLATFORM} \ -DANDROID_STL=${ANDROID_STL} \ -DANDROID_TOOLCHAIN=${ANDROID_TOOLCHAIN} \ + -DBUILD_SHARED_LIBS=OFF \ -DWITH_ASR=OFF \ 
-DWITH_CLS=OFF \ + -DWITH_VAD=ON \ + -DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} \ + -DCMAKE_FIND_DEBUG_MODE=OFF \ -Wno-dev ../../.. - #-DFASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} \ # Build FastDeploy Android C++ SDK make diff --git a/runtime/cmake/fastdeploy.cmake b/runtime/cmake/fastdeploy.cmake index 463a8e8e8..b7c9a8ddb 100644 --- a/runtime/cmake/fastdeploy.cmake +++ b/runtime/cmake/fastdeploy.cmake @@ -1,42 +1,119 @@ -set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture: -android_arm, android_armv7, android_armv8, android_x86, android_x86_64, -mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86, -windows_x86") - -set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy) -if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz) - exec_program("mkdir -p ${FASTDEPLOY_DIR} && - wget -c https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} && - tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} && - mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64") -endif() +include(FetchContent) -if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared.tgz) - exec_program("mkdir -p ${FASTDEPLOY_DIR} && - wget -c https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz -P ${FASTDEPLOY_DIR} && - tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared.tgz -C ${FASTDEPLOY_DIR} && - mv ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared ${FASTDEPLOY_DIR}/android-armv7v8") -endif() +set(EXTERNAL_PROJECT_LOG_ARGS + LOG_DOWNLOAD 1 # Wrap download in script to log output + LOG_UPDATE 1 # Wrap update in script to log output + LOG_PATCH 1 + LOG_CONFIGURE 1# Wrap configure in script to log output + LOG_BUILD 1 # Wrap build in script to log output + LOG_INSTALL 1 + LOG_TEST 1 # Wrap test in script to log output + LOG_MERGED_STDOUTERR 1 + LOG_OUTPUT_ON_FAILURE 1 +) + +if(NOT FASTDEPLOY_INSTALL_DIR) + 
if(ANDROID) + FetchContent_Declare( + fastdeploy + URL https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz + URL_HASH MD5=2a15301158e9eb157a4f11283689e7ba + ${EXTERNAL_PROJECT_LOG_ARGS} + ) + add_definitions("-DUSE_PADDLE_LITE_BAKEND") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") + else() # Linux + FetchContent_Declare( + fastdeploy + URL https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz + URL_HASH MD5=125df3bfce603521960cc5c8b47faab0 + ${EXTERNAL_PROJECT_LOG_ARGS} + ) + add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND") + # add_definitions("-DUSE_ORT_BACKEND") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3") + endif() -if(ANDROID) - set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8) - add_definitions("-DUSE_PADDLE_LITE_BAKEND") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") -elseif(UNIX) - set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64) - add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND") - # add_definitions("-DUSE_ORT_BACKEND") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3") + FetchContent_MakeAvailable(fastdeploy) + + set(FASTDEPLOY_INSTALL_DIR ${fc_patch}/fastdeploy-src) endif() -message(STATUS "FASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} ${UNIX}") include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) # fix compiler flags conflict, since fastdeploy using c++11 for project +# this line must 
after `include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)` set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD}) include_directories(${FASTDEPLOY_INCS}) -message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}") \ No newline at end of file + +# install fastdeploy and dependents lib +# install_fastdeploy_libraries(${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR}) +# No dynamic libs need to install while using +# FastDeploy static lib. +if(ANDROID AND WITH_ANDROID_STATIC_LIB) + return() +endif() + +set(DYN_LIB_SUFFIX "*.so*") +if(WIN32) + set(DYN_LIB_SUFFIX "*.dll") +elseif(APPLE) + set(DYN_LIB_SUFFIX "*.dylib*") +endif() + +if(FastDeploy_DIR) + set(DYN_SEARCH_DIR ${FastDeploy_DIR}) +elseif(FASTDEPLOY_INSTALL_DIR) + set(DYN_SEARCH_DIR ${FASTDEPLOY_INSTALL_DIR}) +else() + message(FATAL_ERROR "Please set FastDeploy_DIR/FASTDEPLOY_INSTALL_DIR before call install_fastdeploy_libraries.") +endif() + +file(GLOB_RECURSE ALL_NEED_DYN_LIBS ${DYN_SEARCH_DIR}/lib/${DYN_LIB_SUFFIX}) +file(GLOB_RECURSE ALL_DEPS_DYN_LIBS ${DYN_SEARCH_DIR}/third_libs/${DYN_LIB_SUFFIX}) + +if(ENABLE_VISION) + # OpenCV + if(ANDROID) + file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${DYN_LIB_SUFFIX}) + else() + file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/../../${DYN_LIB_SUFFIX}) + endif() + + list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS}) + + if(WIN32) + file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX}) + install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib) + elseif(ANDROID AND (NOT WITH_ANDROID_OPENCV_STATIC)) + file(GLOB OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${ANDROID_ABI}/${DYN_LIB_SUFFIX}) + install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib) + else() # linux/mac + file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/lib/${DYN_LIB_SUFFIX}) + install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib) + endif() + + # FlyCV + if(ENABLE_FLYCV) + file(GLOB_RECURSE ALL_FLYCV_DYN_LIBS ${FLYCV_LIB_DIR}/${DYN_LIB_SUFFIX}) + list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_FLYCV_DYN_LIBS}) + 
if(ANDROID AND (NOT WITH_ANDROID_FLYCV_STATIC)) + install(FILES ${ALL_FLYCV_DYN_LIBS} DESTINATION lib) + endif() + endif() +endif() + +if(ENABLE_OPENVINO_BACKEND) + # need plugins.xml for openvino backend + set(OPENVINO_RUNTIME_BIN_DIR ${OPENVINO_DIR}/bin) + file(GLOB OPENVINO_PLUGIN_XML ${OPENVINO_RUNTIME_BIN_DIR}/*.xml) + install(FILES ${OPENVINO_PLUGIN_XML} DESTINATION lib) +endif() + +# Install other libraries +install(FILES ${ALL_NEED_DYN_LIBS} DESTINATION lib) +install(FILES ${ALL_DEPS_DYN_LIBS} DESTINATION lib) diff --git a/runtime/cmake/gflags.cmake b/runtime/cmake/gflags.cmake index d01eaf60a..8ddf66356 100644 --- a/runtime/cmake/gflags.cmake +++ b/runtime/cmake/gflags.cmake @@ -9,3 +9,5 @@ FetchContent_MakeAvailable(gflags) # openfst need include_directories(${gflags_BINARY_DIR}/include) + +install(FILES ${gflags_BINARY_DIR}/libgflags_nothreads.a DESTINATION lib) \ No newline at end of file diff --git a/runtime/cmake/glog.cmake b/runtime/cmake/glog.cmake index cbb97d2d3..51d0ef066 100644 --- a/runtime/cmake/glog.cmake +++ b/runtime/cmake/glog.cmake @@ -7,6 +7,19 @@ else() # UNIX glog URL https://paddleaudio.bj.bcebos.com/build/glog-0.4.0.zip URL_HASH SHA256=9e1b54eb2782f53cd8af107ecf08d2ab64b8d0dc2b7f5594472f3bd63ca85cdc + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS} + -DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE} + -DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG} + -DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE} + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=OFF + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} ) FetchContent_MakeAvailable(glog) include_directories(${glog_BINARY_DIR} ${glog_SOURCE_DIR}/src) @@ -15,7 +28,8 @@ endif() if(ANDROID) add_library(extern_glog INTERFACE) + add_dependencies(extern_glog gflags) else() # 
UNIX - add_dependencies(glog gflags) add_library(extern_glog ALIAS glog) + add_dependencies(extern_glog gflags) endif() \ No newline at end of file diff --git a/runtime/cmake/openfst.cmake b/runtime/cmake/openfst.cmake index 066971563..a859076fe 100644 --- a/runtime/cmake/openfst.cmake +++ b/runtime/cmake/openfst.cmake @@ -10,9 +10,19 @@ include(FetchContent) #Application of Automata, (CIAA 2007), volume 4783 of Lecture Notes in #Computer Science, pages 11-23. Springer, 2007. http://www.openfst.org. +set(EXTERNAL_PROJECT_LOG_ARGS + LOG_DOWNLOAD 1 # Wrap download in script to log output + LOG_UPDATE 1 # Wrap update in script to log output + LOG_CONFIGURE 1# Wrap configure in script to log output + LOG_BUILD 1 # Wrap build in script to log output + LOG_TEST 1 # Wrap test in script to log output + LOG_INSTALL 1 # Wrap install in script to log output +) + ExternalProject_Add(openfst URL https://paddleaudio.bj.bcebos.com/build/openfst_1.7.2.zip URL_HASH SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6 + ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${openfst_PREFIX_DIR} SOURCE_DIR ${openfst_SOURCE_DIR} BINARY_DIR ${openfst_BINARY_DIR} diff --git a/runtime/cmake/summary.cmake b/runtime/cmake/summary.cmake index fd47c6bd4..95ee324a1 100644 --- a/runtime/cmake/summary.cmake +++ b/runtime/cmake/summary.cmake @@ -15,6 +15,7 @@ function(pps_summary) message(STATUS "") message(STATUS "*************PaddleSpeech Building Summary**********") + message(STATUS " PPS_VERSION : ${PPS_VERSION}") message(STATUS " CMake version : ${CMAKE_VERSION}") message(STATUS " CMake command : ${CMAKE_COMMAND}") message(STATUS " UNIX : ${UNIX}") @@ -24,10 +25,13 @@ function(pps_summary) message(STATUS " C++ compiler version : ${CMAKE_CXX_COMPILER_VERSION}") message(STATUS " CXX flags : ${CMAKE_CXX_FLAGS}") message(STATUS " Build type : ${CMAKE_BUILD_TYPE}") + message(STATUS " BUILD_SHARED_LIBS : ${BUILD_SHARED_LIBS}") get_directory_property(tmp DIRECTORY ${PROJECT_SOURCE_DIR} 
COMPILE_DEFINITIONS) message(STATUS " Compile definitions : ${tmp}") message(STATUS " CMAKE_PREFIX_PATH : ${CMAKE_PREFIX_PATH}") + message(STATUS " CMAKE_CURRENT_BINARY_DIR : ${CMAKE_CURRENT_BINARY_DIR}") message(STATUS " CMAKE_INSTALL_PREFIX : ${CMAKE_INSTALL_PREFIX}") + message(STATUS " CMAKE_INSTALL_LIBDIR : ${CMAKE_INSTALL_LIBDIR}") message(STATUS " CMAKE_MODULE_PATH : ${CMAKE_MODULE_PATH}") message(STATUS " CMAKE_SYSTEM_NAME : ${CMAKE_SYSTEM_NAME}") message(STATUS "") @@ -39,6 +43,8 @@ function(pps_summary) message(STATUS " WITH_TESTING : ${WITH_TESTING}") message(STATUS " WITH_PROFILING : ${WITH_PROFILING}") message(STATUS " FASTDEPLOY_INSTALL_DIR : ${FASTDEPLOY_INSTALL_DIR}") + message(STATUS " FASTDEPLOY_INCS : ${FASTDEPLOY_INCS}") + message(STATUS " FASTDEPLOY_LIBS : ${FASTDEPLOY_LIBS}") if(WITH_GPU) message(STATUS " CUDA_DIRECTORY : ${CUDA_DIRECTORY}") endif() diff --git a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder_main.cc b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder_main.cc index bd73b3aca..1673bdad1 100644 --- a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder_main.cc +++ b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder_main.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "decoder/ctc_prefix_beam_search_decoder.h" #include "base/common.h" +#include "decoder/ctc_prefix_beam_search_decoder.h" #include "frontend/data_cache.h" #include "fst/symbol-table.h" #include "kaldi/util/table-types.h" @@ -117,9 +117,9 @@ int main(int argc, char* argv[]) { ori_feature_len - chunk_idx * chunk_stride, chunk_size); } if (this_chunk_size < receptive_field_length) { - LOG(WARNING) << "utt: " << utt << " skip last " - << this_chunk_size << " frames, expect is " - << receptive_field_length; + LOG(WARNING) + << "utt: " << utt << " skip last " << this_chunk_size + << " frames, expect is " << receptive_field_length; break; } diff --git a/runtime/engine/asr/decoder/ctc_tlg_decoder_main.cc b/runtime/engine/asr/decoder/ctc_tlg_decoder_main.cc index 148ee15e3..410574dcb 100644 --- a/runtime/engine/asr/decoder/ctc_tlg_decoder_main.cc +++ b/runtime/engine/asr/decoder/ctc_tlg_decoder_main.cc @@ -14,8 +14,8 @@ // todo refactor, repalce with gtest -#include "decoder/ctc_tlg_decoder.h" #include "base/common.h" +#include "decoder/ctc_tlg_decoder.h" #include "decoder/param.h" #include "frontend/data_cache.h" #include "kaldi/util/table-types.h" diff --git a/runtime/engine/asr/nnet/nnet_producer.cc b/runtime/engine/asr/nnet/nnet_producer.cc index 29daa709d..1e481e306 100644 --- a/runtime/engine/asr/nnet/nnet_producer.cc +++ b/runtime/engine/asr/nnet/nnet_producer.cc @@ -13,12 +13,13 @@ // limitations under the License. #include "nnet/nnet_producer.h" + #include "matrix/kaldi-matrix.h" namespace ppspeech { -using std::vector; using kaldi::BaseFloat; +using std::vector; NnetProducer::NnetProducer(std::shared_ptr<NnetBase> nnet, std::shared_ptr<FrontendInterface> frontend) diff --git a/runtime/engine/asr/nnet/u2_nnet_main.cc b/runtime/engine/asr/nnet/u2_nnet_main.cc index 699f42586..e60ae7e80 100644 --- a/runtime/engine/asr/nnet/u2_nnet_main.cc +++ b/runtime/engine/asr/nnet/u2_nnet_main.cc @@ -13,13 +13,13 @@ // limitations under the License. 
-#include "nnet/u2_nnet.h" #include "base/common.h" #include "decoder/param.h" #include "frontend/assembler.h" #include "frontend/data_cache.h" #include "kaldi/util/table-types.h" #include "nnet/decodable.h" +#include "nnet/u2_nnet.h" DEFINE_string(feature_rspecifier, "", "test feature rspecifier"); @@ -93,9 +93,9 @@ int main(int argc, char* argv[]) { ori_feature_len - chunk_idx * chunk_stride, chunk_size); } if (this_chunk_size < receptive_field_length) { - LOG(WARNING) << "utt: " << utt << " skip last " - << this_chunk_size << " frames, expect is " - << receptive_field_length; + LOG(WARNING) + << "utt: " << utt << " skip last " << this_chunk_size + << " frames, expect is " << receptive_field_length; break; } diff --git a/runtime/engine/asr/nnet/u2_nnet_thread_main.cc b/runtime/engine/asr/nnet/u2_nnet_thread_main.cc index 4339bdbea..c3f291ced 100644 --- a/runtime/engine/asr/nnet/u2_nnet_thread_main.cc +++ b/runtime/engine/asr/nnet/u2_nnet_thread_main.cc @@ -13,7 +13,6 @@ // limitations under the License. -#include "nnet/u2_nnet.h" #include "base/common.h" #include "decoder/param.h" #include "frontend/feature_pipeline.h" @@ -21,6 +20,7 @@ #include "kaldi/util/table-types.h" #include "nnet/decodable.h" #include "nnet/nnet_producer.h" +#include "nnet/u2_nnet.h" DEFINE_string(wav_rspecifier, "", "test wav rspecifier"); DEFINE_string(nnet_prob_wspecifier, "", "nnet porb wspecifier"); diff --git a/runtime/engine/asr/recognizer/u2_recognizer_batch_main.cc b/runtime/engine/asr/recognizer/u2_recognizer_batch_main.cc index 709e5aa62..8d1532bd1 100644 --- a/runtime/engine/asr/recognizer/u2_recognizer_batch_main.cc +++ b/runtime/engine/asr/recognizer/u2_recognizer_batch_main.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "recognizer/u2_recognizer.h" #include "common/base/thread_pool.h" #include "common/utils/file_utils.h" #include "common/utils/strings.h" @@ -20,6 +19,7 @@ #include "frontend/wave-reader.h" #include "kaldi/util/table-types.h" #include "nnet/u2_nnet.h" +#include "recognizer/u2_recognizer.h" DEFINE_string(wav_rspecifier, "", "test feature rspecifier"); DEFINE_string(result_wspecifier, "", "test result wspecifier"); diff --git a/runtime/engine/asr/recognizer/u2_recognizer_main.cc b/runtime/engine/asr/recognizer/u2_recognizer_main.cc index fb37d050f..178c91db1 100644 --- a/runtime/engine/asr/recognizer/u2_recognizer_main.cc +++ b/runtime/engine/asr/recognizer/u2_recognizer_main.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "recognizer/u2_recognizer.h" #include "decoder/param.h" #include "frontend/wave-reader.h" #include "kaldi/util/table-types.h" +#include "recognizer/u2_recognizer.h" DEFINE_string(wav_rspecifier, "", "test feature rspecifier"); DEFINE_string(result_wspecifier, "", "test result wspecifier"); diff --git a/runtime/engine/asr/recognizer/u2_recognizer_thread_main.cc b/runtime/engine/asr/recognizer/u2_recognizer_thread_main.cc index b86853fad..272defc60 100644 --- a/runtime/engine/asr/recognizer/u2_recognizer_thread_main.cc +++ b/runtime/engine/asr/recognizer/u2_recognizer_thread_main.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "recognizer/u2_recognizer.h" #include "decoder/param.h" #include "frontend/wave-reader.h" #include "kaldi/util/table-types.h" +#include "recognizer/u2_recognizer.h" DEFINE_string(wav_rspecifier, "", "test feature rspecifier"); DEFINE_string(result_wspecifier, "", "test result wspecifier"); diff --git a/runtime/engine/asr/server/websocket/websocket_client_main.cc b/runtime/engine/asr/server/websocket/websocket_client_main.cc index 7ad36e3a5..7c5a4f2f7 100644 --- a/runtime/engine/asr/server/websocket/websocket_client_main.cc +++ b/runtime/engine/asr/server/websocket/websocket_client_main.cc @@ -12,10 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "websocket/websocket_client.h" #include "kaldi/feat/wave-reader.h" #include "kaldi/util/kaldi-io.h" #include "kaldi/util/table-types.h" +#include "websocket/websocket_client.h" DEFINE_string(host, "127.0.0.1", "host of websocket server"); DEFINE_int32(port, 8082, "port of websocket server"); diff --git a/runtime/engine/asr/server/websocket/websocket_server_main.cc b/runtime/engine/asr/server/websocket/websocket_server_main.cc index 5f805ac9d..5c32caf27 100644 --- a/runtime/engine/asr/server/websocket/websocket_server_main.cc +++ b/runtime/engine/asr/server/websocket/websocket_server_main.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "websocket/websocket_server.h" #include "decoder/param.h" +#include "websocket/websocket_server.h" DEFINE_int32(port, 8082, "websocket listening port"); diff --git a/runtime/engine/cls/nnet/CMakeLists.txt b/runtime/engine/cls/nnet/CMakeLists.txt index 27f244345..d331d31a6 100644 --- a/runtime/engine/cls/nnet/CMakeLists.txt +++ b/runtime/engine/cls/nnet/CMakeLists.txt @@ -3,7 +3,7 @@ set(srcs panns_interface.cc ) -add_library(cls SHARED ${srcs}) +add_library(cls ${srcs}) target_link_libraries(cls INTERFACE -static-libstdc++;-Wl,-Bsymbolic ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils ) set(bin_name panns_nnet_main) diff --git a/runtime/engine/cls/nnet/panns_interface.cc b/runtime/engine/cls/nnet/panns_interface.cc index 257ee44f1..cfff3f92e 100644 --- a/runtime/engine/cls/nnet/panns_interface.cc +++ b/runtime/engine/cls/nnet/panns_interface.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "cls/nnet/panns_interface.h" + #include "cls/nnet/panns_nnet.h" #include "common/base/config.h" diff --git a/runtime/engine/cls/nnet/panns_nnet_main.cc b/runtime/engine/cls/nnet/panns_nnet_main.cc index 4280d14c2..14f91fc71 100644 --- a/runtime/engine/cls/nnet/panns_nnet_main.cc +++ b/runtime/engine/cls/nnet/panns_nnet_main.cc @@ -14,6 +14,7 @@ #include <fstream> #include <string> + #include "base/flags.h" #include "cls/nnet/panns_interface.h" diff --git a/runtime/engine/common/CMakeLists.txt b/runtime/engine/common/CMakeLists.txt index a2f56f7ff..405479ae1 100644 --- a/runtime/engine/common/CMakeLists.txt +++ b/runtime/engine/common/CMakeLists.txt @@ -12,4 +12,8 @@ ${CMAKE_CURRENT_SOURCE_DIR}/frontend add_subdirectory(frontend) add_library(common INTERFACE) -add_definitions(common base utils kaldi-matrix frontend) \ No newline at end of file +target_link_libraries(common INTERFACE base utils kaldi-matrix frontend) +install(TARGETS base DESTINATION lib) +install(TARGETS utils DESTINATION lib) +install(TARGETS kaldi-matrix DESTINATION lib) 
+install(TARGETS frontend DESTINATION lib) \ No newline at end of file diff --git a/runtime/engine/common/base/CMakeLists.txt b/runtime/engine/common/base/CMakeLists.txt index a49b78bdd..f4171a186 100644 --- a/runtime/engine/common/base/CMakeLists.txt +++ b/runtime/engine/common/base/CMakeLists.txt @@ -36,6 +36,7 @@ if(ANDROID) glog_utils.cc ) add_library(base ${csrc}) + target_link_libraries(base gflags) else() # UNIX set(csrc) add_library(base INTERFACE) diff --git a/runtime/engine/common/base/basic_types.h b/runtime/engine/common/base/basic_types.h index c7fdc9241..2b15a61fe 100644 --- a/runtime/engine/common/base/basic_types.h +++ b/runtime/engine/common/base/basic_types.h @@ -28,7 +28,7 @@ typedef int int32; // NOLINT #if defined(__LP64__) && !defined(OS_MACOSX) && !defined(OS_OPENBSD) typedef long int64; // NOLINT #else -typedef long long int64; // NOLINT +typedef long long int64; // NOLINT #endif typedef unsigned char uint8; // NOLINT diff --git a/runtime/engine/common/base/common.h b/runtime/engine/common/base/common.h index 17560102e..b31fc53e0 100644 --- a/runtime/engine/common/base/common.h +++ b/runtime/engine/common/base/common.h @@ -50,4 +50,5 @@ #include "base/log.h" #include "base/macros.h" #include "utils/file_utils.h" -#include "utils/math.h" \ No newline at end of file +#include "utils/math.h" +#include "utils/timer.h" \ No newline at end of file diff --git a/runtime/engine/common/base/config.h b/runtime/engine/common/base/config.h index c59c3ab8b..c8eae5e28 100644 --- a/runtime/engine/common/base/config.h +++ b/runtime/engine/common/base/config.h @@ -10,11 +10,14 @@ using namespace std; #pragma once +#ifdef _MSC_VER #pragma region ParseIniFile +#endif + /* -* \brief Generic configuration Class -* -*/ + * \brief Generic configuration Class + * + */ class Config { // Data protected: @@ -32,7 +35,7 @@ class Config { std::string comment = "#"); Config(); template <class T> - T Read(const std::string& in_key) const; //!<Search for key and read value 
+ T Read(const std::string& in_key) const; //!< Search for key and read value //! or optional default value, call //! as read<T> template <class T> @@ -335,4 +338,6 @@ void Config::ReadFile(string filename, string delimiter, string comment) { in >> (*this); } +#ifdef _MSC_VER #pragma endregion ParseIniFIle +#endif diff --git a/runtime/engine/common/base/log_impl.cc b/runtime/engine/common/base/log_impl.cc index 8286f1e70..d82955905 100644 --- a/runtime/engine/common/base/log_impl.cc +++ b/runtime/engine/common/base/log_impl.cc @@ -29,9 +29,9 @@ LogMessage::LogMessage(const char* file, bool out_to_file /* = false */) : level_(level), verbose_(verbose), out_to_file_(out_to_file) { if (FLAGS_logtostderr == 0) { - stream_ = std::shared_ptr<std::ostream>(&std::cout); + stream_ = static_cast<std::ostream*>(&std::cout); } else if (FLAGS_logtostderr == 1) { - stream_ = std::shared_ptr<std::ostream>(&std::cerr); + stream_ = static_cast<std::ostream*>(&std::cerr); } else if (out_to_file_) { // logfile lock_.lock(); @@ -46,11 +46,21 @@ LogMessage::~LogMessage() { lock_.unlock(); } - if (level_ == FATAL) { + if (verbose_ && level_ == FATAL) { std::abort(); } } +std::ostream* LogMessage::nullstream() { + thread_local static std::ofstream os; + thread_local static bool flag_set = false; + if (!flag_set) { + os.setstate(std::ios_base::badbit); + flag_set = true; + } + return &os; +} + void LogMessage::init(const char* file, int line) { time_t t = time(0); char tmp[100]; @@ -73,30 +83,20 @@ void LogMessage::init(const char* file, int line) { std::string("log." + proc_name + ".log.FATAL." + tmp + "." 
+ pid); } - std::ofstream ofs; + thread_local static std::ofstream ofs; if (level_ == DEBUG) { - stream_ = std::make_shared<std::ofstream>( - s_debug_logfile_.c_str(), std::ios::out | std::ios::app); - // ofs.open(s_debug_logfile_.c_str(), std::ios::out | std::ios::app); + ofs.open(s_debug_logfile_.c_str(), std::ios::out | std::ios::app); } else if (level_ == INFO) { - // ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app); - stream_ = std::make_shared<std::ofstream>( - s_warning_logfile_.c_str(), std::ios::out | std::ios::app); + ofs.open(s_info_logfile_.c_str(), std::ios::out | std::ios::app); } else if (level_ == WARNING) { - // ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app); - stream_ = std::make_shared<std::ofstream>( - s_warning_logfile_.c_str(), std::ios::out | std::ios::app); + ofs.open(s_warning_logfile_.c_str(), std::ios::out | std::ios::app); } else if (level_ == ERROR) { - // ofs.open(s_error_logfile_.c_str(), std::ios::out | std::ios::app); - stream_ = std::make_shared<std::ofstream>( - s_error_logfile_.c_str(), std::ios::out | std::ios::app); + ofs.open(s_error_logfile_.c_str(), std::ios::out | std::ios::app); } else { - // ofs.open(s_fatal_logfile_.c_str(), std::ios::out | std::ios::app); - stream_ = std::make_shared<std::ofstream>( - s_fatal_logfile_.c_str(), std::ios::out | std::ios::app); + ofs.open(s_fatal_logfile_.c_str(), std::ios::out | std::ios::app); } - // stream_ = &ofs; + stream_ = &ofs; stream() << tmp << " " << file << " line " << line << "; "; stream() << std::flush; diff --git a/runtime/engine/common/base/log_impl.h b/runtime/engine/common/base/log_impl.h index 935736205..2cc96c45c 100644 --- a/runtime/engine/common/base/log_impl.h +++ b/runtime/engine/common/base/log_impl.h @@ -18,6 +18,9 @@ #pragma once +#include <stdlib.h> +#include <unistd.h> + #include <fstream> #include <iostream> #include <mutex> @@ -25,9 +28,6 @@ #include <string> #include <thread> -#include <stdlib.h> -#include <unistd.h> 
- #include "base/common.h" #include "base/macros.h" #ifndef WITH_GLOG @@ -61,13 +61,15 @@ class LogMessage { ~LogMessage(); - std::ostream& stream() { return *stream_; } + std::ostream& stream() { return verbose_ ? *stream_ : *nullstream(); } private: void init(const char* file, int line); + std::ostream* nullstream(); private: - std::shared_ptr<std::ostream> stream_; + std::ostream* stream_; + std::ostream* null_stream_; Severity level_; bool verbose_; bool out_to_file_; @@ -88,14 +90,16 @@ class LogMessage { } // namespace ppspeech -#ifndef NDEBUG -#define DLOG_DEBUG \ - ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::DEBUG, false) +#ifdef NDEBUG +#define DLOG_INFO \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, false) +#define DLOG_WARNING \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, false) +#define DLOG_ERROR \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, false) +#define DLOG_FATAL \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, false) #else -#define DLOG_DEBUG \ - ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::DEBUG, true) -#endif - #define DLOG_INFO \ ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, true) #define DLOG_WARNING \ @@ -104,17 +108,30 @@ class LogMessage { ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, true) #define DLOG_FATAL \ ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, true) +#endif -#define DLOG_0 DLOG_DEBUG -#define DLOG_1 DLOG_INFO -#define DLOG_2 DLOG_WARNING -#define DLOG_3 DLOG_ERROR -#define DLOG_4 DLOG_FATAL -#define LOG(level) DLOG_##level.stream() +#define LOG_INFO \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::INFO, true) +#define LOG_WARNING \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::WARNING, true) +#define LOG_ERROR \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::ERROR, true) +#define 
LOG_FATAL \ + ppspeech::log::LogMessage(__FILE__, __LINE__, ppspeech::log::FATAL, true) -#define VLOG(verboselevel) LOG(verboselevel) +#define LOG_0 LOG_DEBUG +#define LOG_1 LOG_INFO +#define LOG_2 LOG_WARNING +#define LOG_3 LOG_ERROR +#define LOG_4 LOG_FATAL + +#define LOG(level) LOG_##level.stream() + +#define DLOG(level) DLOG_##level.stream() + +#define VLOG(verboselevel) LOG(verboselevel) #define CHECK(exp) \ ppspeech::log::LogMessage( \ diff --git a/runtime/engine/common/frontend/CMakeLists.txt b/runtime/engine/common/frontend/CMakeLists.txt index 4ff3117c9..5d78e7ead 100644 --- a/runtime/engine/common/frontend/CMakeLists.txt +++ b/runtime/engine/common/frontend/CMakeLists.txt @@ -6,6 +6,7 @@ add_library(kaldi-native-fbank-core mel-computations.cc rfft.cc ) +target_link_libraries(kaldi-native-fbank-core PUBLIC utils base) add_library(frontend STATIC cmvn.cc @@ -15,7 +16,7 @@ add_library(frontend STATIC assembler.cc wave-reader.cc ) -target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils) +target_link_libraries(frontend PUBLIC kaldi-native-fbank-core utils base) set(BINS compute_fbank_main @@ -24,5 +25,6 @@ set(BINS foreach(bin_name IN LISTS BINS) add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc) target_include_directories(${bin_name} PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) - target_link_libraries(${bin_name} PUBLIC frontend base utils kaldi-util gflags extern_glog) + # https://github.com/Kitware/CMake/blob/v3.1.0/Modules/FindThreads.cmake#L207 + target_link_libraries(${bin_name} PUBLIC frontend base utils kaldi-util gflags Threads::Threads extern_glog) endforeach() \ No newline at end of file diff --git a/runtime/engine/common/frontend/assembler.cc b/runtime/engine/common/frontend/assembler.cc index 487951cdb..70e1a43ec 100644 --- a/runtime/engine/common/frontend/assembler.cc +++ b/runtime/engine/common/frontend/assembler.cc @@ -17,9 +17,8 @@ namespace ppspeech { using kaldi::BaseFloat; -using std::vector; -using 
std::vector; using std::unique_ptr; +using std::vector; Assembler::Assembler(AssemblerOptions opts, unique_ptr<FrontendInterface> base_extractor) { diff --git a/runtime/engine/common/frontend/fftsg.c b/runtime/engine/common/frontend/fftsg.c index ec8217a2b..30b816049 100644 --- a/runtime/engine/common/frontend/fftsg.c +++ b/runtime/engine/common/frontend/fftsg.c @@ -821,12 +821,12 @@ void cftfsub(int n, double *a, int *ip, int nw, double *w) { } else #endif /* USE_CDFT_THREADS */ if (n > 512) { - cftrec4(n, a, nw, w); - } else if (n > 128) { - cftleaf(n, 1, a, nw, w); - } else { - cftfx41(n, a, nw, w); - } + cftrec4(n, a, nw, w); + } else if (n > 128) { + cftleaf(n, 1, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } bitrv2(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); @@ -868,12 +868,12 @@ void cftbsub(int n, double *a, int *ip, int nw, double *w) { } else #endif /* USE_CDFT_THREADS */ if (n > 512) { - cftrec4(n, a, nw, w); - } else if (n > 128) { - cftleaf(n, 1, a, nw, w); - } else { - cftfx41(n, a, nw, w); - } + cftrec4(n, a, nw, w); + } else if (n > 128) { + cftleaf(n, 1, a, nw, w); + } else { + cftfx41(n, a, nw, w); + } bitrv2conj(n, ip, a); } else if (n == 32) { cftf161(a, &w[nw - 8]); diff --git a/runtime/engine/common/frontend/rfft.cc b/runtime/engine/common/frontend/rfft.cc index 8cdb634ff..9ce6a172a 100644 --- a/runtime/engine/common/frontend/rfft.cc +++ b/runtime/engine/common/frontend/rfft.cc @@ -17,12 +17,13 @@ */ #include "frontend/rfft.h" -#include "base/log.h" #include <cmath> #include <memory> #include <vector> +#include "base/log.h" + // see fftsg.c #ifdef __cplusplus extern "C" void rdft(int n, int isgn, double *a, int *ip, double *w); diff --git a/runtime/engine/common/frontend/wave-reader.cc b/runtime/engine/common/frontend/wave-reader.cc index b64dcc9e3..e94aafef9 100644 --- a/runtime/engine/common/frontend/wave-reader.cc +++ b/runtime/engine/common/frontend/wave-reader.cc @@ -19,6 +19,8 @@ // See the Apache 2 License for the 
specific language governing permissions and // limitations under the License. +#include "frontend/wave-reader.h" + #include <algorithm> #include <cstdio> #include <limits> @@ -27,7 +29,6 @@ #include "base/kaldi-error.h" #include "base/kaldi-utils.h" -#include "frontend/wave-reader.h" namespace kaldi { @@ -243,10 +244,9 @@ void WaveInfo::Read(std::istream &is) { << ", data chunk size: " << data_chunk_size << ". Assume 'stream mode' (reading data to EOF)."; - if (!is_stream_mode && - std::abs(static_cast<int64>(riff_chunk_read) + - static_cast<int64>(data_chunk_size) - - static_cast<int64>(riff_chunk_size)) > 1) { + if (!is_stream_mode && std::abs(static_cast<int64>(riff_chunk_read) + + static_cast<int64>(data_chunk_size) - + static_cast<int64>(riff_chunk_size)) > 1) { // We allow the size to be off by one without warning, because there is // a // weirdness in the format of RIFF files that means that the input may diff --git a/runtime/engine/common/matrix/kaldi-matrix.h b/runtime/engine/common/matrix/kaldi-matrix.h index c082a731c..d614f36f9 100644 --- a/runtime/engine/common/matrix/kaldi-matrix.h +++ b/runtime/engine/common/matrix/kaldi-matrix.h @@ -590,7 +590,7 @@ class MatrixBase { * SpMatrix and use Eig() function there, which uses eigenvalue * decomposition * directly rather than SVD. - */ + */ /// stream read. /// Use instead of stream<<*this, if you want to add to existing contents. diff --git a/runtime/engine/common/matrix/kaldi-vector.cc b/runtime/engine/common/matrix/kaldi-vector.cc index 1d0b55b96..3ab9a7ffa 100644 --- a/runtime/engine/common/matrix/kaldi-vector.cc +++ b/runtime/engine/common/matrix/kaldi-vector.cc @@ -24,8 +24,10 @@ // limitations under the License. 
#include "matrix/kaldi-vector.h" + #include <algorithm> #include <string> + #include "matrix/kaldi-matrix.h" namespace kaldi { diff --git a/runtime/engine/common/matrix/matrix-common.h b/runtime/engine/common/matrix/matrix-common.h index 512beb204..e915db0a7 100644 --- a/runtime/engine/common/matrix/matrix-common.h +++ b/runtime/engine/common/matrix/matrix-common.h @@ -90,7 +90,7 @@ typedef uint32 UnsignedMatrixIndexT; // typedef size_t MatrixIndexT; // typedef ssize_t SignedMatrixIndexT; // typedef size_t UnsignedMatrixIndexT; -} +} // namespace kaldi #endif // KALDI_MATRIX_MATRIX_COMMON_H_ diff --git a/runtime/engine/common/utils/CMakeLists.txt b/runtime/engine/common/utils/CMakeLists.txt index eb3c71979..14733648c 100644 --- a/runtime/engine/common/utils/CMakeLists.txt +++ b/runtime/engine/common/utils/CMakeLists.txt @@ -5,6 +5,7 @@ set(csrc math.cc strings.cc audio_process.cc + timer.cc ) add_library(utils ${csrc}) diff --git a/runtime/engine/common/utils/timer.cc b/runtime/engine/common/utils/timer.cc new file mode 100644 index 000000000..ff43cd04c --- /dev/null +++ b/runtime/engine/common/utils/timer.cc @@ -0,0 +1,63 @@ +// Copyright 2020 Xiaomi Corporation (authors: Haowen Qiu) +// Mobvoi Inc. (authors: Fangjun Kuang) +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include <chrono> + +#include "common/utils/timer.h" + +namespace ppspeech{ + +struct TimerImpl{ + TimerImpl() = default; + virtual ~TimerImpl() = default; + virtual void Reset() = 0; + // time in seconds + virtual double Elapsed() = 0; +}; + +class CpuTimerImpl : public TimerImpl { + public: + CpuTimerImpl() { Reset(); } + + using high_resolution_clock = std::chrono::high_resolution_clock; + + void Reset() override { begin_ = high_resolution_clock::now(); } + + // time in seconds + double Elapsed() override { + auto end = high_resolution_clock::now(); + auto dur = + std::chrono::duration_cast<std::chrono::microseconds>(end - begin_); + return dur.count() / 1000000.0; + } + + private: + high_resolution_clock::time_point begin_; +}; + +Timer::Timer() { + impl_ = std::make_unique<CpuTimerImpl>(); +} + +Timer::~Timer() = default; + +void Timer::Reset() const { impl_->Reset(); } + +double Timer::Elapsed() const { return impl_->Elapsed(); } + + +} //namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/common/utils/timer.h b/runtime/engine/common/utils/timer.h new file mode 100644 index 000000000..6f4ae1f8d --- /dev/null +++ b/runtime/engine/common/utils/timer.h @@ -0,0 +1,39 @@ +// Copyright 2020 Xiaomi Corporation (authors: Haowen Qiu) +// Mobvoi Inc. (authors: Fangjun Kuang) +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include <memory> + +namespace ppspeech { + +struct TimerImpl; + +class Timer { + public: + Timer(); + ~Timer(); + + void Reset() const; + + // time in seconds + double Elapsed() const; + + private: + std::unique_ptr<TimerImpl> impl_; +}; + +} //namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/vad/CMakeLists.txt b/runtime/engine/vad/CMakeLists.txt index 4e9f448c9..f61c5a9a8 100644 --- a/runtime/engine/vad/CMakeLists.txt +++ b/runtime/engine/vad/CMakeLists.txt @@ -1,5 +1,7 @@ +include_directories( +${CMAKE_CURRENT_SOURCE_DIR}/../ +) +add_subdirectory(nnet) -set(bin_name silero_vad_main) -add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc vad.cc) -target_link_libraries(${bin_name} ${FASTDEPLOY_LIBS} gflags extern_glog) +add_subdirectory(interface) \ No newline at end of file diff --git a/runtime/engine/vad/wav.h b/runtime/engine/vad/frontend/wav.h similarity index 99% rename from runtime/engine/vad/wav.h rename to runtime/engine/vad/frontend/wav.h index 6d1a6f723..f9b7bee22 100644 --- a/runtime/engine/vad/wav.h +++ b/runtime/engine/vad/frontend/wav.h @@ -17,6 +17,8 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> + +#include <iostream> #include <string> namespace wav { diff --git a/runtime/engine/vad/interface/CMakeLists.txt b/runtime/engine/vad/interface/CMakeLists.txt new file mode 100644 index 000000000..307000279 --- /dev/null +++ b/runtime/engine/vad/interface/CMakeLists.txt @@ -0,0 +1,25 @@ +set(srcs + vad_interface.cc +) + +add_library(pps_vad_interface ${srcs}) +target_link_libraries(pps_vad_interface PUBLIC pps_vad extern_glog) + + +set(bin_name vad_interface_main) +add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc) +target_link_libraries(${bin_name} pps_vad_interface) +# set_target_properties(${bin_name} PROPERTIES PUBLIC_HEADER "vad_interface.h;../frontend/wav.h") + + +file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR}) +install(TARGETS 
pps_vad_interface DESTINATION lib) +install(FILES vad_interface.h DESTINATION include/${DEST_DIR}) + +install(TARGETS vad_interface_main + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + PUBLIC_HEADER DESTINATION include/${DEST_DIR} +) +install(FILES vad_interface_main.cc DESTINATION demo/${DEST_DIR}) \ No newline at end of file diff --git a/runtime/engine/vad/interface/vad_interface.cc b/runtime/engine/vad/interface/vad_interface.cc new file mode 100644 index 000000000..4c3877ff0 --- /dev/null +++ b/runtime/engine/vad/interface/vad_interface.cc @@ -0,0 +1,94 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "vad/interface/vad_interface.h" + +#include "common/base/config.h" +#include "vad/nnet/vad.h" + + +PPSHandle_t PPSVadCreateInstance(const char* conf_path) { + Config conf(conf_path); + ppspeech::VadNnetConf nnet_conf; + nnet_conf.sr = conf.Read("sr", 16000); + nnet_conf.frame_ms = conf.Read("frame_ms", 32); + nnet_conf.threshold = conf.Read("threshold", 0.45f); + nnet_conf.beam = conf.Read("beam", 0.15f); + nnet_conf.min_silence_duration_ms = + conf.Read("min_silence_duration_ms", 200); + nnet_conf.speech_pad_left_ms = conf.Read("speech_pad_left_ms", 0); + nnet_conf.speech_pad_right_ms = conf.Read("speech_pad_right_ms", 0); + + nnet_conf.model_file_path = conf.Read("model_path", std::string("")); + nnet_conf.param_file_path = conf.Read("param_path", std::string("")); + nnet_conf.num_cpu_thread = conf.Read("num_cpu_thread", 1); + + ppspeech::Vad* model = new ppspeech::Vad(nnet_conf.model_file_path); + + // custom config, but must be set before init + model->SetConfig(nnet_conf); + model->Init(); + + return static_cast<PPSHandle_t>(model); +} + + +int PPSVadDestroyInstance(PPSHandle_t instance) { + ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance); + if (model != nullptr) { + delete model; + model = nullptr; + } + return 0; +} + +int PPSVadChunkSizeSamples(PPSHandle_t instance) { + ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance); + if (model == nullptr) { + printf("instance is null\n"); + return -1; + } + + return model->WindowSizeSamples(); +} + +PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, + float* chunk, + int num_element) { + ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance); + if (model == nullptr) { + printf("instance is null\n"); + return PPS_VAD_ILLEGAL; + } + + std::vector<float> chunk_in(chunk, chunk + num_element); + if (!model->ForwardChunk(chunk_in)) { + printf("forward chunk failed\n"); + return PPS_VAD_ILLEGAL; + } + ppspeech::Vad::State s = model->Postprocess(); + PPSVadState_t ret = 
(PPSVadState_t)s; + return ret; +} + +int PPSVadReset(PPSHandle_t instance) { + ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance); + if (model == nullptr) { + printf("instance is null\n"); + return -1; + } + model->Reset(); + return 0; +} \ No newline at end of file diff --git a/runtime/engine/vad/interface/vad_interface.h b/runtime/engine/vad/interface/vad_interface.h new file mode 100644 index 000000000..5d7ca7091 --- /dev/null +++ b/runtime/engine/vad/interface/vad_interface.h @@ -0,0 +1,46 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* PPSHandle_t; + +typedef enum { + PPS_VAD_ILLEGAL = 0, // error + PPS_VAD_SIL, // silence + PPS_VAD_START, // start speech + PPS_VAD_SPEECH, // in speech + PPS_VAD_END, // end speech + PPS_VAD_NUMSTATES, // number of states +} PPSVadState_t; + +PPSHandle_t PPSVadCreateInstance(const char* conf_path); + +int PPSVadDestroyInstance(PPSHandle_t instance); + +int PPSVadReset(PPSHandle_t instance); + +int PPSVadChunkSizeSamples(PPSHandle_t instance); + +PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, + float* chunk, + int num_element); + +#ifdef __cplusplus +} +#endif // __cplusplus \ No newline at end of file diff --git a/runtime/engine/vad/interface/vad_interface_main.cc b/runtime/engine/vad/interface/vad_interface_main.cc new file mode 100644 index 000000000..16059c41f --- /dev/null +++ b/runtime/engine/vad/interface/vad_interface_main.cc @@ -0,0 +1,71 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include <iostream> +#include <vector> + +#include "common/base/common.h" +#include "vad/frontend/wav.h" +#include "vad/interface/vad_interface.h" + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: vad_interface_main path/to/config path/to/audio " + "run_option, " + "e.g ./vad_interface_main config sample.wav" + << std::endl; + return -1; + } + + std::string config_path = argv[1]; + std::string audio_file = argv[2]; + + PPSHandle_t handle = PPSVadCreateInstance(config_path.c_str()); + + std::vector<float> inputWav; // [0, 1] + wav::WavReader wav_reader = wav::WavReader(audio_file); + auto sr = wav_reader.sample_rate(); + CHECK(sr == 16000) << " sr is " << sr << " expect 16000"; + + auto num_samples = wav_reader.num_samples(); + inputWav.resize(num_samples); + for (int i = 0; i < num_samples; i++) { + inputWav[i] = wav_reader.data()[i] / 32768; + } + + ppspeech::Timer timer; + int window_size_samples = PPSVadChunkSizeSamples(handle); + for (int64_t j = 0; j < num_samples; j += window_size_samples) { + auto start = j; + auto end = start + window_size_samples >= num_samples + ? 
num_samples + : start + window_size_samples; + auto current_chunk_size = end - start; + + std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end}; + assert(r.size() == static_cast<size_t>(current_chunk_size)); + + PPSVadState_t s = PPSVadFeedForward(handle, r.data(), r.size()); + std::cout << s << " "; + } + std::cout << std::endl; + + std::cout << "RTF=" << timer.Elapsed() / double(num_samples / sr) + << std::endl; + + PPSVadReset(handle); + + return 0; +} diff --git a/runtime/engine/vad/nnet/CMakeLists.txt b/runtime/engine/vad/nnet/CMakeLists.txt new file mode 100644 index 000000000..22c9f7602 --- /dev/null +++ b/runtime/engine/vad/nnet/CMakeLists.txt @@ -0,0 +1,16 @@ +set(srcs + vad.cc +) + +add_library(pps_vad ${srcs}) +target_link_libraries(pps_vad PUBLIC ${FASTDEPLOY_LIBS} common extern_glog) + + +set(bin_name vad_nnet_main) +add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc) +target_link_libraries(${bin_name} pps_vad) + + +file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR}) +install(TARGETS pps_vad DESTINATION lib) +install(TARGETS extern_glog DESTINATION lib) \ No newline at end of file diff --git a/runtime/engine/vad/vad.cc b/runtime/engine/vad/nnet/vad.cc similarity index 80% rename from runtime/engine/vad/vad.cc rename to runtime/engine/vad/nnet/vad.cc index 7630b98df..0b77e632a 100644 --- a/runtime/engine/vad/vad.cc +++ b/runtime/engine/vad/nnet/vad.cc @@ -1,4 +1,5 @@ // Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,20 +12,15 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-#include "vad.h" +#include "vad/nnet/vad.h" + #include <cstring> #include <iomanip> +#include "common/base/common.h" + -#ifdef NDEBUG -#define LOG_DEBUG \ - ::fastdeploy::FDLogger(true, "[DEBUG]") << __REL_FILE__ << "(" << __LINE__ \ - << ")::" << __FUNCTION__ << "\t" -#else -#define LOG_DEBUG \ - ::fastdeploy::FDLogger(false, "[DEBUG]") \ - << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" -#endif +namespace ppspeech { Vad::Vad(const std::string& model_file, const fastdeploy::RuntimeOption& @@ -48,18 +44,30 @@ Vad::Vad(const std::string& model_file, } void Vad::Init() { - std::call_once(init_, [&]() { initialized = Initialize(); }); + std::lock_guard<std::mutex> lock(init_lock_); + Initialize(); } std::string Vad::ModelName() const { return "VAD"; } -void Vad::SetConfig(int sr, - int frame_ms, - float threshold, - int min_silence_duration_ms, - int speech_pad_left_ms, - int speech_pad_right_ms) { - if (initialized) { +void Vad::SetConfig(const VadNnetConf conf) { + SetConfig(conf.sr, + conf.frame_ms, + conf.threshold, + conf.beam, + conf.min_silence_duration_ms, + conf.speech_pad_left_ms, + conf.speech_pad_right_ms); +} + +void Vad::SetConfig(const int& sr, + const int& frame_ms, + const float& threshold, + const float& beam, + const int& min_silence_duration_ms, + const int& speech_pad_left_ms, + const int& speech_pad_right_ms) { + if (initialized_) { fastdeploy::FDERROR << "SetConfig must be called before init" << std::endl; throw std::runtime_error("SetConfig must be called before init"); @@ -67,6 +75,7 @@ void Vad::SetConfig(int sr, sample_rate_ = sr; sr_per_ms_ = sr / 1000; threshold_ = threshold; + beam_ = beam; frame_ms_ = frame_ms; min_silence_samples_ = min_silence_duration_ms * sr_per_ms_; speech_pad_left_samples_ = speech_pad_left_ms * sr_per_ms_; @@ -76,8 +85,8 @@ void Vad::SetConfig(int sr, window_size_samples_ = frame_ms * sr_per_ms_; current_chunk_size_ = window_size_samples_; - fastdeploy::FDINFO << "sr=" << sr << " threshold=" 
<< threshold - << " frame_ms=" << frame_ms + fastdeploy::FDINFO << "sr=" << sr_per_ms_ << " threshold=" << threshold_ + << " beam=" << beam_ << " frame_ms=" << frame_ms_ << " min_silence_duration_ms=" << min_silence_duration_ms << " speech_pad_left_ms=" << speech_pad_left_ms << " speech_pad_right_ms=" << speech_pad_right_ms; @@ -114,12 +123,17 @@ bool Vad::Initialize() { Reset(); + // InitRuntime if (!InitRuntime()) { fastdeploy::FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; } + + initialized_ = true; + + fastdeploy::FDINFO << "init done."; return true; } @@ -162,8 +176,8 @@ const Vad::State& Vad::Postprocess() { if (outputProb_ < threshold_ && !triggerd_) { // 1. Silence - LOG_DEBUG << "{ silence: " << 1.0 * current_sample_ / sample_rate_ - << " s; prob: " << outputProb_ << " }"; + DLOG(INFO) << "{ silence: " << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; states_.emplace_back(Vad::State::SIL); } else if (outputProb_ >= threshold_ && !triggerd_) { // 2. Start @@ -172,27 +186,28 @@ const Vad::State& Vad::Postprocess() { current_sample_ - current_chunk_size_ - speech_pad_left_samples_; float start_sec = 1.0 * speech_start_ / sample_rate_; speakStart_.emplace_back(start_sec); - LOG_DEBUG << "{ speech start: " << start_sec - << " s; prob: " << outputProb_ << " }"; + DLOG(INFO) << "{ speech start: " << start_sec + << " s; prob: " << outputProb_ << " }"; states_.emplace_back(Vad::State::START); - } else if (outputProb_ >= threshold_ - 0.15 && triggerd_) { + } else if (outputProb_ >= threshold_ - beam_ && triggerd_) { // 3. 
Continue if (temp_end_ != 0) { // speech prob relaxation, speech continues again - LOG_DEBUG << "{ speech fake end(sil < min_silence_ms) to continue: " - << 1.0 * current_sample_ / sample_rate_ - << " s; prob: " << outputProb_ << " }"; + DLOG(INFO) + << "{ speech fake end(sil < min_silence_ms) to continue: " + << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; temp_end_ = 0; } else { // speech prob relaxation, keep tracking speech - LOG_DEBUG << "{ speech continue: " - << 1.0 * current_sample_ / sample_rate_ - << " s; prob: " << outputProb_ << " }"; + DLOG(INFO) << "{ speech continue: " + << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; } states_.emplace_back(Vad::State::SPEECH); - } else if (outputProb_ < threshold_ - 0.15 && triggerd_) { + } else if (outputProb_ < threshold_ - beam_ && triggerd_) { // 4. End if (temp_end_ == 0) { temp_end_ = current_sample_; @@ -201,9 +216,9 @@ const Vad::State& Vad::Postprocess() { // check possible speech end if (current_sample_ - temp_end_ < min_silence_samples_) { // a. silence < min_slience_samples, continue speaking - LOG_DEBUG << "{ speech fake end(sil < min_silence_ms): " - << 1.0 * current_sample_ / sample_rate_ - << " s; prob: " << outputProb_ << " }"; + DLOG(INFO) << "{ speech fake end(sil < min_silence_ms): " + << 1.0 * current_sample_ / sample_rate_ + << " s; prob: " << outputProb_ << " }"; states_.emplace_back(Vad::State::SIL); } else { // b. 
silence >= min_slience_samples, end speaking @@ -212,8 +227,8 @@ const Vad::State& Vad::Postprocess() { triggerd_ = false; auto end_sec = 1.0 * speech_end_ / sample_rate_; speakEnd_.emplace_back(end_sec); - LOG_DEBUG << "{ speech end: " << end_sec - << " s; prob: " << outputProb_ << " }"; + DLOG(INFO) << "{ speech end: " << end_sec + << " s; prob: " << outputProb_ << " }"; states_.emplace_back(Vad::State::END); } } @@ -303,4 +318,6 @@ std::ostream& operator<<(std::ostream& os, const Vad::State& s) { break; } return os; -} \ No newline at end of file +} + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/vad/vad.h b/runtime/engine/vad/nnet/vad.h similarity index 78% rename from runtime/engine/vad/vad.h rename to runtime/engine/vad/nnet/vad.h index 6eed7d1c3..de557ec67 100644 --- a/runtime/engine/vad/vad.h +++ b/runtime/engine/vad/nnet/vad.h @@ -1,4 +1,5 @@ // Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,33 +12,59 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #pragma once #include <iostream> #include <mutex> #include <vector> -#include "./wav.h" + #include "fastdeploy/fastdeploy_model.h" #include "fastdeploy/runtime.h" +#include "vad/frontend/wav.h" + +namespace ppspeech { + +struct VadNnetConf { + // wav + int sr; + int frame_ms; + float threshold; + float beam; + int min_silence_duration_ms; + int speech_pad_left_ms; + int speech_pad_right_ms; + + // model + std::string model_file_path; + std::string param_file_path; + std::string dict_file_path; + int num_cpu_thread; // 1 thred + std::string backend; // ort,lite, etc. 
+}; class Vad : public fastdeploy::FastDeployModel { public: - enum class State { SIL = 0, START, SPEECH, END }; + enum class State { ILLEGAL = 0, SIL, START, SPEECH, END }; friend std::ostream& operator<<(std::ostream& os, const Vad::State& s); Vad(const std::string& model_file, const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()); + virtual ~Vad() {} + void Init(); void Reset(); - void SetConfig(int sr, - int frame_ms, - float threshold, - int min_silence_duration_ms, - int speech_pad_left_ms, - int speech_pad_right_ms); + void SetConfig(const int& sr, + const int& frame_ms, + const float& threshold, + const float& beam, + const int& min_silence_duration_ms, + const int& speech_pad_left_ms, + const int& speech_pad_right_ms); + void SetConfig(const VadNnetConf conf); bool ForwardChunk(std::vector<float>& chunk); @@ -78,7 +105,9 @@ class Vad : public fastdeploy::FastDeployModel { bool Initialize(); private: - std::once_flag init_; + std::mutex init_lock_; + bool initialized_{false}; + // input and output std::vector<fastdeploy::FDTensor> inputTensors_; std::vector<fastdeploy::FDTensor> outputTensors_; @@ -103,6 +132,7 @@ class Vad : public fastdeploy::FastDeployModel { int sample_rate_ = 16000; int frame_ms_ = 32; // 32, 64, 96 for 16k float threshold_ = 0.5f; + float beam_ = 0.15f; int64_t window_size_samples_; // support 256 512 768 for 8k; 512 1024 1536 // for 16k. 
@@ -122,3 +152,5 @@ class Vad : public fastdeploy::FastDeployModel { const std::vector<int64_t> sr_node_dims_ = {1}; const std::vector<int64_t> hc_node_dims_ = {2, 1, 64}; }; + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/vad/silero_vad_main.cc b/runtime/engine/vad/nnet/vad_nnet_main.cc similarity index 58% rename from runtime/engine/vad/silero_vad_main.cc rename to runtime/engine/vad/nnet/vad_nnet_main.cc index 7fb524060..7b89d1af3 100644 --- a/runtime/engine/vad/silero_vad_main.cc +++ b/runtime/engine/vad/nnet/vad_nnet_main.cc @@ -1,11 +1,26 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
-#include "vad.h" + +#include "common/base/common.h" +#include "vad/nnet/vad.h" int main(int argc, char* argv[]) { if (argc < 3) { - std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio " + std::cout << "Usage: vad_nnet_main path/to/model path/to/audio " "run_option, " - "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav" + "e.g ./vad_nnet_main silero_vad.onnx sample.wav" << std::endl; return -1; } @@ -14,9 +29,9 @@ int main(int argc, char* argv[]) { std::string audio_file = argv[2]; int sr = 16000; - Vad vad(model_file); + ppspeech::Vad vad(model_file); // custom config, but must be set before init - vad.SetConfig(sr, 32, 0.45f, 200, 0, 0); + vad.SetConfig(sr, 32, 0.5f, 0.15, 200, 0, 0); vad.Init(); std::vector<float> inputWav; // [0, 1] @@ -30,6 +45,7 @@ int main(int argc, char* argv[]) { inputWav[i] = wav_reader.data()[i] / 32768; } + ppspeech::Timer timer; int window_size_samples = vad.WindowSizeSamples(); for (int64_t j = 0; j < num_samples; j += window_size_samples) { auto start = j; @@ -39,7 +55,7 @@ int main(int argc, char* argv[]) { auto current_chunk_size = end - start; std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end}; - assert(r.size() == current_chunk_size); + assert(r.size() == static_cast<size_t>(current_chunk_size)); if (!vad.ForwardChunk(r)) { std::cerr << "Failed to inference while using model:" @@ -47,11 +63,14 @@ int main(int argc, char* argv[]) { return false; } - Vad::State s = vad.Postprocess(); + ppspeech::Vad::State s = vad.Postprocess(); std::cout << s << " "; } std::cout << std::endl; + std::cout << "RTF=" << timer.Elapsed() / (static_cast<double>(num_samples) / sr) + << std::endl; // floating-point division keeps the sub-second part of the audio length + std::vector<std::map<std::string, float>> result = vad.GetResult(); for (auto& res : result) { std::cout << "speak start: " << res["start"] diff --git a/runtime/examples/silero_vad/README.md b/runtime/examples/silero_vad/README.md deleted file mode 100644 index f032be862..000000000 --- a/runtime/examples/silero_vad/README.md +++ /dev/null 
@@ -1,121 +0,0 @@ -English | [简体中文](README_CN.md) - -# Silero VAD Deployment Example - -This directory provides examples that `infer_onnx_silero_vad` fast finishes the deployment of VAD models on CPU/GPU. - -Before deployment, two steps require confirmation. - -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). - -Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. - -```bash -mkdir build -cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav - -# inference -./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav -``` - -- The above command works for Linux or MacOS. 
Refer to: - - [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows - -## VAD C++ Interface - -### Vad Class - -```c++ -Vad::Vad(const std::string& model_file, - const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) -``` - -**Parameter** - -> * **model_file**(str): Model file path -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration) - -### setAudioCofig function - -**Must be called before the `init` function** - -```c++ -void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); -``` - -**Parameter** - -> * **sr**(int): sampling rate -> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size -> * **threshold**(float): Result probability judgment threshold -> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence -> * **speech_pad_ms**(int): Used to calculate the end time of the speech - -### init function - -Used to initialize audio-related parameters. - -```c++ -void Vad::init(); -``` - -### loadAudio function - -Load audio. - -```c++ -void Vad::loadAudio(const std::string& wavPath) -``` - -**Parameter** - -> * **wavPath**(str): Audio file path - -### Predict function - -Used to start model reasoning. 
- -```c++ -bool Vad::Predict(); -``` - -### getResult function - -**Used to obtain reasoning results** - -```c++ -std::vector<std::map<std::string, float>> Vad::getResult( - float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, - float mergeThreshold = 0.3); -``` - -**Parameter** - -> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold -> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly -> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly -> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly - -**The output result format is**`std::vector<std::map<std::string, float>>` - -> Output a list, each element is a speech fragment -> -> Each clip can use 'start' to get the start time and 'end' to get the end time - -### Tips - -1. `The setAudioCofig`function must be called before the `init` function -2. The sampling rate of the input audio file must be consistent with that set in the code - -- [Model Description](../) -- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md) diff --git a/runtime/examples/silero_vad/README_CN.md b/runtime/examples/silero_vad/README_CN.md deleted file mode 100644 index c45d9896c..000000000 --- a/runtime/examples/silero_vad/README_CN.md +++ /dev/null @@ -1,119 +0,0 @@ -[English](README.md) | 简体中文 -# Silero VAD 部署示例 - -本目录下提供`infer_onnx_silero_vad`快速完成 Silero VAD 模型在CPU/GPU。 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 
根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。 - -```bash -mkdir build -cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下 -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav - -# 推理 -./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav -``` - -以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: -- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md) - -## VAD C++ 接口 -### Vad 类 - -```c++ -Vad::Vad(const std::string& model_file, - const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) -``` - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 - -### setAudioCofig 函数 - -**必须在`init`函数前调用** - -```c++ -void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); -``` - -**参数** - -> * **sr**(int): 采样率 -> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小 -> * **threshold**(float): 结果概率判断阈值 -> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值 -> * **speech_pad_ms**(int): 用于计算 speach 结束时刻 - -### init 函数 - -用于初始化音频相关参数 - -```c++ -void Vad::init(); -``` - -### loadAudio 函数 - -加载音频 - -```c++ -void Vad::loadAudio(const std::string& wavPath) -``` - -**参数** - -> * **wavPath**(str): 音频文件路径 - -### Predict 函数 - -用于开始模型推理 - -```c++ -bool Vad::Predict(); -``` - -### getResult 函数 - -**用于获取推理结果** - -```c++ -std::vector<std::map<std::string, float>> Vad::getResult( - float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, - 
float mergeThreshold = 0.3); -``` - -**参数** - -> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃 -> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻 -> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻 -> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段 - -**输出结果格式为**`std::vector<std::map<std::string, float>>` - -> 输出一个列表,每个元素是一个讲话片段 -> -> 每个片段可以用 'start' 获取到开始时刻,用 'end' 获取到结束时刻 - -### 提示 - -1. `setAudioCofig`函数必须在`init`函数前调用 -2. 输入的音频文件的采样率必须与代码中设置的保持一致 - -- [模型介绍](../) -- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/runtime/examples/silero_vad/local/decode.sh b/runtime/examples/silero_vad/local/decode.sh deleted file mode 100755 index e69de29bb..000000000 diff --git a/runtime/examples/silero_vad/path.sh b/runtime/examples/silero_vad/path.sh deleted file mode 100644 index ad3a73584..000000000 --- a/runtime/examples/silero_vad/path.sh +++ /dev/null @@ -1,18 +0,0 @@ -# This contains the locations of binarys build required for running the examples. - -unset GREP_OPTIONS - -ENGINE_ROOT=$PWD/../../../ -ENGINE_BUILD=$ENGINE_ROOT/build/engine/asr - -ENGINE_TOOLS=$ENGINE_ROOT/tools -TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin - -[ -d $ENGINE_BUILD ] || { echo "Error: 'build/runtime' directory not found. 
please ensure that the project build successfully"; } - -export LC_AL=C - -export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/decoder:$ENGINE_BUILD/../common/frontend/audio:$ENGINE_BUILD/recognizer - -#PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);") -export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH diff --git a/runtime/examples/u2pp_ol/wenetspeech/path.sh b/runtime/examples/u2pp_ol/wenetspeech/path.sh index ad3a73584..544e2048b 100644 --- a/runtime/examples/u2pp_ol/wenetspeech/path.sh +++ b/runtime/examples/u2pp_ol/wenetspeech/path.sh @@ -3,7 +3,7 @@ unset GREP_OPTIONS ENGINE_ROOT=$PWD/../../../ -ENGINE_BUILD=$ENGINE_ROOT/build/engine/asr +ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/asr ENGINE_TOOLS=$ENGINE_ROOT/tools TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin diff --git a/runtime/examples/silero_vad/.gitignore b/runtime/examples/vad/.gitignore similarity index 100% rename from runtime/examples/silero_vad/.gitignore rename to runtime/examples/vad/.gitignore diff --git a/runtime/examples/vad/README.md b/runtime/examples/vad/README.md new file mode 100644 index 000000000..b521063b0 --- /dev/null +++ b/runtime/examples/vad/README.md @@ -0,0 +1,261 @@ +# Silero VAD - pre-trained enterprise-grade Voice Activity Detector + +This directory provides VAD models on CPU/GPU. + + + + +## VAD Interface + +For the VAD interface, please see [engine/vad/interface](../../engine/vad/interface/). + +### Create Handle + +```c++ +PPSHandle_t PPSVadCreateInstance(const char* conf_path); +``` + +### Destroy Handle + +```c++ +int PPSVadDestroyInstance(PPSHandle_t instance); +``` + +### Reset Vad State + +```c++ +int PPSVadReset(PPSHandle_t instance); +``` + +Reset the Vad state before processing the next `wav`. 
+ +### Get Chunk Size + +```c++ +int PPSVadChunkSizeSamples(PPSHandle_t instance); +``` + +This API returns the chunk size in `sample` units. +When running forward, feed `chunk size` samples each time, except for the last chunk. + +### Vad Forward + +```c++ +PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, + float* chunk, + int num_element); +``` + +Vad has the following states: +```c++ +typedef enum { + PPS_VAD_ILLEGAL = 0, // error + PPS_VAD_SIL, // silence + PPS_VAD_START, // start speech + PPS_VAD_SPEECH, // in speech + PPS_VAD_END, // end speech + PPS_VAD_NUMSTATES, // number of states +} PPSVadState_t; +``` + +If an error occurs, `PPSVadFeedForward` returns the `PPS_VAD_ILLEGAL` state. + + +## Linux + +### Build Runtime +```bash +# cd /path/to/paddlespeech/runtime +cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON +cmake --build build +``` + +Since VAD uses the FastDeploy runtime, if you already have another FastDeploy library, you can build with this command instead: + +```bash +# cd /path/to/paddlespeech/runtime +cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace//paddle/FastDeploy/build/Linux/x86_64/install +cmake --build build +``` + +`FASTDEPLOY_INSTALL_DIR` is the install directory of the FastDeploy library. 
+ +### Run Demo + +After building success, we can do this to run demo under this example dir: + +```bash +bash run.sh +``` + +The output like these: + +```bash +/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(88)::SetConfig sr=16 threshold=0.5 beam=0.15 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend Runtime initialized with Backend::ORT in Device::CPU./workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(137)::Initialize init done.[SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] 
[SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] +RTF=0.00774591 +speak start: 0.32 s, end: 2.464 s | speak start: 3.296 s, end: 4.64 s | speak start: 5.408 s, end: 7.872 s | speak start: 8.192 s, end: 10.72 s +vad_nnet_main done! +sr = 16000 +frame_ms = 32 +threshold = 0.5 +beam = 0.15 +min_silence_duration_ms = 200 +speech_pad_left_ms = 0 +speech_pad_right_ms = 0 +model_path = ./data/silero_vad/silero_vad.onnx +param_path = (default)num_cpu_thread = 1(default)/workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(88)::SetConfig sr=16 threshold=0.5 beam=0.15 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend Runtime initialized with Backend::ORT in Device::CPU./workspace//PaddleSpeech/runtime/engine/vad/nnet/vad.cc(137)::Initialize init done. 
+1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 +RTF=0.00778218 +vad_interface_main done! +``` + +## Android + +When to using on Android, please setup your `NDK` enverment before, then do as below: + +```bash +# cd /path/to/paddlespeech/runtime +bash build_android.sh +``` + +## Result + +| Arch | RTF | Runtime Size | +|--|--|--| +| x86_64 | 0.00778218 | | +| arm64-v8a | 0.00744745 | ~10.532MB | + +## Machine Information + +#### x86_64 + +The environment as below: + +```text +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +CPU(s): 80 +On-line CPU(s) list: 0-79 +Thread(s) per core: 2 +Core(s) per socket: 20 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6 +Model: 85 +Model name: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz +Stepping: 7 +CPU MHz: 2599.998 +BogoMIPS: 5199.99 +Hypervisor vendor: KVM +Virtualization type: full +L1d cache: 32K +L1i cache: 32K +L2 cache: 1024K +L3 cache: 33792K +NUMA node0 CPU(s): 0-39 +NUMA node1 CPU(s): 40-79 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 
3dnowprefetch invpcid_single ssbd ibrs ibpb ibrs_enhanced fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 arat umip pku ospke avx512_vnni spec_ctrl arch_capabilities +``` + +#### arm64-v8a + +```text +Processor : AArch64 Processor rev 14 (aarch64) +processor : 0 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x805 +CPU revision : 14 + +processor : 1 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x805 +CPU revision : 14 + +processor : 2 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x805 +CPU revision : 14 + +processor : 3 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x805 +CPU revision : 14 + +processor : 4 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x804 +CPU revision : 14 + +processor : 5 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x804 +CPU revision : 14 + +processor : 6 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU 
part : 0x804 +CPU revision : 14 + +processor : 7 +BogoMIPS : 38.40 +Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 atomics fphp asimdhp cpuid asimdrdm lrcpc dcpop +CPU implementer : 0x51 +CPU architecture: 8 +CPU variant : 0xd +CPU part : 0x804 +CPU revision : 14 + +Hardware : Qualcomm Technologies, Inc SM8150 +``` + + +## Download Pre-trained ONNX Model + +For developers' testing, the exported VAD model is provided below and can be downloaded directly. + +| Model | Size | Notes | +| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- | +| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad), MIT License | + + +## FastDeploy Runtime + +For FastDeploy software and hardware requirements and pre-built libraries, please see [FastDeploy](https://github.com/PaddlePaddle/FastDeploy): + +- 1. [FastDeploy Environment Requirements](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/build_and_install/download_prebuilt_libraries.md). +- 2. [FastDeploy Precompiled Library](https://github.com/PaddlePaddle/FastDeploy/blob/develop/docs/en/build_and_install/download_prebuilt_libraries.md). 
+ + +## Reference +* https://github.com/snakers4/silero-vad +* https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/audio/silero-vad/README.md diff --git a/runtime/examples/vad/conf/vad.ini b/runtime/examples/vad/conf/vad.ini new file mode 100644 index 000000000..c168c73b5 --- /dev/null +++ b/runtime/examples/vad/conf/vad.ini @@ -0,0 +1,11 @@ +[model] +model_path=./data/silero_vad/silero_vad.onnx + +[vad] +sr = 16000 # 16k +frame_ms = 32 # 32, 64, 96 for 16k +threshold = 0.5 +beam = 0.15 +min_silence_duration_ms = 200 +speech_pad_left_ms = 0 +speech_pad_right_ms = 0 diff --git a/runtime/examples/silero_vad/local/build.sh b/runtime/examples/vad/local/build.sh similarity index 100% rename from runtime/examples/silero_vad/local/build.sh rename to runtime/examples/vad/local/build.sh diff --git a/runtime/examples/silero_vad/local/build_android.sh b/runtime/examples/vad/local/build_android.sh similarity index 100% rename from runtime/examples/silero_vad/local/build_android.sh rename to runtime/examples/vad/local/build_android.sh diff --git a/runtime/examples/vad/local/decode.sh b/runtime/examples/vad/local/decode.sh new file mode 100755 index 000000000..ff0a0d447 --- /dev/null +++ b/runtime/examples/vad/local/decode.sh @@ -0,0 +1,23 @@ +#!/bin/bash +set -e + +conf=conf +data=data +exp=exp + +. utils/parse_options.sh + +mkdir -p $exp +ckpt_dir=$data/silero_vad +model=$ckpt_dir/silero_vad.onnx +test_wav=$data/silero_vad_sample.wav +conf_file=$conf/vad.ini + + +vad_nnet_main $model $test_wav +echo "vad_nnet_main done!" + +vad_interface_main $conf_file $test_wav +echo "vad_interface_main done!" 
+ + diff --git a/runtime/examples/silero_vad/local/download.sh b/runtime/examples/vad/local/download.sh similarity index 100% rename from runtime/examples/silero_vad/local/download.sh rename to runtime/examples/vad/local/download.sh diff --git a/runtime/examples/vad/path.sh b/runtime/examples/vad/path.sh new file mode 100644 index 000000000..b49911113 --- /dev/null +++ b/runtime/examples/vad/path.sh @@ -0,0 +1,17 @@ +# This contains the locations of the built binaries required for running the examples. + +unset GREP_OPTIONS + +ENGINE_ROOT=$PWD/../../ +ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/vad + +ENGINE_TOOLS=$ENGINE_ROOT/tools +TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin + +[ -d $ENGINE_BUILD ] || { echo "Error: 'build/runtime' directory not found. please ensure that the project build successfully"; } + +export LC_ALL=C + +export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/interface + +export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH diff --git a/runtime/examples/silero_vad/run.sh b/runtime/examples/vad/run.sh old mode 100644 new mode 100755 similarity index 77% rename from runtime/examples/silero_vad/run.sh rename to runtime/examples/vad/run.sh index 9707df1bb..606a44f8c --- a/runtime/examples/silero_vad/run.sh +++ b/runtime/examples/vad/run.sh @@ -15,8 +15,8 @@ exp=exp mkdir -p $exp $data # 1. compile -if [ ! -d ${SPEECHX_BUILD} ]; then - pushd ${SPEECHX_ROOT} +if [ ! -d ${ENGINE_BUILD} ]; then + pushd ${ENGINE_ROOT} bash build.sh # build for android armv8/armv7 @@ -24,8 +24,6 @@ if [ ! -d ${SPEECHX_BUILD} ]; then popd fi -ckpt_dir=$data/silero_vad -wav=$data/silero_vad_sample.wav if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then ./local/download.sh diff --git a/runtime/examples/silero_vad/utils b/runtime/examples/vad/utils similarity index 100% rename from runtime/examples/silero_vad/utils rename to runtime/examples/vad/utils