[runtime] optimization compile and add vad interface (#3026)
* vad recipe ok * refactor vad, add vad conf, vad inerface, vad recipe * format * install vad lib/bin/inc * using cpack * add vad doc, fix vad state name * add comment * refactor fastdeploy download * add vad jni; format code * add timer; compute vad rtf; vad add beam param * andorid find library * fix log; add vad rtf * fix glog * fix BUILD_TYPE bug * update doc * rm jnipull/3050/head
parent
2beb7ffce0
commit
bf914a9c8b
@ -1,8 +1,20 @@
|
||||
#!/usr/bin/env bash
# Configure and build the runtime for Linux/x86_64.
#
# FASTDEPLOY_INSTALL_DIR may be exported by the caller to point at a prebuilt
# FastDeploy SDK; when it is not set the previous hard-coded location is used.
set -xe

BUILD_ROOT=build/Linux
BUILD_DIR=${BUILD_ROOT}/x86_64

FASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR:-/workspace/zhanghui/paddle/FastDeploy/build/Linux/x86_64/install}

mkdir -p "${BUILD_DIR}"

# This build script has been verified in the PaddlePaddle docker image.
# Please follow the instructions below to install the PaddlePaddle image.
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html

# NOTE(review): this first configure/build targets the plain `build` directory
# (ASR only), while the second one below targets ${BUILD_DIR} (VAD only) --
# confirm that both are still intended.
cmake -B build -DWITH_ASR=ON -DWITH_CLS=OFF -DWITH_VAD=OFF
cmake --build build -j

cmake -B "${BUILD_DIR}" \
    -DCMAKE_BUILD_TYPE=Release \
    -DBUILD_SHARED_LIBS=OFF \
    -DWITH_ASR=OFF \
    -DWITH_CLS=OFF \
    -DWITH_VAD=ON \
    -DFASTDEPLOY_INSTALL_DIR="${FASTDEPLOY_INSTALL_DIR}"
cmake --build "${BUILD_DIR}" -j
|
||||
|
@ -1,42 +1,119 @@
|
||||
set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture:
|
||||
android_arm, android_armv7, android_armv8, android_x86, android_x86_64,
|
||||
mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86,
|
||||
windows_x86")
|
||||
|
||||
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)
|
||||
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz)
|
||||
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
|
||||
wget -c https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz -P ${FASTDEPLOY_DIR} &&
|
||||
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4.tgz -C ${FASTDEPLOY_DIR} &&
|
||||
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.4 ${FASTDEPLOY_DIR}/linux-x64")
|
||||
endif()
|
||||
include(FetchContent)
|
||||
|
||||
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared.tgz)
|
||||
exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
|
||||
wget -c https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz -P ${FASTDEPLOY_DIR} &&
|
||||
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared.tgz -C ${FASTDEPLOY_DIR} &&
|
||||
mv ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.4-shared ${FASTDEPLOY_DIR}/android-armv7v8")
|
||||
endif()
|
||||
# Logging switches that are expanded into the FetchContent_Declare() calls in
# this file. Each LOG_<STEP> flag asks for that step's output to be wrapped in
# a script and written to a log file.
set(EXTERNAL_PROJECT_LOG_ARGS
    # download / update / patch steps
    LOG_DOWNLOAD 1
    LOG_UPDATE 1
    LOG_PATCH 1
    # configure / build / install / test steps
    LOG_CONFIGURE 1
    LOG_BUILD 1
    LOG_INSTALL 1
    LOG_TEST 1
    # log presentation
    LOG_MERGED_STDOUTERR 1
    LOG_OUTPUT_ON_FAILURE 1
)
|
||||
|
||||
if(NOT FASTDEPLOY_INSTALL_DIR)
|
||||
if(ANDROID)
|
||||
FetchContent_Declare(
|
||||
fastdeploy
|
||||
URL https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.4-shared.tgz
|
||||
URL_HASH MD5=2a15301158e9eb157a4f11283689e7ba
|
||||
${EXTERNAL_PROJECT_LOG_ARGS}
|
||||
)
|
||||
add_definitions("-DUSE_PADDLE_LITE_BAKEND")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
|
||||
else() # Linux
|
||||
FetchContent_Declare(
|
||||
fastdeploy
|
||||
URL https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.4.tgz
|
||||
URL_HASH MD5=125df3bfce603521960cc5c8b47faab0
|
||||
${EXTERNAL_PROJECT_LOG_ARGS}
|
||||
)
|
||||
|
||||
add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
|
||||
# add_definitions("-DUSE_ORT_BACKEND")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
|
||||
endif()
|
||||
|
||||
if(ANDROID)
|
||||
set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8)
|
||||
add_definitions("-DUSE_PADDLE_LITE_BAKEND")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
|
||||
elseif(UNIX)
|
||||
set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64)
|
||||
add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
|
||||
# add_definitions("-DUSE_ORT_BACKEND")
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
|
||||
FetchContent_MakeAvailable(fastdeploy)

# FetchContent publishes the unpacked location as <lowercaseName>_SOURCE_DIR.
# Use it instead of rebuilding the path by hand so the value stays correct
# when FETCHCONTENT_BASE_DIR or FETCHCONTENT_SOURCE_DIR_FASTDEPLOY is overridden.
set(FASTDEPLOY_INSTALL_DIR ${fastdeploy_SOURCE_DIR})
|
||||
endif()
|
||||
|
||||
message(STATUS "FASTDEPLOY_INSTALL_DIR=${FASTDEPLOY_INSTALL_DIR} ${UNIX}")
|
||||
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
|
||||
|
||||
# fix compiler flags conflict, since fastdeploy using c++11 for project
|
||||
# this line must after `include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)`
|
||||
set(CMAKE_CXX_STANDARD ${PPS_CXX_STANDARD})
|
||||
|
||||
include_directories(${FASTDEPLOY_INCS})
|
||||
message(STATUS "FASTDEPLOY_INCS=${FASTDEPLOY_INCS}")
|
||||
|
||||
# install fastdeploy and dependents lib
|
||||
# install_fastdeploy_libraries(${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
|
||||
# No dynamic libs need to install while using
|
||||
# FastDeploy static lib.
|
||||
if(ANDROID AND WITH_ANDROID_STATIC_LIB)
|
||||
return()
|
||||
endif()
|
||||
|
||||
# Glob pattern that matches shared libraries on the current platform.
if(WIN32)
  set(DYN_LIB_SUFFIX "*.dll")
elseif(APPLE)
  set(DYN_LIB_SUFFIX "*.dylib*")
else()
  set(DYN_LIB_SUFFIX "*.so*")
endif()
|
||||
|
||||
# Pick the directory that is searched for FastDeploy shared libraries:
# FastDeploy_DIR wins over FASTDEPLOY_INSTALL_DIR; one of them must be set.
if(NOT FastDeploy_DIR AND NOT FASTDEPLOY_INSTALL_DIR)
  message(FATAL_ERROR "Please set FastDeploy_DIR/FASTDEPLOY_INSTALL_DIR before call install_fastdeploy_libraries.")
endif()

if(FastDeploy_DIR)
  set(DYN_SEARCH_DIR ${FastDeploy_DIR})
else()
  set(DYN_SEARCH_DIR ${FASTDEPLOY_INSTALL_DIR})
endif()

# Shared libraries shipped in lib/ and in third_libs/ of the search directory.
file(GLOB_RECURSE ALL_NEED_DYN_LIBS ${DYN_SEARCH_DIR}/lib/${DYN_LIB_SUFFIX})
file(GLOB_RECURSE ALL_DEPS_DYN_LIBS ${DYN_SEARCH_DIR}/third_libs/${DYN_LIB_SUFFIX})
|
||||
|
||||
# Vision builds: OpenCV (and optionally FlyCV) libraries are taken out of the
# generic third-party list and installed through their own rules instead.
if(ENABLE_VISION)
  # OpenCV
  if(ANDROID)
    file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${DYN_LIB_SUFFIX})
  else()
    file(GLOB_RECURSE ALL_OPENCV_DYN_LIBS ${OpenCV_DIR}/../../${DYN_LIB_SUFFIX})
  endif()

  # list(REMOVE_ITEM) needs an existing list and at least one item; skip the
  # call when either glob came back empty instead of failing at configure time.
  if(ALL_DEPS_DYN_LIBS AND ALL_OPENCV_DYN_LIBS)
    list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_OPENCV_DYN_LIBS})
  endif()

  if(WIN32)
    file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/x64/vc15/bin/${DYN_LIB_SUFFIX})
    install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
  elseif(ANDROID AND (NOT WITH_ANDROID_OPENCV_STATIC))
    file(GLOB OPENCV_DYN_LIBS ${OpenCV_NATIVE_DIR}/libs/${ANDROID_ABI}/${DYN_LIB_SUFFIX})
    install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
  else() # linux/mac
    file(GLOB OPENCV_DYN_LIBS ${OpenCV_DIR}/lib/${DYN_LIB_SUFFIX})
    install(FILES ${OPENCV_DYN_LIBS} DESTINATION lib)
  endif()

  # FlyCV
  if(ENABLE_FLYCV)
    file(GLOB_RECURSE ALL_FLYCV_DYN_LIBS ${FLYCV_LIB_DIR}/${DYN_LIB_SUFFIX})
    # Same guard as for OpenCV above.
    if(ALL_DEPS_DYN_LIBS AND ALL_FLYCV_DYN_LIBS)
      list(REMOVE_ITEM ALL_DEPS_DYN_LIBS ${ALL_FLYCV_DYN_LIBS})
    endif()
    if(ANDROID AND (NOT WITH_ANDROID_FLYCV_STATIC))
      install(FILES ${ALL_FLYCV_DYN_LIBS} DESTINATION lib)
    endif()
  endif()
endif()
|
||||
|
||||
# The OpenVINO backend needs its plugin description files (*.xml) installed
# next to the libraries.
if(ENABLE_OPENVINO_BACKEND)
  set(OPENVINO_RUNTIME_BIN_DIR ${OPENVINO_DIR}/bin)
  file(GLOB OPENVINO_PLUGIN_XML ${OPENVINO_RUNTIME_BIN_DIR}/*.xml)
  install(FILES ${OPENVINO_PLUGIN_XML} DESTINATION lib)
endif()

# Install the FastDeploy libraries and whatever is left of the third-party list.
install(
  FILES
    ${ALL_NEED_DYN_LIBS}
    ${ALL_DEPS_DYN_LIBS}
  DESTINATION lib
)
|
||||
|
@ -0,0 +1,63 @@
|
||||
// Copyright 2020 Xiaomi Corporation (authors: Haowen Qiu)
|
||||
// Mobvoi Inc. (authors: Fangjun Kuang)
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include <chrono>
|
||||
|
||||
#include "common/utils/timer.h"
|
||||
|
||||
namespace ppspeech{
|
||||
|
||||
// Abstract backend behind Timer. Concrete timers provide Reset() and
// Elapsed(); the virtual destructor allows deletion through a TimerImpl*.
struct TimerImpl {
    TimerImpl() = default;
    virtual ~TimerImpl() = default;

    // Sets a new starting point for the measurement.
    virtual void Reset() = 0;

    // Time in seconds.
    virtual double Elapsed() = 0;
};
|
||||
|
||||
class CpuTimerImpl : public TimerImpl {
|
||||
public:
|
||||
CpuTimerImpl() { Reset(); }
|
||||
|
||||
using high_resolution_clock = std::chrono::high_resolution_clock;
|
||||
|
||||
void Reset() override { begin_ = high_resolution_clock::now(); }
|
||||
|
||||
// time in seconds
|
||||
double Elapsed() override {
|
||||
auto end = high_resolution_clock::now();
|
||||
auto dur =
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(end - begin_);
|
||||
return dur.count() / 1000000.0;
|
||||
}
|
||||
|
||||
private:
|
||||
high_resolution_clock::time_point begin_;
|
||||
};
|
||||
|
||||
Timer::Timer() {
|
||||
impl_ = std::make_unique<CpuTimerImpl>();
|
||||
}
|
||||
|
||||
Timer::~Timer() = default;
|
||||
|
||||
void Timer::Reset() const { impl_->Reset(); }
|
||||
|
||||
double Timer::Elapsed() const { return impl_->Elapsed(); }
|
||||
|
||||
|
||||
} //namespace ppspeech
|
@ -0,0 +1,39 @@
|
||||
// Copyright 2020 Xiaomi Corporation (authors: Haowen Qiu)
|
||||
// Mobvoi Inc. (authors: Fangjun Kuang)
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
struct TimerImpl;
|
||||
|
||||
class Timer {
|
||||
public:
|
||||
Timer();
|
||||
~Timer();
|
||||
|
||||
void Reset() const;
|
||||
|
||||
// time in seconds
|
||||
double Elapsed() const;
|
||||
|
||||
private:
|
||||
std::unique_ptr<TimerImpl> impl_;
|
||||
};
|
||||
|
||||
} //namespace ppspeech
|
@ -1,5 +1,7 @@
|
||||
# Make headers addressable relative to the parent directory of this one.
include_directories(
  ${CMAKE_CURRENT_SOURCE_DIR}/../
)

add_subdirectory(nnet)

set(bin_name silero_vad_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc vad.cc)
# Keyword signature: an executable's link dependencies are private to it.
target_link_libraries(${bin_name} PRIVATE ${FASTDEPLOY_LIBS} gflags extern_glog)

add_subdirectory(interface)
|
@ -0,0 +1,25 @@
|
||||
# Library exposing the VAD engine through the interface in vad_interface.h.
add_library(pps_vad_interface vad_interface.cc)
target_link_libraries(pps_vad_interface PUBLIC pps_vad extern_glog)

# Demo executable built on top of the interface library.
set(bin_name vad_interface_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
# Keyword signature: an executable's link dependencies are private to it.
target_link_libraries(${bin_name} PRIVATE pps_vad_interface)

# Install layout mirrors this directory's path relative to ENGINE_ROOT.
file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS pps_vad_interface DESTINATION lib)
install(FILES vad_interface.h DESTINATION include/${DEST_DIR})

install(TARGETS ${bin_name}
  RUNTIME DESTINATION bin
  LIBRARY DESTINATION lib
  ARCHIVE DESTINATION lib
  PUBLIC_HEADER DESTINATION include/${DEST_DIR}
)
# The demo source is shipped as well, as a usage example.
install(FILES ${bin_name}.cc DESTINATION demo/${DEST_DIR})
|
@ -0,0 +1,94 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include "vad/interface/vad_interface.h"
|
||||
|
||||
#include "common/base/config.h"
|
||||
#include "vad/nnet/vad.h"
|
||||
|
||||
|
||||
// Creates a VAD instance configured from the file at `conf_path`.
//
// Keys read from the config and the defaults used when a key is missing:
//   sr=16000, frame_ms=32, threshold=0.45, beam=0.15,
//   min_silence_duration_ms=200, speech_pad_left_ms=0, speech_pad_right_ms=0,
//   model_path="", param_path="", num_cpu_thread=1.
//
// Returns an opaque handle to the new instance, or nullptr when `conf_path`
// is null. Release the handle with PPSVadDestroyInstance().
PPSHandle_t PPSVadCreateInstance(const char* conf_path) {
    // Building the config reader from a null C string is undefined; report
    // the problem the same way the other entry points report a null instance.
    if (conf_path == nullptr) {
        printf("conf_path is null\n");
        return nullptr;
    }

    Config conf(conf_path);
    ppspeech::VadNnetConf nnet_conf;
    nnet_conf.sr = conf.Read("sr", 16000);
    nnet_conf.frame_ms = conf.Read("frame_ms", 32);
    nnet_conf.threshold = conf.Read("threshold", 0.45f);
    nnet_conf.beam = conf.Read("beam", 0.15f);
    nnet_conf.min_silence_duration_ms =
        conf.Read("min_silence_duration_ms", 200);
    nnet_conf.speech_pad_left_ms = conf.Read("speech_pad_left_ms", 0);
    nnet_conf.speech_pad_right_ms = conf.Read("speech_pad_right_ms", 0);

    nnet_conf.model_file_path = conf.Read("model_path", std::string(""));
    nnet_conf.param_file_path = conf.Read("param_path", std::string(""));
    nnet_conf.num_cpu_thread = conf.Read("num_cpu_thread", 1);

    ppspeech::Vad* model = new ppspeech::Vad(nnet_conf.model_file_path);

    // custom config, but must be set before init
    model->SetConfig(nnet_conf);
    model->Init();

    return static_cast<PPSHandle_t>(model);
}
|
||||
|
||||
|
||||
// Destroys an instance created by PPSVadCreateInstance(). A null handle is
// accepted. Always returns 0.
int PPSVadDestroyInstance(PPSHandle_t instance) {
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete static_cast<ppspeech::Vad*>(instance);
    return 0;
}
|
||||
|
||||
// Returns the instance's window size in samples (Vad::WindowSizeSamples()),
// or -1 when `instance` is null.
int PPSVadChunkSizeSamples(PPSHandle_t instance) {
    auto* vad = static_cast<ppspeech::Vad*>(instance);
    if (!vad) {
        printf("instance is null\n");
        return -1;
    }
    return vad->WindowSizeSamples();
}
|
||||
|
||||
// Feeds one chunk of `num_element` float samples to the instance and returns
// the state reported by Vad::Postprocess(), converted to PPSVadState_t.
//
// Returns PPS_VAD_ILLEGAL when `instance` is null, when the buffer arguments
// are invalid (negative length, or null pointer with a non-zero length), or
// when Vad::ForwardChunk() fails.
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
                                float* chunk,
                                int num_element) {
    ppspeech::Vad* model = static_cast<ppspeech::Vad*>(instance);
    if (model == nullptr) {
        printf("instance is null\n");
        return PPS_VAD_ILLEGAL;
    }

    // Reject ranges that cannot be copied safely before building the vector.
    if (num_element < 0 || (chunk == nullptr && num_element > 0)) {
        printf("chunk is invalid\n");
        return PPS_VAD_ILLEGAL;
    }

    std::vector<float> chunk_in(chunk, chunk + num_element);
    if (!model->ForwardChunk(chunk_in)) {
        printf("forward chunk failed\n");
        return PPS_VAD_ILLEGAL;
    }

    // NOTE(review): the conversion assumes ppspeech::Vad::State and
    // PPSVadState_t use the same numeric values -- confirm against vad.h.
    ppspeech::Vad::State s = model->Postprocess();
    return static_cast<PPSVadState_t>(s);
}
|
||||
|
||||
// Calls Vad::Reset() on the instance. Returns 0 on success and -1 when
// `instance` is null.
int PPSVadReset(PPSHandle_t instance) {
    auto* vad = static_cast<ppspeech::Vad*>(instance);
    if (!vad) {
        printf("instance is null\n");
        return -1;
    }

    vad->Reset();
    return 0;
}
|
@ -0,0 +1,46 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifdef __cplusplus
extern "C" {
#endif

// Opaque handle to a VAD instance.
typedef void* PPSHandle_t;

// State reported for a chunk of audio.
typedef enum {
    PPS_VAD_ILLEGAL = 0,  // error
    PPS_VAD_SIL,          // silence
    PPS_VAD_START,        // start speech
    PPS_VAD_SPEECH,       // in speech
    PPS_VAD_END,          // end speech
    PPS_VAD_NUMSTATES,    // number of states
} PPSVadState_t;

// Creates an instance from the config file at `conf_path` and returns its
// handle.
PPSHandle_t PPSVadCreateInstance(const char* conf_path);

// Destroys an instance; a null handle is accepted. Returns 0.
int PPSVadDestroyInstance(PPSHandle_t instance);

// Resets the instance. Returns 0, or -1 when `instance` is null.
int PPSVadReset(PPSHandle_t instance);

// Returns the chunk (window) size in samples, or -1 when `instance` is null.
int PPSVadChunkSizeSamples(PPSHandle_t instance);

// Processes `num_element` float samples starting at `chunk` and returns the
// resulting state; PPS_VAD_ILLEGAL signals an error.
PPSVadState_t PPSVadFeedForward(PPSHandle_t instance,
                                float* chunk,
                                int num_element);

#ifdef __cplusplus
}
#endif  // __cplusplus
|
@ -0,0 +1,71 @@
|
||||
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
#include "common/base/common.h"
|
||||
#include "vad/frontend/wav.h"
|
||||
#include "vad/interface/vad_interface.h"
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc < 3) {
|
||||
std::cout << "Usage: vad_interface_main path/to/config path/to/audio "
|
||||
"run_option, "
|
||||
"e.g ./vad_interface_main config sample.wav"
|
||||
<< std::endl;
|
||||
return -1;
|
||||
}
|
||||
|
||||
std::string config_path = argv[1];
|
||||
std::string audio_file = argv[2];
|
||||
|
||||
PPSHandle_t handle = PPSVadCreateInstance(config_path.c_str());
|
||||
|
||||
std::vector<float> inputWav; // [0, 1]
|
||||
wav::WavReader wav_reader = wav::WavReader(audio_file);
|
||||
auto sr = wav_reader.sample_rate();
|
||||
CHECK(sr == 16000) << " sr is " << sr << " expect 16000";
|
||||
|
||||
auto num_samples = wav_reader.num_samples();
|
||||
inputWav.resize(num_samples);
|
||||
for (int i = 0; i < num_samples; i++) {
|
||||
inputWav[i] = wav_reader.data()[i] / 32768;
|
||||
}
|
||||
|
||||
ppspeech::Timer timer;
|
||||
int window_size_samples = PPSVadChunkSizeSamples(handle);
|
||||
for (int64_t j = 0; j < num_samples; j += window_size_samples) {
|
||||
auto start = j;
|
||||
auto end = start + window_size_samples >= num_samples
|
||||
? num_samples
|
||||
: start + window_size_samples;
|
||||
auto current_chunk_size = end - start;
|
||||
|
||||
std::vector<float> r{&inputWav[0] + start, &inputWav[0] + end};
|
||||
assert(r.size() == static_cast<size_t>(current_chunk_size));
|
||||
|
||||
PPSVadState_t s = PPSVadFeedForward(handle, r.data(), r.size());
|
||||
std::cout << s << " ";
|
||||
}
|
||||
std::cout << std::endl;
|
||||
|
||||
std::cout << "RTF=" << timer.Elapsed() / double(num_samples / sr)
|
||||
<< std::endl;
|
||||
|
||||
PPSVadReset(handle);
|
||||
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
# VAD engine library.
add_library(pps_vad vad.cc)
target_link_libraries(pps_vad PUBLIC ${FASTDEPLOY_LIBS} common extern_glog)

# Command-line demo for the engine library.
set(bin_name vad_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
# Keyword signature: an executable's link dependencies are private to it.
target_link_libraries(${bin_name} PRIVATE pps_vad)

# NOTE(review): DEST_DIR is computed here but not used by the install rules
# in this file -- confirm whether it is still needed.
file(RELATIVE_PATH DEST_DIR ${ENGINE_ROOT} ${CMAKE_CURRENT_SOURCE_DIR})
install(TARGETS pps_vad DESTINATION lib)
install(TARGETS extern_glog DESTINATION lib)
|
@ -1,121 +0,0 @@
|
||||
English | [简体中文](README_CN.md)
|
||||
|
||||
# Silero VAD Deployment Example
|
||||
|
||||
This directory provides an example, `infer_onnx_silero_vad`, that shows how to quickly deploy VAD models on CPU/GPU.
|
||||
|
||||
Before deployment, two steps require confirmation.
|
||||
|
||||
- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
|
||||
- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md).
|
||||
|
||||
Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory.
|
||||
|
||||
```bash
|
||||
mkdir build
|
||||
cd build
|
||||
# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above
|
||||
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz
|
||||
tar xvf fastdeploy-linux-x64-x.x.x.tgz
|
||||
cmake .. -DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x
|
||||
make -j
|
||||
|
||||
# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory
|
||||
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz
|
||||
wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav
|
||||
|
||||
# inference
|
||||
./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav
|
||||
```
|
||||
|
||||
- The above command works for Linux or MacOS. Refer to:
|
||||
- [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows
|
||||
|
||||
## VAD C++ Interface
|
||||
|
||||
### Vad Class
|
||||
|
||||
```c++
|
||||
Vad::Vad(const std::string& model_file,
|
||||
const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption())
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
> * **model_file**(str): Model file path
|
||||
> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. (use the default configuration)
|
||||
|
||||
### setAudioCofig function
|
||||
|
||||
**Must be called before the `init` function**
|
||||
|
||||
```c++
|
||||
void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms);
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
> * **sr**(int): sampling rate
|
||||
> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size
|
||||
> * **threshold**(float): Result probability judgment threshold
|
||||
> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence
|
||||
> * **speech_pad_ms**(int): Used to calculate the end time of the speech
|
||||
|
||||
### init function
|
||||
|
||||
Used to initialize audio-related parameters.
|
||||
|
||||
```c++
|
||||
void Vad::init();
|
||||
```
|
||||
|
||||
### loadAudio function
|
||||
|
||||
Load audio.
|
||||
|
||||
```c++
|
||||
void Vad::loadAudio(const std::string& wavPath)
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
> * **wavPath**(str): Audio file path
|
||||
|
||||
### Predict function
|
||||
|
||||
Used to start model inference.
|
||||
|
||||
```c++
|
||||
bool Vad::Predict();
|
||||
```
|
||||
|
||||
### getResult function
|
||||
|
||||
**Used to obtain inference results**
|
||||
|
||||
```c++
|
||||
std::vector<std::map<std::string, float>> Vad::getResult(
|
||||
float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0,
|
||||
float mergeThreshold = 0.3);
|
||||
```
|
||||
|
||||
**Parameter**
|
||||
|
||||
> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold
|
||||
> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly
|
||||
> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly
|
||||
> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly
|
||||
|
||||
**The output result format is**`std::vector<std::map<std::string, float>>`
|
||||
|
||||
> Output a list, each element is a speech fragment
|
||||
>
|
||||
> Each clip can use 'start' to get the start time and 'end' to get the end time
|
||||
|
||||
### Tips
|
||||
|
||||
1. The `setAudioCofig` function must be called before the `init` function
|
||||
2. The sampling rate of the input audio file must be consistent with that set in the code
|
||||
|
||||
- [Model Description](../)
|
||||
- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md)
|
@ -1,18 +0,0 @@
|
||||
# Locations of the built binaries required for running the examples.
# Meant to be sourced: it extends PATH and LD_LIBRARY_PATH of the caller.

unset GREP_OPTIONS

ENGINE_ROOT=$PWD/../../../
ENGINE_BUILD=$ENGINE_ROOT/build/engine/asr

ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin

# Only warns; the script continues so the remaining exports still happen.
[ -d "$ENGINE_BUILD" ] || { echo "Error: '$ENGINE_BUILD' directory not found. please ensure that the project build successfully"; }

# LC_ALL (not LC_AL) is the variable that actually overrides the locale.
export LC_ALL=C

export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/decoder:$ENGINE_BUILD/../common/frontend/audio:$ENGINE_BUILD/recognizer

#PADDLE_LIB_PATH=$(python -c "import os; import paddle; include_dir=paddle.sysconfig.get_include(); paddle_dir=os.path.split(include_dir)[0]; libs_dir=os.path.join(paddle_dir, 'libs'); fluid_dir=os.path.join(paddle_dir, 'fluid'); out=':'.join([libs_dir, fluid_dir]); print(out);")
# Prepend PADDLE_LIB_PATH only when it is set: an empty leading entry would
# make the loader search the current directory.
export LD_LIBRARY_PATH=${PADDLE_LIB_PATH:+$PADDLE_LIB_PATH:}$LD_LIBRARY_PATH
|
@ -0,0 +1,11 @@
|
||||
[model]
|
||||
model_path=./data/silero_vad/silero_vad.onnx
|
||||
|
||||
[vad]
|
||||
sr = 16000 # 16k
|
||||
frame_ms = 32 # 32, 64, 96 for 16k
|
||||
threshold = 0.5
|
||||
beam = 0.15
|
||||
min_silence_duration_ms = 200
|
||||
speech_pad_left_ms = 0
|
||||
speech_pad_right_ms = 0
|
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Runs the two VAD demo binaries on the sample audio.
# NOTE(review): vad_nnet_main and vad_interface_main are invoked by bare name,
# so they must already be on PATH -- confirm that the caller sets it up.
set -e

conf=conf
data=data
exp=exp

# May override the variables above from command-line options.
. utils/parse_options.sh

mkdir -p "$exp"
ckpt_dir=$data/silero_vad
model=$ckpt_dir/silero_vad.onnx
test_wav=$data/silero_vad_sample.wav
conf_file=$conf/vad.ini

# Quote the expansions so paths containing spaces are passed as one argument.
vad_nnet_main "$model" "$test_wav"
echo "vad_nnet_main done!"

vad_interface_main "$conf_file" "$test_wav"
echo "vad_interface_main done!"
|
||||
|
||||
|
@ -0,0 +1,17 @@
|
||||
# Locations of the built binaries required for running the examples.
# Meant to be sourced: it extends PATH and LD_LIBRARY_PATH of the caller.

unset GREP_OPTIONS

ENGINE_ROOT=$PWD/../../
ENGINE_BUILD=$ENGINE_ROOT/build/Linux/x86_64/engine/vad

ENGINE_TOOLS=$ENGINE_ROOT/tools
TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin

# Only warns; the script continues so the remaining exports still happen.
[ -d "$ENGINE_BUILD" ] || { echo "Error: '$ENGINE_BUILD' directory not found. please ensure that the project build successfully"; }

# LC_ALL (not LC_AL) is the variable that actually overrides the locale.
export LC_ALL=C

export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/interface

# Prepend PADDLE_LIB_PATH only when it is set: an empty leading entry would
# make the loader search the current directory.
export LD_LIBRARY_PATH=${PADDLE_LIB_PATH:+$PADDLE_LIB_PATH:}$LD_LIBRARY_PATH
|
Loading…
Reference in new issue