From 77a3ceaa08b1cac493be2f6731206041f62d0be5 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 6 Mar 2023 11:36:48 +0000 Subject: [PATCH] refactor vad, add vad conf, vad interface, vad recipe --- runtime/CMakeLists.txt | 3 + runtime/build.sh | 2 +- runtime/cmake/fastdeploy.cmake | 4 +- runtime/cmake/openfst.cmake | 10 ++ runtime/engine/cls/nnet/CMakeLists.txt | 2 +- runtime/engine/common/CMakeLists.txt | 2 +- runtime/engine/common/base/config.h | 5 + runtime/engine/vad/CMakeLists.txt | 8 +- runtime/engine/vad/{ => frontend}/wav.h | 1 + runtime/engine/vad/interface/CMakeLists.txt | 11 +++ runtime/engine/vad/interface/vad_interface.cc | 92 +++++++++++++++++++ runtime/engine/vad/interface/vad_interface.h | 43 +++++++++ .../vad/interface/vad_interface_main.cc | 63 +++++++++++++ runtime/engine/vad/nnet/CMakeLists.txt | 13 +++ runtime/engine/vad/{ => nnet}/vad.cc | 42 +++++++-- runtime/engine/vad/{ => nnet}/vad.h | 44 +++++++-- .../vad_nnet_main.cc} | 26 ++++-- .../examples/{silero_vad => vad}/.gitignore | 0 .../examples/{silero_vad => vad}/README.md | 0 .../examples/{silero_vad => vad}/README_CN.md | 0 runtime/examples/vad/conf/vad.ini | 10 ++ .../{silero_vad => vad}/local/build.sh | 0 .../local/build_android.sh | 0 .../{silero_vad => vad}/local/decode.sh | 10 +- .../{silero_vad => vad}/local/download.sh | 0 runtime/examples/{silero_vad => vad}/path.sh | 2 +- runtime/examples/{silero_vad => vad}/run.sh | 0 runtime/examples/{silero_vad => vad}/utils | 0 28 files changed, 358 insertions(+), 35 deletions(-) rename runtime/engine/vad/{ => frontend}/wav.h (99%) create mode 100644 runtime/engine/vad/interface/CMakeLists.txt create mode 100644 runtime/engine/vad/interface/vad_interface.cc create mode 100644 runtime/engine/vad/interface/vad_interface.h create mode 100644 runtime/engine/vad/interface/vad_interface_main.cc create mode 100644 runtime/engine/vad/nnet/CMakeLists.txt rename runtime/engine/vad/{ => nnet}/vad.cc (93%) rename runtime/engine/vad/{ => nnet}/vad.h (79%) 
rename runtime/engine/vad/{silero_vad_main.cc => nnet/vad_nnet_main.cc} (63%) rename runtime/examples/{silero_vad => vad}/.gitignore (100%) rename runtime/examples/{silero_vad => vad}/README.md (100%) rename runtime/examples/{silero_vad => vad}/README_CN.md (100%) create mode 100644 runtime/examples/vad/conf/vad.ini rename runtime/examples/{silero_vad => vad}/local/build.sh (100%) rename runtime/examples/{silero_vad => vad}/local/build_android.sh (100%) rename runtime/examples/{silero_vad => vad}/local/decode.sh (51%) rename runtime/examples/{silero_vad => vad}/local/download.sh (100%) rename runtime/examples/{silero_vad => vad}/path.sh (86%) rename runtime/examples/{silero_vad => vad}/run.sh (100%) rename runtime/examples/{silero_vad => vad}/utils (100%) diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index af970526a..a8203a6d7 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -42,6 +42,9 @@ SET(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} --std=c++14 -pthread -fPIC -O3 -Wall ############################################################################### # Option Configurations ############################################################################### +# https://github.com/google/brotli/pull/655 +option(BUILD_SHARED_LIBS "Build shared libraries" ON) + option(WITH_ASR "build asr" ON) option(WITH_CLS "build cls" ON) option(WITH_VAD "build vad" ON) diff --git a/runtime/build.sh b/runtime/build.sh index f7d0a2b25..855c3b1aa 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -4,5 +4,5 @@ set -xe # the build script had verified in the paddlepaddle docker image. # please follow the instruction below to install PaddlePaddle image. 
# https://www.paddlepaddle.org.cn/documentation/docs/zh/install/docker/linux-docker.html -cmake -B build -DWITH_ASR=ON -DWITH_CLS=OFF -DWITH_VAD=OFF +cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Android/arm64-v8a-api-21/install cmake --build build -j diff --git a/runtime/cmake/fastdeploy.cmake b/runtime/cmake/fastdeploy.cmake index 463a8e8e8..e5e7f1f99 100644 --- a/runtime/cmake/fastdeploy.cmake +++ b/runtime/cmake/fastdeploy.cmake @@ -20,7 +20,9 @@ endif() if(ANDROID) - set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8) + if(NOT DEFINED FASTDEPLOY_INSTALL_DIR) + set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8) + endif() add_definitions("-DUSE_PADDLE_LITE_BAKEND") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") diff --git a/runtime/cmake/openfst.cmake b/runtime/cmake/openfst.cmake index 066971563..a859076fe 100644 --- a/runtime/cmake/openfst.cmake +++ b/runtime/cmake/openfst.cmake @@ -10,9 +10,19 @@ include(FetchContent) #Application of Automata, (CIAA 2007), volume 4783 of Lecture Notes in #Computer Science, pages 11-23. Springer, 2007. http://www.openfst.org. 
+set(EXTERNAL_PROJECT_LOG_ARGS + LOG_DOWNLOAD 1 # Wrap download in script to log output + LOG_UPDATE 1 # Wrap update in script to log output + LOG_CONFIGURE 1# Wrap configure in script to log output + LOG_BUILD 1 # Wrap build in script to log output + LOG_TEST 1 # Wrap test in script to log output + LOG_INSTALL 1 # Wrap install in script to log output +) + ExternalProject_Add(openfst URL https://paddleaudio.bj.bcebos.com/build/openfst_1.7.2.zip URL_HASH SHA256=ffc56931025579a8af3515741c0f3b0fc3a854c023421472c07ca0c6389c75e6 + ${EXTERNAL_PROJECT_LOG_ARGS} PREFIX ${openfst_PREFIX_DIR} SOURCE_DIR ${openfst_SOURCE_DIR} BINARY_DIR ${openfst_BINARY_DIR} diff --git a/runtime/engine/cls/nnet/CMakeLists.txt b/runtime/engine/cls/nnet/CMakeLists.txt index 27f244345..d331d31a6 100644 --- a/runtime/engine/cls/nnet/CMakeLists.txt +++ b/runtime/engine/cls/nnet/CMakeLists.txt @@ -3,7 +3,7 @@ set(srcs panns_interface.cc ) -add_library(cls SHARED ${srcs}) +add_library(cls ${srcs}) target_link_libraries(cls INTERFACE -static-libstdc++;-Wl,-Bsymbolic ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils ) set(bin_name panns_nnet_main) diff --git a/runtime/engine/common/CMakeLists.txt b/runtime/engine/common/CMakeLists.txt index a2f56f7ff..4c2235b14 100644 --- a/runtime/engine/common/CMakeLists.txt +++ b/runtime/engine/common/CMakeLists.txt @@ -12,4 +12,4 @@ ${CMAKE_CURRENT_SOURCE_DIR}/frontend add_subdirectory(frontend) add_library(common INTERFACE) -add_definitions(common base utils kaldi-matrix frontend) \ No newline at end of file +add_definitions(common base utils kaldi-matrix frontend) \ No newline at end of file diff --git a/runtime/engine/common/base/config.h b/runtime/engine/common/base/config.h index c59c3ab8b..0a0712778 100644 --- a/runtime/engine/common/base/config.h +++ b/runtime/engine/common/base/config.h @@ -10,7 +10,10 @@ using namespace std; #pragma once +#ifdef _MSC_VER #pragma region ParseIniFile +#endif + /* * \brief Generic configuration Class * @@ -335,4 
+338,6 @@ void Config::ReadFile(string filename, string delimiter, string comment) { in >> (*this); } +#ifdef _MSC_VER #pragma endregion ParseIniFIle +#endif diff --git a/runtime/engine/vad/CMakeLists.txt b/runtime/engine/vad/CMakeLists.txt index 4e9f448c9..f61c5a9a8 100644 --- a/runtime/engine/vad/CMakeLists.txt +++ b/runtime/engine/vad/CMakeLists.txt @@ -1,5 +1,7 @@ +include_directories( +${CMAKE_CURRENT_SOURCE_DIR}/../ +) +add_subdirectory(nnet) -set(bin_name silero_vad_main) -add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc vad.cc) -target_link_libraries(${bin_name} ${FASTDEPLOY_LIBS} gflags extern_glog) +add_subdirectory(interface) \ No newline at end of file diff --git a/runtime/engine/vad/wav.h b/runtime/engine/vad/frontend/wav.h similarity index 99% rename from runtime/engine/vad/wav.h rename to runtime/engine/vad/frontend/wav.h index 6d1a6f723..902154f40 100644 --- a/runtime/engine/vad/wav.h +++ b/runtime/engine/vad/frontend/wav.h @@ -18,6 +18,7 @@ #include #include #include +#include namespace wav { diff --git a/runtime/engine/vad/interface/CMakeLists.txt b/runtime/engine/vad/interface/CMakeLists.txt new file mode 100644 index 000000000..087e38ce4 --- /dev/null +++ b/runtime/engine/vad/interface/CMakeLists.txt @@ -0,0 +1,11 @@ +set(srcs + vad_interface.cc +) + +add_library(vad_interface ${srcs}) +target_link_libraries(vad_interface INTERFACE ${FASTDEPLOY_LIBS} vad) + + +set(bin_name vad_interface_main) +add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc) +target_link_libraries(${bin_name} ${FASTDEPLOY_LIBS} vad_interface) \ No newline at end of file diff --git a/runtime/engine/vad/interface/vad_interface.cc b/runtime/engine/vad/interface/vad_interface.cc new file mode 100644 index 000000000..59b7aa94c --- /dev/null +++ b/runtime/engine/vad/interface/vad_interface.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "vad/interface/vad_interface.h" +#include "common/base/log.h" +#include "common/base/config.h" +#include "vad/nnet/vad.h" + + +PPSHandle_t PPSVadCreateInstance(const char* conf_path) { + Config conf(conf_path); + ppspeech::VadNnetConf nnet_conf; + nnet_conf.sr = conf.Read("sr", 16000); + nnet_conf.frame_ms = conf.Read("frame_ms", 32); + nnet_conf.threshold = conf.Read("threshold", 0.45f); + nnet_conf.min_silence_duration_ms = conf.Read("min_silence_duration_ms", 200); + nnet_conf.speech_pad_left_ms = conf.Read("speech_pad_left_ms", 0); + nnet_conf.speech_pad_right_ms = conf.Read("speech_pad_right_ms", 0); + + nnet_conf.model_file_path = conf.Read("model_path", std::string("")); + nnet_conf.param_file_path = conf.Read("param_path", std::string("")); + nnet_conf.num_cpu_thread = conf.Read("num_cpu_thread", 1); + + ppspeech::Vad* model = new ppspeech::Vad(nnet_conf.model_file_path); + + // custom config, but must be set before init + model->SetConfig(nnet_conf); + model->Init(); + + return static_cast(model); +} + + +int PPSVadDestroyInstance(PPSHandle_t instance) { + ppspeech::Vad* model = static_cast(instance); + if (model != nullptr) { + delete model; + model = nullptr; + } + return 0; +} + +int PPSVadChunkSizeSamples(PPSHandle_t instance) { + ppspeech::Vad* model = static_cast(instance); + if (model == nullptr) { + printf("instance is null\n"); + return -1; + } + + return 
model->WindowSizeSamples(); +} + +PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, + float* chunk, + int num_element) { + ppspeech::Vad* model = static_cast(instance); + if (model == nullptr) { + LOG(ERROR) << "instance is null"; + return PPS_ILLEGAL; + } + + std::vector chunk_in(chunk, chunk + num_element); + if (!model->ForwardChunk(chunk_in)){ + LOG(ERROR) << "forward chunk failed"; + return PPS_ILLEGAL; + } + ppspeech::Vad::State s = model->Postprocess(); + PPSVadState_t ret = (PPSVadState_t)s; + return ret; +} + +int PPSVadReset(PPSHandle_t instance) { + ppspeech::Vad* model = static_cast(instance); + if (model == nullptr) { + printf("instance is null\n"); + return -1; + } + model->Reset(); + return 0; +} \ No newline at end of file diff --git a/runtime/engine/vad/interface/vad_interface.h b/runtime/engine/vad/interface/vad_interface.h new file mode 100644 index 000000000..5df24b555 --- /dev/null +++ b/runtime/engine/vad/interface/vad_interface.h @@ -0,0 +1,43 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void* PPSHandle_t; + +typedef enum { + PPS_ILLEGAL = 0, // error + PPS_SIL, // silence + PPS_START, // start speech + PPS_SPEECH, // in speech + PPS_END, // end speech +} PPSVadState_t; + +PPSHandle_t PPSVadCreateInstance(const char* conf_path); + +int PPSVadDestroyInstance(PPSHandle_t instance); + +int PPSVadReset(PPSHandle_t instance); + +int PPSVadChunkSizeSamples(PPSHandle_t instance); + +PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, float* chunk,int num_element); + +#ifdef __cplusplus +} +#endif // __cplusplus \ No newline at end of file diff --git a/runtime/engine/vad/interface/vad_interface_main.cc b/runtime/engine/vad/interface/vad_interface_main.cc new file mode 100644 index 000000000..f5656943c --- /dev/null +++ b/runtime/engine/vad/interface/vad_interface_main.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + +#include "vad/interface/vad_interface.h" +#include "vad/frontend/wav.h" +#include +#include + +int main(int argc, char* argv[]) { + if (argc < 3) { + std::cout << "Usage: vad_interface_main path/to/config path/to/audio " + "run_option, " + "e.g ./vad_interface_main config sample.wav" + << std::endl; + return -1; + } + + std::string config_path = argv[1]; + std::string audio_file = argv[2]; + + PPSHandle_t handle = PPSVadCreateInstance(config_path.c_str()); + + std::vector inputWav; // [0, 1] + wav::WavReader wav_reader = wav::WavReader(audio_file); + + auto num_samples = wav_reader.num_samples(); + inputWav.resize(num_samples); + for (int i = 0; i < num_samples; i++) { + inputWav[i] = wav_reader.data()[i] / 32768; + } + + int window_size_samples = PPSVadChunkSizeSamples(handle); + for (int64_t j = 0; j < num_samples; j += window_size_samples) { + auto start = j; + auto end = start + window_size_samples >= num_samples + ? num_samples + : start + window_size_samples; + auto current_chunk_size = end - start; + + std::vector r{&inputWav[0] + start, &inputWav[0] + end}; + assert(r.size() == static_cast(current_chunk_size)); + + PPSVadState_t s = PPSVadFeedForward(handle, r.data(), r.size()); + std::cout << s << " "; + } + std::cout << std::endl; + + PPSVadReset(handle); + + return 0; +} diff --git a/runtime/engine/vad/nnet/CMakeLists.txt b/runtime/engine/vad/nnet/CMakeLists.txt new file mode 100644 index 000000000..c7a29b28a --- /dev/null +++ b/runtime/engine/vad/nnet/CMakeLists.txt @@ -0,0 +1,13 @@ +set(srcs + vad.cc +) + +add_library(vad ${srcs}) +target_link_libraries(vad INTERFACE ${FASTDEPLOY_LIBS} common) +target_link_libraries(vad PRIVATE common) + + +set(bin_name vad_nnet_main) +add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc) +# target_link_libraries(${bin_name} ${FASTDEPLOY_LIBS} vad gflags extern_glog) +target_link_libraries(${bin_name} ${FASTDEPLOY_LIBS} vad) \ No newline at end of file diff --git a/runtime/engine/vad/vad.cc 
b/runtime/engine/vad/nnet/vad.cc similarity index 93% rename from runtime/engine/vad/vad.cc rename to runtime/engine/vad/nnet/vad.cc index 7630b98df..b5ae3be0e 100644 --- a/runtime/engine/vad/vad.cc +++ b/runtime/engine/vad/nnet/vad.cc @@ -1,4 +1,5 @@ // Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,7 +12,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -#include "vad.h" +#include "vad/nnet/vad.h" #include #include @@ -26,6 +27,8 @@ << __REL_FILE__ << "(" << __LINE__ << ")::" << __FUNCTION__ << "\t" #endif +namespace ppspeech { + Vad::Vad(const std::string& model_file, const fastdeploy::RuntimeOption& custom_option /* = fastdeploy::RuntimeOption() */) { @@ -48,18 +51,29 @@ Vad::Vad(const std::string& model_file, } void Vad::Init() { - std::call_once(init_, [&]() { initialized = Initialize(); }); + std::lock_guard lock(init_lock_); + Initialize(); } std::string Vad::ModelName() const { return "VAD"; } -void Vad::SetConfig(int sr, - int frame_ms, - float threshold, - int min_silence_duration_ms, - int speech_pad_left_ms, - int speech_pad_right_ms) { - if (initialized) { +void Vad::SetConfig(const VadNnetConf conf){ + SetConfig( + conf.sr, + conf.frame_ms, + conf.threshold, + conf.min_silence_duration_ms, + conf.speech_pad_left_ms, + conf.speech_pad_right_ms); +} + +void Vad::SetConfig(const int& sr, + const int& frame_ms, + const float& threshold, + const int& min_silence_duration_ms, + const int& speech_pad_left_ms, + const int& speech_pad_right_ms) { + if (initialized_) { fastdeploy::FDERROR << "SetConfig must be called before init" << std::endl; throw std::runtime_error("SetConfig must be called before 
init"); @@ -114,12 +128,18 @@ bool Vad::Initialize() { Reset(); + + // InitRuntime if (!InitRuntime()) { fastdeploy::FDERROR << "Failed to initialize fastdeploy backend." << std::endl; return false; } + + initialized_=true; + + fastdeploy::FDINFO << "init done."; return true; } @@ -303,4 +323,6 @@ std::ostream& operator<<(std::ostream& os, const Vad::State& s) { break; } return os; -} \ No newline at end of file +} + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/vad/vad.h b/runtime/engine/vad/nnet/vad.h similarity index 79% rename from runtime/engine/vad/vad.h rename to runtime/engine/vad/nnet/vad.h index 6eed7d1c3..8eb0a9871 100644 --- a/runtime/engine/vad/vad.h +++ b/runtime/engine/vad/nnet/vad.h @@ -1,4 +1,5 @@ // Copyright (c) 2023 Chen Qianhe Authors. All Rights Reserved. +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -11,17 +12,37 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. + #pragma once #include #include #include -#include "./wav.h" +#include "vad/frontend/wav.h" #include "fastdeploy/fastdeploy_model.h" #include "fastdeploy/runtime.h" +namespace ppspeech { + +struct VadNnetConf { + // wav + int sr; + int frame_ms; + float threshold; + int min_silence_duration_ms; + int speech_pad_left_ms; + int speech_pad_right_ms; + + // model + std::string model_file_path; + std::string param_file_path; + std::string dict_file_path; + int num_cpu_thread; // 1 thread + std::string backend; // ort,lite, etc. 
+}; + class Vad : public fastdeploy::FastDeployModel { public: - enum class State { SIL = 0, START, SPEECH, END }; + enum class State { ILLEGAL = 0, SIL, START, SPEECH, END }; friend std::ostream& operator<<(std::ostream& os, const Vad::State& s); Vad(const std::string& model_file, @@ -32,12 +53,13 @@ class Vad : public fastdeploy::FastDeployModel { void Reset(); - void SetConfig(int sr, - int frame_ms, - float threshold, - int min_silence_duration_ms, - int speech_pad_left_ms, - int speech_pad_right_ms); + void SetConfig(const int& sr, + const int& frame_ms, + const float& threshold, + const int& min_silence_duration_ms, + const int& speech_pad_left_ms, + const int& speech_pad_right_ms); + void SetConfig(const VadNnetConf conf); bool ForwardChunk(std::vector& chunk); @@ -78,7 +100,9 @@ class Vad : public fastdeploy::FastDeployModel { bool Initialize(); private: - std::once_flag init_; + std::mutex init_lock_; + bool initialized_{false}; + // input and output std::vector inputTensors_; std::vector outputTensors_; @@ -122,3 +146,5 @@ class Vad : public fastdeploy::FastDeployModel { const std::vector sr_node_dims_ = {1}; const std::vector hc_node_dims_ = {2, 1, 64}; }; + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/vad/silero_vad_main.cc b/runtime/engine/vad/nnet/vad_nnet_main.cc similarity index 63% rename from runtime/engine/vad/silero_vad_main.cc rename to runtime/engine/vad/nnet/vad_nnet_main.cc index 7fb524060..4615aa956 100644 --- a/runtime/engine/vad/silero_vad_main.cc +++ b/runtime/engine/vad/nnet/vad_nnet_main.cc @@ -1,11 +1,25 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. -#include "vad.h" + +#include "vad/nnet/vad.h" int main(int argc, char* argv[]) { if (argc < 3) { - std::cout << "Usage: infer_onnx_silero_vad path/to/model path/to/audio " + std::cout << "Usage: vad_nnet_main path/to/model path/to/audio " "run_option, " - "e.g ./infer_onnx_silero_vad silero_vad.onnx sample.wav" + "e.g ./vad_nnet_main silero_vad.onnx sample.wav" << std::endl; return -1; } @@ -14,7 +28,7 @@ int main(int argc, char* argv[]) { std::string audio_file = argv[2]; int sr = 16000; - Vad vad(model_file); + ppspeech::Vad vad(model_file); // custom config, but must be set before init vad.SetConfig(sr, 32, 0.45f, 200, 0, 0); vad.Init(); @@ -39,7 +53,7 @@ int main(int argc, char* argv[]) { auto current_chunk_size = end - start; std::vector r{&inputWav[0] + start, &inputWav[0] + end}; - assert(r.size() == current_chunk_size); + assert(r.size() == static_cast(current_chunk_size)); if (!vad.ForwardChunk(r)) { std::cerr << "Failed to inference while using model:" @@ -47,7 +61,7 @@ int main(int argc, char* argv[]) { return false; } - Vad::State s = vad.Postprocess(); + ppspeech::Vad::State s = vad.Postprocess(); std::cout << s << " "; } std::cout << std::endl; diff --git a/runtime/examples/silero_vad/.gitignore b/runtime/examples/vad/.gitignore similarity index 100% rename from runtime/examples/silero_vad/.gitignore rename to runtime/examples/vad/.gitignore diff --git a/runtime/examples/silero_vad/README.md b/runtime/examples/vad/README.md similarity index 100% rename from runtime/examples/silero_vad/README.md rename to 
runtime/examples/vad/README.md diff --git a/runtime/examples/silero_vad/README_CN.md b/runtime/examples/vad/README_CN.md similarity index 100% rename from runtime/examples/silero_vad/README_CN.md rename to runtime/examples/vad/README_CN.md diff --git a/runtime/examples/vad/conf/vad.ini b/runtime/examples/vad/conf/vad.ini new file mode 100644 index 000000000..94742ff5b --- /dev/null +++ b/runtime/examples/vad/conf/vad.ini @@ -0,0 +1,10 @@ +[model] +model_path=./data/silero_vad/silero_vad.onnx + +[vad] +sr = 16000 # 16k +frame_ms = 32 # 32, 64, 96 for 16k +threshold = 0.45 +min_silence_duration_ms = 200 +speech_pad_left_ms = 200 +speech_pad_right_ms = 0 diff --git a/runtime/examples/silero_vad/local/build.sh b/runtime/examples/vad/local/build.sh similarity index 100% rename from runtime/examples/silero_vad/local/build.sh rename to runtime/examples/vad/local/build.sh diff --git a/runtime/examples/silero_vad/local/build_android.sh b/runtime/examples/vad/local/build_android.sh similarity index 100% rename from runtime/examples/silero_vad/local/build_android.sh rename to runtime/examples/vad/local/build_android.sh diff --git a/runtime/examples/silero_vad/local/decode.sh b/runtime/examples/vad/local/decode.sh similarity index 51% rename from runtime/examples/silero_vad/local/decode.sh rename to runtime/examples/vad/local/decode.sh index e6fc47730..ff0a0d447 100755 --- a/runtime/examples/silero_vad/local/decode.sh +++ b/runtime/examples/vad/local/decode.sh @@ -1,6 +1,7 @@ #!/bin/bash set -e +conf=conf data=data exp=exp @@ -10,8 +11,13 @@ mkdir -p $exp ckpt_dir=$data/silero_vad model=$ckpt_dir/silero_vad.onnx test_wav=$data/silero_vad_sample.wav +conf_file=$conf/vad.ini -silero_vad_main $model $test_wav +vad_nnet_main $model $test_wav +echo "vad_nnet_main done!" + +vad_interface_main $conf_file $test_wav +echo "vad_interface_main done!" + -echo "silero vad done!" 
\ No newline at end of file diff --git a/runtime/examples/silero_vad/local/download.sh b/runtime/examples/vad/local/download.sh similarity index 100% rename from runtime/examples/silero_vad/local/download.sh rename to runtime/examples/vad/local/download.sh diff --git a/runtime/examples/silero_vad/path.sh b/runtime/examples/vad/path.sh similarity index 86% rename from runtime/examples/silero_vad/path.sh rename to runtime/examples/vad/path.sh index 55976bcd8..3ed85dc1e 100644 --- a/runtime/examples/silero_vad/path.sh +++ b/runtime/examples/vad/path.sh @@ -12,6 +12,6 @@ TOOLS_BIN=$ENGINE_TOOLS/valgrind/install/bin export LC_AL=C -export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD +export PATH=$PATH:$TOOLS_BIN:$ENGINE_BUILD/nnet:$ENGINE_BUILD/interface export LD_LIBRARY_PATH=$PADDLE_LIB_PATH:$LD_LIBRARY_PATH diff --git a/runtime/examples/silero_vad/run.sh b/runtime/examples/vad/run.sh similarity index 100% rename from runtime/examples/silero_vad/run.sh rename to runtime/examples/vad/run.sh diff --git a/runtime/examples/silero_vad/utils b/runtime/examples/vad/utils similarity index 100% rename from runtime/examples/silero_vad/utils rename to runtime/examples/vad/utils