diff --git a/runtime/engine/vad/interface/vad_interface.cc b/runtime/engine/vad/interface/vad_interface.cc index 820988169..734b44185 100644 --- a/runtime/engine/vad/interface/vad_interface.cc +++ b/runtime/engine/vad/interface/vad_interface.cc @@ -15,7 +15,6 @@ #include "vad/interface/vad_interface.h" #include "common/base/config.h" -#include "common/base/log.h" #include "vad/nnet/vad.h" @@ -68,14 +67,14 @@ PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, int num_element) { ppspeech::Vad* model = static_cast(instance); if (model == nullptr) { - LOG(ERROR) << "instance is null"; - return PPS_ILLEGAL; + printf("instance is null\n"); + return PPS_VAD_ILLEGAL; } std::vector chunk_in(chunk, chunk + num_element); if (!model->ForwardChunk(chunk_in)) { - LOG(ERROR) << "forward chunk failed"; - return PPS_ILLEGAL; + printf("forward chunk failed\n"); + return PPS_VAD_ILLEGAL; } ppspeech::Vad::State s = model->Postprocess(); PPSVadState_t ret = (PPSVadState_t)s; diff --git a/runtime/engine/vad/interface/vad_interface.h b/runtime/engine/vad/interface/vad_interface.h index 947b1f654..5d7ca7091 100644 --- a/runtime/engine/vad/interface/vad_interface.h +++ b/runtime/engine/vad/interface/vad_interface.h @@ -21,11 +21,12 @@ extern "C" { typedef void* PPSHandle_t; typedef enum { - PPS_ILLEGAL = 0, // error - PPS_SIL, // silence - PPS_START, // start speech - PPS_SPEECH, // in speech - PPS_END, // end speech + PPS_VAD_ILLEGAL = 0, // error + PPS_VAD_SIL, // silence + PPS_VAD_START, // start speech + PPS_VAD_SPEECH, // in speech + PPS_VAD_END, // end speech + PPS_VAD_NUMSTATES, // number of states } PPSVadState_t; PPSHandle_t PPSVadCreateInstance(const char* conf_path); diff --git a/runtime/engine/vad/nnet/vad.h b/runtime/engine/vad/nnet/vad.h index ac8f64b8b..565a5680b 100644 --- a/runtime/engine/vad/nnet/vad.h +++ b/runtime/engine/vad/nnet/vad.h @@ -49,6 +49,8 @@ class Vad : public fastdeploy::FastDeployModel { const fastdeploy::RuntimeOption& custom_option = 
fastdeploy::RuntimeOption()); + virtual ~Vad() {} + void Init(); void Reset(); diff --git a/runtime/examples/vad/README.md b/runtime/examples/vad/README.md index f032be862..6df4359e1 100644 --- a/runtime/examples/vad/README.md +++ b/runtime/examples/vad/README.md @@ -1,121 +1,166 @@ -English | [简体中文](README_CN.md) +# Silero VAD - pre-trained enterprise-grade Voice Activity Detector -# Silero VAD Deployment Example +This directory provides VAD models on CPU/GPU. -This directory provides examples that `infer_onnx_silero_vad` fast finishes the deployment of VAD models on CPU/GPU. +![](https://user-images.githubusercontent.com/36505480/198026365-8da383e0-5398-4a12-b7f8-22c2c0059512.png) -Before deployment, two steps require confirmation. +## Linux -- 1. Software and hardware should meet the requirements. Please refer to [FastDeploy Environment Requirements](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). -- 2. Download the precompiled deployment library and samples code according to your development environment. Refer to [FastDeploy Precompiled Library](../../../../docs/en/build_and_install/download_prebuilt_libraries.md). +### Build Runtime +```bash +# cd /path/to/paddlespeech/runtime +cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON +cmake --build build +``` -Taking VAD inference on Linux as an example, the compilation test can be completed by executing the following command in this directory. +Since VAD uses the FastDeploy runtime, if you have another FastDeploy library, you can use this command to build: ```bash -mkdir build -cd build -# Download the FastDeploy precompiled library. Users can choose your appropriate version in the `FastDeploy Precompiled Library` mentioned above -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake ..
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# Download the VAD model file and test audio. After decompression, place the model and test audio in the infer_onnx_silero_vad.cc peer directory -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav - -# inference -./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav +# cd /path/to/paddlespeech/runtime +cmake -B build -DBUILD_SHARED_LIBS=OFF -DWITH_ASR=OFF -DWITH_CLS=OFF -DWITH_VAD=ON -DFASTDEPLOY_INSTALL_DIR=/workspace/zhanghui/paddle/FastDeploy/build/Linux/x86_64/install +cmake --build build ``` -- The above command works for Linux or MacOS. Refer to: - - [How to use FastDeploy C++ SDK in Windows](../../../../docs/en/faq/use_sdk_on_windows.md) for SDK use-pattern in Windows +`DFASTDEPLOY_INSTALL_DIR` is the directory of FastDeploy Library. -## VAD C++ Interface +### Run Demo -### Vad Class +After building success, we can do this to run demo under this example dir: -```c++ -Vad::Vad(const std::string& model_file, - const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) +```bash +bash run.sh ``` -**Parameter** +### Result + +```bash +/workspace/zhanghui/PaddleSpeech/runtime/engine/vad/nnet/vad.cc(92)::SetConfig sr=16000 threshold=0.45 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=0 speech_pad_right_ms=0[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend Runtime initialized with Backend::ORT in Device::CPU./workspace/zhanghui/PaddleSpeech/runtime/engine/vad/nnet/vad.cc(141)::Initialize init done. 
+[SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [STA] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SPE] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] 
[SIL] [END] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] [SIL] speak start: 0.32 s, end: 2.496 s | speak start: 3.296 s, end: 4.672 s | speak start: 5.408 s, end: 7.936 s | speak start: 8.192 s, end: 10.72 s +vad_nnet_main done! + +sr = 16000 +frame_ms = 32 +threshold = 0.45 +min_silence_duration_ms = 200 +speech_pad_left_ms = 200 +speech_pad_right_ms = 0 +model_path = ./data/silero_vad/silero_vad.onnx +param_path = (default) +num_cpu_thread = 1(default) +/workspace/zhanghui/PaddleSpeech/runtime/engine/vad/nnet/vad.cc(92)::SetConfig sr=16000 threshold=0.45 frame_ms=32 min_silence_duration_ms=200 speech_pad_left_ms=200 speech_pad_right_ms=0 +[INFO] fastdeploy/runtime/runtime.cc(293)::CreateOrtBackend Runtime initialized with Backend::ORT in Device::CPU. +/workspace/zhanghui/PaddleSpeech/runtime/engine/vad/nnet/vad.cc(141)::Initialize init done. +1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 2 3 3 3 3 3 3 3 3 3 3 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 4 1 1 1 1 1 1 1 1 1 vad_interface_main done! +``` -> * **model_file**(str): Model file path -> * **runtime_option**(RuntimeOption): Backend inference configuration. None by default. 
(use the default configuration) +The environment as below: + +```text +Architecture: x86_64 +CPU op-mode(s): 32-bit, 64-bit +Byte Order: Little Endian +CPU(s): 80 +On-line CPU(s) list: 0-79 +Thread(s) per core: 2 +Core(s) per socket: 20 +Socket(s): 2 +NUMA node(s): 2 +Vendor ID: GenuineIntel +CPU family: 6Model: 85Model name: Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz +Stepping: 7 +CPU MHz: 2599.998 +BogoMIPS: 5199.99 +Hypervisor vendor: KVM +Virtualization type: full +L1d cache: 32KL1i cache: 32KL2 cache: 1024K +L3 cache: 33792K +NUMA node0 CPU(s): 0-39 +NUMA node1 CPU(s): 40-79 +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ss ht syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon rep_good nopl xtopology nonstop_tsc eagerfpu pni pclmulqdq monitor ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single ssbd ibrs ibpb ibrs_enhanced fsgsbase tsc_adjust bmi1 avx2 smep bmi2 erms invpcid mpx avx512f avx512dq rdseed adx smap clflushopt clwb avx512cd avx512bw avx512vl xsaveopt xsavec xgetbv1 arat umip pku ospke avx512_vnni spec_ctrl arch_capabilities +``` -### setAudioCofig function +## Android -**Must be called before the `init` function** +When to using on Android, please setup your `NDK` enverment before, then do as below: -```c++ -void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); +```bash +# cd /path/to/paddlespeech/runtime +bash build_android.sh ``` -**Parameter** - -> * **sr**(int): sampling rate -> * **frame_ms**(int): The length of each detection frame, and it is used to calculate the detection window size -> * **threshold**(float): Result probability judgment threshold -> * **min_silence_duration_ms**(int): The threshold used to calculate whether it is silence -> * **speech_pad_ms**(int): Used to calculate the end time of the speech +## VAD 
Interface + +For the vad interface, please see [vad interface](../../engine/vad/interface/). -### init function +### Create Handle -Used to initialize audio-related parameters. +```c++ -void Vad::init(); +PPSHandle_t PPSVadCreateInstance(const char* conf_path); ``` -### loadAudio function - -Load audio. +### Destroy Handle ```c++ -void Vad::loadAudio(const std::string& wavPath) +int PPSVadDestroyInstance(PPSHandle_t instance); ``` -**Parameter** +### Reset Vad State -> * **wavPath**(str): Audio file path +```c++ +int PPSVadReset(PPSHandle_t instance); +``` -### Predict function +Reset the Vad state before processing the next `wav`. -Used to start model reasoning. +### Get Chunk Size ```c++ -bool Vad::Predict(); +int PPSVadChunkSizeSamples(PPSHandle_t instance); ``` -### getResult function +This API returns the chunk size in `sample` units. +When doing forward, we need to feed `chunk size` samples, except for the last chunk. -**Used to obtain reasoning results** +### Vad Forward + +```c++ +PPSVadState_t PPSVadFeedForward(PPSHandle_t instance, + float* chunk, + int num_element); +``` +Vad has the states below: ```c++ -std::vector> Vad::getResult( - float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, - float mergeThreshold = 0.3); +typedef enum { + PPS_VAD_ILLEGAL = 0, // error + PPS_VAD_SIL, // silence + PPS_VAD_START, // start speech + PPS_VAD_SPEECH, // in speech + PPS_VAD_END, // end speech + PPS_VAD_NUMSTATES, // number of states +} PPSVadState_t; ``` -**Parameter** +If `PPSVadFeedForward` encounters an error, it will return the `PPS_VAD_ILLEGAL` state. + + +## FastDeploy Runtime + +For FastDeploy software and hardware requirements, and prebuilt libraries, please see [FastDeploy](https://github.com/PaddlePaddle/FastDeploy): + +- 1. [FastDeploy Environment Requirements](https://github.com/PaddlePaddle/FastDeploy/docs/en/build_and_install/download_prebuilt_libraries.md). +- 2.
[FastDeploy Precompiled Library](https://github.com/PaddlePaddle/FastDeploy/docs/en/build_and_install/download_prebuilt_libraries.md). -> * **removeThreshold**(float): Discard result fragment threshold; If some recognition results are too short, they will be discarded according to this threshold -> * **expandHeadThreshold**(float): Offset at the beginning of the segment; The recognized start time may be too close to the voice part, so move forward the start time accordingly -> * **expandTailThreshold**(float): Offset at the end of the segment; The recognized end time may be too close to the voice part, so the end time is moved back accordingly -> * **mergeThreshold**(float): Some result segments are very close and can be combined into one, and the vocal segments can be combined accordingly -**The output result format is**`std::vector>` +## Download Pre-trained ONNX Model -> Output a list, each element is a speech fragment -> -> Each clip can use 'start' to get the start time and 'end' to get the end time +For developers' testing, model exported by VAD are provided below. Developers can download them directly. -### Tips +| 模型 | 大小 | 备注 | +| :----------------------------------------------------------- | :---- | :----------------------------------------------------------- | +| [silero-vad](https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz) | 1.8MB | This model file is sourced from [snakers4/silero-vad](https://github.com/snakers4/silero-vad),MIT License | -1. `The setAudioCofig`function must be called before the `init` function -2. 
The sampling rate of the input audio file must be consistent with that set in the code -- [Model Description](../) -- [How to switch the model inference backend engine](../../../../docs/en/faq/how_to_change_backend.md) +## Reference +* https://github.com/snakers4/silero-vad +* https://github.com/PaddlePaddle/FastDeploy/blob/develop/examples/audio/silero-vad/README.md diff --git a/runtime/examples/vad/README_CN.md b/runtime/examples/vad/README_CN.md deleted file mode 100644 index c45d9896c..000000000 --- a/runtime/examples/vad/README_CN.md +++ /dev/null @@ -1,119 +0,0 @@ -[English](README.md) | 简体中文 -# Silero VAD 部署示例 - -本目录下提供`infer_onnx_silero_vad`快速完成 Silero VAD 模型在CPU/GPU。 - -在部署前,需确认以下两个步骤 - -- 1. 软硬件环境满足要求,参考[FastDeploy环境要求](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) -- 2. 根据开发环境,下载预编译部署库和samples代码,参考[FastDeploy预编译库](../../../../docs/cn/build_and_install/download_prebuilt_libraries.md) - -以Linux上 VAD 推理为例,在本目录执行如下命令即可完成编译测试。 - -```bash -mkdir build -cd build -# 下载FastDeploy预编译库,用户可在上文提到的`FastDeploy预编译库`中自行选择合适的版本使用 -wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-x.x.x.tgz -tar xvf fastdeploy-linux-x64-x.x.x.tgz -cmake .. 
-DFASTDEPLOY_INSTALL_DIR=${PWD}/fastdeploy-linux-x64-x.x.x -make -j - -# 下载 VAD 模型文件和测试音频,解压后将模型和测试音频放置在与 infer_onnx_silero_vad.cc 同级目录下 -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad.tgz -wget https://bj.bcebos.com/paddlehub/fastdeploy/silero_vad_sample.wav - -# 推理 -./infer_onnx_silero_vad ../silero_vad.onnx ../silero_vad_sample.wav -``` - -以上命令只适用于Linux或MacOS, Windows下SDK的使用方式请参考: -- [如何在Windows中使用FastDeploy C++ SDK](../../../../docs/cn/faq/use_sdk_on_windows.md) - -## VAD C++ 接口 -### Vad 类 - -```c++ -Vad::Vad(const std::string& model_file, - const fastdeploy::RuntimeOption& custom_option = fastdeploy::RuntimeOption()) -``` - -**参数** - -> * **model_file**(str): 模型文件路径 -> * **runtime_option**(RuntimeOption): 后端推理配置,默认为None,即采用默认配置 - -### setAudioCofig 函数 - -**必须在`init`函数前调用** - -```c++ -void Vad::setAudioCofig(int sr, int frame_ms, float threshold, int min_silence_duration_ms, int speech_pad_ms); -``` - -**参数** - -> * **sr**(int): 采样率 -> * **frame_ms**(int): 每次检测帧长,用于计算检测窗口大小 -> * **threshold**(float): 结果概率判断阈值 -> * **min_silence_duration_ms**(int): 用于计算判断是否是 silence 的阈值 -> * **speech_pad_ms**(int): 用于计算 speach 结束时刻 - -### init 函数 - -用于初始化音频相关参数 - -```c++ -void Vad::init(); -``` - -### loadAudio 函数 - -加载音频 - -```c++ -void Vad::loadAudio(const std::string& wavPath) -``` - -**参数** - -> * **wavPath**(str): 音频文件路径 - -### Predict 函数 - -用于开始模型推理 - -```c++ -bool Vad::Predict(); -``` - -### getResult 函数 - -**用于获取推理结果** - -```c++ -std::vector> Vad::getResult( - float removeThreshold = 1.6, float expandHeadThreshold = 0.32, float expandTailThreshold = 0, - float mergeThreshold = 0.3); -``` - -**参数** - -> * **removeThreshold**(float): 丢弃结果片段阈值;部分识别结果太短则根据此阈值丢弃 -> * **expandHeadThreshold**(float): 结果片段开始时刻偏移;识别到的开始时刻可能过于贴近发声部分,因此据此前移开始时刻 -> * **expandTailThreshold**(float): 结果片段结束时刻偏移;识别到的结束时刻可能过于贴近发声部分,因此据此后移结束时刻 -> * **mergeThreshold**(float): 有的结果片段十分靠近,可以合并成一个,据此合并发声片段 - -**输出结果格式为**`std::vector>` - -> 输出一个列表,每个元素是一个讲话片段 -> -> 每个片段可以用 'start' 获取到开始时刻,用 'end' 
获取到结束时刻 - -### 提示 - -1. `setAudioCofig`函数必须在`init`函数前调用 -2. 输入的音频文件的采样率必须与代码中设置的保持一致 - -- [模型介绍](../) -- [如何切换模型推理后端引擎](../../../../docs/cn/faq/how_to_change_backend.md) diff --git a/runtime/examples/vad/run.sh b/runtime/examples/vad/run.sh index 9707df1bb..606a44f8c 100755 --- a/runtime/examples/vad/run.sh +++ b/runtime/examples/vad/run.sh @@ -15,8 +15,8 @@ exp=exp mkdir -p $exp $data # 1. compile -if [ ! -d ${SPEECHX_BUILD} ]; then - pushd ${SPEECHX_ROOT} +if [ ! -d ${ENGINE_BUILD} ]; then + pushd ${ENGINE_ROOT} bash build.sh # build for android armv8/armv7 @@ -24,8 +24,6 @@ if [ ! -d ${SPEECHX_BUILD} ]; then popd fi -ckpt_dir=$data/silero_vad -wav=$data/silero_vad_sample.wav if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ];then ./local/download.sh