add brpc to use paraformer (#3149)

* add brpc to use paraformer
2 years ago · 8340ad5472
parent ce4af0e765
commit 8340ad5472
62 changed files with 10364 additions and 0 deletions
--- a/runtime/engine/asr/server/brpc/BCLOUD
+++ b/runtime/engine/asr/server/brpc/BCLOUD
@ -0,0 +1,22 @@
+#edit-mode: -*- python -*-
+#coding:gbk
+
+# BCLOUD build description for the brpc-based ASR server.
+# NOTE(review): BCLOUD is an internal build tool; the directive semantics below are
+# inferred from the directive names and arguments only - confirm against its docs.
+
+# Workspace root, three directories above this file.
+WORKROOT('../../../')
+GCC('gcc82')
+
+# Compiler flags: -Wall -W -Werror make every warning fatal; sources are built as C++11.
+CPPFLAGS('-g -DNDEBUG -pipe -W -Wall -Werror -fPIC -Wno-deprecated -Wno-unused-parameter -fno-omit-frame-pointer -D__const__= -std=c++11 -D__STDC_FORMAT_MACROS -DBAIDU_RPC_ENABLE_CPU_PROFILER -DBAIDU_RPC_ENABLE_HEAP_PROFILER')
+# System libraries linked into every target.
+LDFLAGS('-lpthread -pthread -lrt -ldl -lz')
+
+# Dependencies, each pinned to a branch or tag.
+CONFIGS('baidu/base/baidu-rpc@stable')
+CONFIGS('baidu/third-party/openssl@openssl_V1.0.2.10_GCC820_6U3_K2_GEN_PD_BL@git_tag')
+CONFIGS('baidu/third-party/tcmalloc@tcmalloc_V2.7.0.7_GCC820_4U3_K3_GEN_PD_BL@git_tag')
+CONFIGS("baidu/third-party/protobuf@protobuf_V2.6.1.1_GCC820_4U3_K3_GEN_PD_BL@git_tag")
+CONFIGS('baidu/base/ullib@stable')
+
+# Protobuf code generation: protoc binary, include path and output location.
+HEADERS(GLOB_GEN_SRCS('./proto/*.pb.h'), '$INC')
+PROTOC(ENV.WorkRoot() + '/third-64/protobuf/bin/protoc')
+PROTOFLAGS('-I../../../third-64/protobuf/include')
+PROTOFLAGS('--proto_path=.', '--cpp_out=.')
+
+
+# Sub-directory that holds the actual server sources.
+Directory("paraformerCPP")
--- a/runtime/engine/asr/server/brpc/README.md
+++ b/runtime/engine/asr/server/brpc/README.md
@ -0,0 +1,4 @@
+## Extra Description
+This code is compiled with BCLOUD, so its build method does not follow the build conventions used by the rest of this repository.
+
+
--- a/runtime/engine/asr/server/brpc/paraformerCPP/Audio.cpp
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/Audio.cpp
@ -0,0 +1,232 @@
+#include <math.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <webrtc_vad.h>
+#include "ComDefine.h"
+#include "Audio.h"
+
+using namespace std;
+
+// Sliding-window accumulator: put() returns the sum of the most recent
+// `window_size` values that were pushed.
+//
+// Storage is a ring buffer of window_size + 1 slots. `in_idx` is the slot that
+// receives the next value and `out_idx` (always one slot ahead) holds the value
+// that falls out of the window on that same call.
+class AudioWindow {
+  private:
+    std::vector<int> window; // owned ring buffer; vector gives safe copy/destroy semantics
+    int in_idx;
+    int out_idx;
+    int sum;
+    int window_size = 0;
+
+  public:
+    // window_size: number of values covered by the running sum. Negative values
+    // are treated as 0 so the buffer always has at least one slot.
+    explicit AudioWindow(int window_size)
+        : window(static_cast<size_t>(window_size > 0 ? window_size : 0) + 1, 0),
+          in_idx(0),
+          out_idx(window_size > 0 ? 1 : 0), // stay inside the buffer for a 1-slot ring
+          sum(0),
+          window_size(window_size > 0 ? window_size : 0)
+    {
+    }
+
+    // Pushes `val` into the window and returns the updated running sum.
+    int put(int val)
+    {
+        sum = sum + val - window[out_idx];
+        window[in_idx] = val;
+        in_idx = in_idx == window_size ? 0 : in_idx + 1;
+        out_idx = out_idx == window_size ? 0 : out_idx + 1;
+        return sum;
+    }
+};
+
+// Creates an empty frame. All fields start at 0 so get_start()/get_len() never
+// return indeterminate values before set_start()/set_end() are called.
+AudioFrame::AudioFrame() : start(0), end(0), len(0) {}
+
+// Creates a frame that begins at sample 0 and is `len` samples long.
+AudioFrame::AudioFrame(int len) : start(0), end(len), len(len) {}
+
+AudioFrame::~AudioFrame() {}
+// Stores the index of the frame's first sample, clamping negative values to 0.
+// Returns the value that was actually stored.
+int AudioFrame::set_start(int val)
+{
+    if (val < 0) {
+        val = 0;
+    }
+    start = val;
+    return start;
+}
+
+// Sets the end of the frame so that its length fits a whole number of feature
+// frames (400-sample window, 160-sample shift):
+//     len = ceil((val - start - 400) / 160) * 160 + 400
+//
+// val:     requested end sample index.
+// max_len: size of the backing buffer; exceeding it is only reported, the end
+//          is not clamped here.
+// Returns the adjusted end index.
+//
+// The arithmetic is done in integers: the previous float version lost precision
+// once sample indices exceeded 2^24 (about 17 minutes of 16 kHz audio).
+int AudioFrame::set_end(int val, int max_len)
+{
+    const int frame_length = 400;
+    const int frame_shift = 160;
+
+    const int excess = (val - start) - frame_length;
+    // Integer ceil(excess / frame_shift). Truncating division already rounds
+    // towards zero, which is the ceiling for non-positive values.
+    const int num_shifts = excess > 0 ? (excess + frame_shift - 1) / frame_shift
+                                      : excess / frame_shift;
+    const int num_new_samples = num_shifts * frame_shift + frame_length;
+
+    end = start + num_new_samples;
+    len = num_new_samples;
+    if (end > max_len) {
+        printf("frame end > max_len!!!!!!!\n");
+    }
+
+    return end;
+}
+
+// Index of the first sample of this frame.
+int AudioFrame::get_start()
+{
+    const int first_sample = start;
+    return first_sample;
+}
+
+// Number of samples in this frame.
+int AudioFrame::get_len()
+{
+    const int sample_count = len;
+    return sample_count;
+}
+
+// Placeholder: only prints a "not implemented" marker and returns 0.
+int AudioFrame::disp()
+{
+    printf("not imp!!!!\n");
+    return 0;
+}
+
+// Creates an empty Audio whose sample buffer will be padded to a multiple of
+// 1360 samples. Every scalar member is initialized so that disp() and the
+// destructor are safe to call before any audio has been loaded.
+Audio::Audio(int data_type)
+    : speech_buff(NULL),
+      speech_len(0),
+      speech_align_len(0),
+      sample_rate(16000), // matches the 16 kHz constant used by disp() and split()
+      offset(0),
+      align_size(1360),
+      data_type(data_type)
+{
+}
+
+// Same as above with a caller-chosen alignment. A non-positive `size` falls
+// back to 1360 because align_size is used as a divisor when audio is loaded.
+Audio::Audio(int data_type, int size)
+    : speech_buff(NULL),
+      speech_len(0),
+      speech_align_len(0),
+      sample_rate(16000),
+      offset(0),
+      align_size(size > 0 ? (float)size : 1360.0f),
+      data_type(data_type)
+{
+}
+
+// Releases the PCM buffer and every frame that is still queued. Frames are
+// heap-allocated by loadwavfrommem()/split() and are only deleted by fetch(),
+// so anything that was never fetched has to be freed here.
+Audio::~Audio()
+{
+    while (!frame_queue.empty()) {
+        delete frame_queue.front();
+        frame_queue.pop();
+    }
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    speech_data.clear();
+}
+
+// Prints the duration (computed at 16000 samples per second) and the sample
+// count of the loaded audio.
+void Audio::disp()
+{
+    const float seconds = (float)speech_len / 16000;
+    printf("Audio time is %f s. len is %d\n", seconds, speech_len);
+}
+
+// Loads decoded audio into this object.
+//
+// The float samples of `audio` are interleaved channel by channel, scaled by
+// 32768 and stored as int16 in speech_buff (zero-padded up to a multiple of
+// align_size). speech_data receives the same values as floats. Finally a single
+// frame covering all samples is queued.
+//
+// Any previously loaded audio, including frames still queued, is discarded.
+void Audio::loadwavfrommem(AudioFile<float>audio)
+{
+    // Drop whatever an earlier load left behind.
+    if (speech_buff != NULL) {
+        free(speech_buff);
+        speech_buff = NULL;
+    }
+    speech_data.clear();
+    while (!frame_queue.empty()) {
+        delete frame_queue.front();
+        frame_queue.pop();
+    }
+
+    int wav_length = audio.getNumSamplesPerChannel();
+    int channelNum = audio.getNumChannels();
+
+    speech_len = wav_length * channelNum;
+    printf("wav_length:%d, channelNum: %d", wav_length, channelNum);
+
+    speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size);
+    speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len);
+    if (speech_buff == NULL) {
+        // Allocation failed (or nothing to allocate): expose an empty recording
+        // but still queue a frame, because split() pops one unconditionally.
+        speech_len = 0;
+        speech_align_len = 0;
+        frame_queue.push(new AudioFrame(0));
+        return;
+    }
+    memset(speech_buff, 0, sizeof(int16_t) * speech_align_len);
+
+    for (int i = 0; i < wav_length; i++) {
+        for (int channel = 0; channel < channelNum; channel++) {
+            float scaled = audio.samples[channel][i] * 32768;
+            // Clamp before converting: a sample of 1.0 scales to 32768, which
+            // does not fit in int16_t. NaN is mapped to silence.
+            if (scaled != scaled) {
+                scaled = 0.0f;
+            } else if (scaled > 32767.0f) {
+                scaled = 32767.0f;
+            } else if (scaled < -32768.0f) {
+                scaled = -32768.0f;
+            }
+            speech_buff[i * channelNum + channel] = (int16_t)scaled;
+        }
+    }
+
+    speech_data.reserve(speech_len);
+    for (int i = 0; i < speech_len; i++) {
+        speech_data.emplace_back((float)speech_buff[i]);
+    }
+
+    frame_queue.push(new AudioFrame(speech_len));
+}
+
+// Pops the next queued frame and copies its samples to the front of `dout`.
+//
+// dout: receives the samples of the frame (inserted at the beginning).
+// len:  set to the length stored in the frame.
+// flag: set to S_END when a frame was delivered.
+// Returns 1 when a frame was delivered, 0 when the queue is empty (in which
+// case none of the output arguments are touched).
+int Audio::fetch(vector<float> &dout, int &len, int &flag)
+{
+    if (frame_queue.empty()) {
+        return 0;
+    }
+
+    AudioFrame *head = frame_queue.front();
+    frame_queue.pop();
+
+    len = head->get_len();
+    const int available = speech_data.size();
+    // Never read past the samples that were actually loaded.
+    const int stop = min(available, head->get_start() + len);
+    dout.insert(dout.begin(),
+                speech_data.begin() + head->get_start(),
+                speech_data.begin() + stop);
+
+    delete head;
+    flag = S_END;
+    return 1;
+}
+
+
+// States of the segmentation loop in Audio::split(): UNTRIGGERED while no
+// segment is open, TRIGGERED while a segment is being extended.
+#define UNTRIGGERED 0
+#define TRIGGERED   1
+
+// Segment-length thresholds expressed in samples at 16000 samples per second.
+// Only the 5S/10S/15S/20S values are referenced by Audio::split(); 30S and 60S
+// are not used in the code shown here.
+#define SPEECH_LEN_5S  (16000 * 5)
+#define SPEECH_LEN_10S (16000 * 10)
+#define SPEECH_LEN_15S (16000 * 15)
+#define SPEECH_LEN_20S (16000 * 20)
+#define SPEECH_LEN_30S (16000 * 30)
+#define SPEECH_LEN_60S (16000 * 60)
+
+// Replaces the single whole-recording frame queued by loadwavfrommem() with
+// voice segments found by WebRTC VAD.
+//
+// The recording is scanned in 160-sample steps. A 10-step sliding window of
+// VAD decisions opens a segment when at least 9 of the last 10 steps were
+// voiced. An open segment always runs for at least 5 s; after that it is closed
+// as soon as the window's voiced count drops below a length-dependent
+// threshold, or unconditionally once it reaches 15 s.
+//
+// If the VAD cannot be created the queue is left untouched, so the recording
+// is still processed as one frame.
+void Audio::split()
+{
+    VadInst *handle = WebRtcVad_Create();
+    if (handle == NULL) {
+        printf("WebRtcVad_Create failed, audio is not split\n");
+        return;
+    }
+    WebRtcVad_Init(handle);
+    WebRtcVad_set_mode(handle, 2);
+
+    int window_size = 10;
+    AudioWindow audiowindow(window_size);
+    int status = UNTRIGGERED;
+    int offset = 0;
+    int fs = 16000;
+    int step = 160;
+
+    // Discard the whole-recording frame. `frame` must stay NULL afterwards:
+    // the tail of this function treats a non-NULL `frame` as an open segment,
+    // and reusing the deleted pointer would be a use-after-free.
+    if (!frame_queue.empty()) {
+        delete frame_queue.front();
+        frame_queue.pop();
+    }
+    AudioFrame *frame = NULL;
+
+    while (offset < speech_len - step) {
+        int n = WebRtcVad_Process(handle, fs, speech_buff + offset, step);
+        if (n < 0) {
+            n = 0; // VAD error: count the step as unvoiced
+        }
+        // Exactly one window update per step, in either state.
+        int win_weight = audiowindow.put(n);
+
+        if (status == UNTRIGGERED) {
+            if (win_weight >= window_size - 1) {
+                frame = new AudioFrame();
+                // Back-date the start to where the voiced run began.
+                int start = offset - step * (window_size - 1);
+                frame->set_start(start);
+                status = TRIGGERED;
+            }
+        } else {
+            int voice_len = (offset - frame->get_start());
+            int gap = 0;
+            if (voice_len < SPEECH_LEN_5S) {
+                // Segments are never closed before 5 s.
+                offset += step;
+                continue;
+            } else if (voice_len < SPEECH_LEN_10S) {
+                gap = 1;
+            } else if (voice_len < SPEECH_LEN_20S) {
+                gap = window_size / 5;
+            } else {
+                // Not reached in practice: segments are force-closed at 15 s below.
+                gap = window_size - 1;
+            }
+
+            if (win_weight < gap || voice_len >= SPEECH_LEN_15S) {
+                status = UNTRIGGERED;
+                offset = frame->set_end(offset, speech_align_len);
+                frame_queue.push(frame);
+                frame = NULL;
+            }
+        }
+        offset += step;
+    }
+
+    // Close a segment that was still open when the audio ended.
+    if (frame != NULL) {
+        frame->set_end(speech_len, speech_align_len);
+        frame_queue.push(frame);
+        frame = NULL;
+    }
+    WebRtcVad_Free(handle);
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/Audio.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/Audio.h
@ -0,0 +1,55 @@
+
+#ifndef AUDIO_H
+#define AUDIO_H
+
+#include <queue>
+#include <stdint.h>
+#include <vector>
+#include <AudioFile.h>
+using namespace std;
+
+// A contiguous range of samples inside an Audio object's buffer.
+class AudioFrame {
+  private:
+    int start; // index of the first sample; set_start() clamps it to >= 0
+    int end;   // end index computed by set_end()
+    int len;   // number of samples in the frame
+
+  public:
+    AudioFrame();
+    // Creates a frame of `len` samples starting at sample 0.
+    AudioFrame(int len);
+
+    ~AudioFrame();
+    // Stores max(val, 0) as the start index and returns the stored value.
+    int set_start(int val);
+    // Computes end/len from `val`, rounded to the 400/160 frame grid; prints a
+    // warning when the result exceeds max_len. Returns the adjusted end index.
+    int set_end(int val, int max_len);
+    int get_start();
+    int get_len();
+    // Only prints a "not implemented" message and returns 0.
+    int disp();
+};
+
+// Holds one recording as int16 PCM plus a float copy, and a queue of frames
+// (segments) that fetch() hands out one at a time.
+// NOTE(review): the class owns raw pointers (speech_buff and the queued
+// AudioFrame objects) but is implicitly copyable; copying an instance would
+// make two objects free the same memory - confirm it is never copied.
+class Audio {
+    private:
+        vector<float> speech_data;   // samples as floats, filled by loadwavfrommem()
+        int16_t *speech_buff;        // malloc'ed int16 samples, zero-padded to speech_align_len
+        int speech_len;              // number of valid samples (all channels interleaved)
+        int speech_align_len;        // speech_len rounded up to a multiple of align_size
+        int16_t sample_rate;
+        int offset;
+        float align_size;            // padding granularity in samples
+        int data_type;
+        queue<AudioFrame *> frame_queue; // heap-allocated frames; fetch() deletes what it pops
+
+    public:
+        vector<float> speech_vec;
+        Audio(int data_type);
+        Audio(int data_type, int size);
+        ~Audio();
+        void disp();
+        // NOTE(review): loadwav, fetch_chunck and padding are declared here but
+        // no definition appears in the Audio.cpp added by this change.
+        void loadwav(const char *filename);
+        void loadwavfrommem(AudioFile<float>audio);
+        int fetch_chunck(float *&dout, int len);
+        // Pops one frame into dout; returns 1 on success, 0 when no frame is left.
+        int fetch(vector<float> &dout, int &len, int &flag);
+        void padding();
+        // Re-segments the loaded audio with WebRTC VAD.
+        void split();
+};
+
+#endif
--- a/runtime/engine/asr/server/brpc/paraformerCPP/AudioFile.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/AudioFile.h
--- a/runtime/engine/asr/server/brpc/paraformerCPP/ComDefine.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/ComDefine.h
@ -0,0 +1,11 @@
+
+#ifndef COMDEFINE_H
+#define COMDEFINE_H
+
+// Status flags for delivered audio chunks. Audio::fetch() reports S_END for
+// every frame it returns; the other values are not used by the code shown in
+// this change - presumably begin/middle/whole-utterance/error markers, confirm
+// with the consumers.
+#define S_BEGIN  0
+#define S_MIDDLE 1
+#define S_END    2
+#define S_ALL    3
+#define S_ERR    4
+
+#endif
--- a/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.cpp
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.cpp
@ -0,0 +1,267 @@
+#include "Vocab.h"
+
+#include <fstream>
+#include <iostream>
+#include <list>
+#include <sstream>
+#include <string>
+
+using namespace std;
+
+// Loads the vocabulary: one token per line, the line number (0-based) is the
+// token id. When the file cannot be opened the vocabulary stays empty and an
+// error is written to stderr; construction itself never fails.
+Vocab::Vocab(const char *filename)
+{
+    if (filename == NULL) {
+        cerr << "Vocab: no vocabulary file given" << endl;
+        return;
+    }
+
+    ifstream in(filename);
+    if (!in) {
+        cerr << "Vocab: cannot open vocabulary file: " << filename << endl;
+        return;
+    }
+
+    string line;
+    while (getline(in, line)) // getline strips the trailing '\n'
+    {
+        // Files with CRLF line endings would otherwise keep a '\r' on every
+        // token and break comparisons such as word == "<s>".
+        if (!line.empty() && line[line.size() - 1] == '\r') {
+            line.erase(line.size() - 1);
+        }
+        vocab.push_back(line);
+    }
+}
+Vocab::~Vocab()
+{
+}
+
+// Concatenates the tokens for the given ids without any separator.
+// Ids outside [0, size()) are skipped instead of indexing out of bounds.
+string Vocab::vector2string(vector<int> in)
+{
+    stringstream ss;
+    for (auto it = in.begin(); it != in.end(); it++) {
+        const int id = *it;
+        if (id < 0 || static_cast<size_t>(id) >= vocab.size()) {
+            continue;
+        }
+        ss << vocab[id];
+    }
+
+    return ss.str();
+}
+
+// Decodes the first three bytes of `str` as one 3-byte UTF-8 sequence
+// (1110xxxx 10xxxxxx 10xxxxxx) and returns its code point, or 0 when the bytes
+// do not have that shape. The checks run in order so that a short string is
+// rejected at its terminating '\0' before any later byte is read.
+int str2int(string str)
+{
+    const char *bytes = str.c_str();
+
+    if ((bytes[0] & 0xf0) != 0xe0) {
+        return 0;
+    }
+    if ((bytes[1] & 0xc0) != 0x80) {
+        return 0;
+    }
+    if ((bytes[2] & 0xc0) != 0x80) {
+        return 0;
+    }
+
+    const int high = (bytes[0] & 0x0f) << 12;
+    const int middle = (bytes[1] & 0x3f) << 6;
+    const int low = bytes[2] & 0x3f;
+    return high | middle | low;
+}
+
+// True when `ch` is exactly one 3-byte UTF-8 character whose code point lies
+// in [19968, 40959] (U+4E00..U+9FFF).
+bool Vocab::isChinese(string ch)
+{
+    if (ch.size() == 3) {
+        const int code_point = str2int(ch);
+        return code_point >= 19968 && code_point <= 40959;
+    }
+    return false;
+}
+
+
+// Converts token ids to display text.
+//
+//  1. "<s>", "</s>" and "<unk>" are dropped.
+//  2. Tokens containing "@@" are word pieces: their last two characters are
+//     removed and they are joined with the following pieces until a token
+//     without "@@" completes the word.
+//  3. Chinese characters are appended as-is. For other words, the first word
+//     after a Chinese character (or at the beginning) gets its first letter
+//     upper-cased, single letters following another English word are
+//     upper-cased, and a space is inserted between two English words unless
+//     both are single letters.
+//
+// Ids outside [0, size()) and empty tokens are skipped. Upper-casing only
+// touches ASCII 'a'..'z', so digits, punctuation and multi-byte characters are
+// left intact.
+// NOTE(review): a word piece sequence that is still open when the input ends
+// is discarded, as before - confirm this is intended.
+string Vocab::vector2stringV2(vector<int> in)
+{
+    auto upper_first = [](string &w) {
+        if (!w.empty() && w[0] >= 'a' && w[0] <= 'z') {
+            w[0] = static_cast<char>(w[0] - 32);
+        }
+    };
+
+    list<string> words;
+
+    bool is_pre_english = false;
+    size_t pre_english_len = 0;
+
+    bool is_combining = false;
+    string combine = "";
+
+    for (auto it = in.begin(); it != in.end(); it++) {
+        const int id = *it;
+        if (id < 0 || static_cast<size_t>(id) >= vocab.size()) {
+            continue;
+        }
+        string word = vocab[id];
+
+        // step1: skip special tokens
+        if (word == "<s>" || word == "</s>" || word == "<unk>")
+            continue;
+
+        // step2: combine word pieces into a full word
+        if (word.find("@@") != string::npos) {
+            // start or middle piece
+            combine += word.erase(word.length() - 2);
+            is_combining = true;
+            continue;
+        } else if (is_combining) {
+            // final piece
+            combine += word;
+            is_combining = false;
+            word = combine;
+            combine = "";
+        }
+
+        if (word.empty()) {
+            continue;
+        }
+
+        // step3: spacing and capitalization
+        if (isChinese(word)) {
+            words.push_back(word);
+            is_pre_english = false;
+            continue;
+        }
+
+        if (!is_pre_english) {
+            // previous word was Chinese (or there was none)
+            upper_first(word);
+        } else {
+            // single letters are treated as abbreviations
+            if (word.size() == 1) {
+                upper_first(word);
+            }
+            // consecutive single letters are written without a space
+            if (pre_english_len > 1 || word.size() > 1) {
+                words.push_back(" ");
+            }
+        }
+        words.push_back(word);
+        pre_english_len = word.size();
+        is_pre_english = true;
+    }
+
+    stringstream ss;
+    for (auto it = words.begin(); it != words.end(); it++) {
+        ss << *it;
+    }
+
+    return ss.str();
+}
+
+// Variant of vector2stringV2 that takes its ids from the bytes of `in`: the
+// output starts with `in` itself (up to its first '\0'), followed by the text
+// produced by treating every byte as a token id and applying the same
+// special-token, word-piece, spacing and capitalization rules.
+//
+// Bytes that do not map to an id in [0, size()) and empty tokens are skipped.
+// Upper-casing only touches ASCII 'a'..'z'.
+// NOTE(review): this definition needs a matching `vector2stringV3(string)`
+// declaration in class Vocab to compile.
+string Vocab::vector2stringV3(string in)
+{
+    auto upper_first = [](string &w) {
+        if (!w.empty() && w[0] >= 'a' && w[0] <= 'z') {
+            w[0] = static_cast<char>(w[0] - 32);
+        }
+    };
+
+    list<string> words;
+    words.push_back(in.c_str());
+
+    bool is_pre_english = false;
+    size_t pre_english_len = 0;
+
+    bool is_combining = false;
+    string combine = "";
+
+    for (auto it = in.begin(); it != in.end(); it++) {
+        // Same integer promotion as indexing with the char directly, but a
+        // negative value is rejected instead of wrapping to a huge index.
+        const int id = *it;
+        if (id < 0 || static_cast<size_t>(id) >= vocab.size()) {
+            continue;
+        }
+        string word = vocab[id];
+
+        // step1: skip special tokens
+        if (word == "<s>" || word == "</s>" || word == "<unk>")
+            continue;
+
+        // step2: combine word pieces into a full word
+        if (word.find("@@") != string::npos) {
+            // start or middle piece
+            combine += word.erase(word.length() - 2);
+            is_combining = true;
+            continue;
+        } else if (is_combining) {
+            // final piece
+            combine += word;
+            is_combining = false;
+            word = combine;
+            combine = "";
+        }
+
+        if (word.empty()) {
+            continue;
+        }
+
+        // step3: spacing and capitalization
+        if (isChinese(word)) {
+            words.push_back(word);
+            is_pre_english = false;
+            continue;
+        }
+
+        if (!is_pre_english) {
+            // previous word was Chinese (or there was none)
+            upper_first(word);
+        } else {
+            // single letters are treated as abbreviations
+            if (word.size() == 1) {
+                upper_first(word);
+            }
+            // consecutive single letters are written without a space
+            if (pre_english_len > 1 || word.size() > 1) {
+                words.push_back(" ");
+            }
+        }
+        words.push_back(word);
+        pre_english_len = word.size();
+        is_pre_english = true;
+    }
+
+    stringstream ss;
+    for (auto it = words.begin(); it != words.end(); it++) {
+        ss << *it;
+    }
+
+    return ss.str();
+}
+
+
+// Number of tokens loaded from the vocabulary file.
+int Vocab::size()
+{
+    const int token_count = vocab.size();
+    return token_count;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.h
@ -0,0 +1,24 @@
+#ifndef VOCAB_H
+#define VOCAB_H
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+using namespace std;
+
+// Token vocabulary: maps token ids (line numbers of the vocabulary file) to
+// token strings and turns id sequences into text.
+class Vocab {
+  private:
+    vector<string> vocab; // vocab[id] is the token text
+    // True for a single 3-byte UTF-8 character in U+4E00..U+9FFF.
+    bool isChinese(string ch);
+    // NOTE(review): declared but not defined in Vocab.cpp.
+    bool isEnglish(string ch);
+
+  public:
+    // Reads one token per line from `filename`.
+    Vocab(const char *filename);
+    ~Vocab();
+    int size();
+    // Plain concatenation of the tokens.
+    string vector2string(vector<int> in);
+    // Concatenation with word-piece merging, spacing and capitalization.
+    string vector2stringV2(vector<int> in);
+    // NOTE(review): kept for compatibility; no definition exists in Vocab.cpp.
+    string vector2stringV2(string in);
+    // Declaration for the definition in Vocab.cpp, which was previously
+    // missing here and made that file fail to compile.
+    string vector2stringV3(string in);
+};
+
+#endif
--- a/runtime/engine/asr/server/brpc/paraformerCPP/asr.conf
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/asr.conf
@ -0,0 +1,49 @@
+# comlog related params
+##########################################
+# 进程名
+COMLOG_PROCNAME : asr-service
+
+#日志等级
+#[默认配置(uint)]
+COMLOG_LEVEL : 4
+
+#设备数目
+#[默认配置(uint),  COMLOG_DEVICE_NUM : 2]
+COMLOG_DEVICE_NUM : 2
+#设备0 名
+#[默认配置(字符串),  COMLOG_DEVICE0 : FILE]
+COMLOG_DEVICE0 : FILE
+#设备1 名
+#[默认配置(字符串),  COMLOG_DEVICE1 : TTY]
+COMLOG_DEVICE1 : TTY
+
+#设备类型, ULLOG
+#[默认配置(字符串)]
+FILE_TYPE : ULLOG
+FILE_SIZE : 2048
+FILE_SPLITE_TYPE : DATECUT
+FILE_DATA_CRONOCUT : 1
+FILE_RESERVED1 : %Y%m%d%H
+FILE_QUOTA_DAY : 30
+#日志名
+#[默认配置(字符串)]
+FILE_NAME : asr-service.log
+#日志路径
+#[默认配置(字符串),  FILE_PATH : ./log]
+FILE_PATH : ./log
+#是否打开这个设备
+#[默认配置(uint),  FILE_OPEN : 1]
+FILE_OPEN : 1
+
+#设备类型, TTY
+#[默认配置(字符串),  TTY_TYPE : TTY]
+TTY_TYPE : TTY
+#日志名
+#[默认配置(字符串),  TTY_NAME : receiver]
+TTY_NAME : asr-service
+#日志路径
+#[默认配置(字符串),  TTY_PATH : ./log]
+TTY_PATH : ./log
+#是否打开这个设备
+#[默认配置(uint),  TTY_OPEN : 1]
+TTY_OPEN : 1
--- a/runtime/engine/asr/server/brpc/paraformerCPP/client.cc
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/client.cc
@ -0,0 +1,34 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "recognizer.h"
+#include <iostream>
+#include <string>
+
+using std::vector;
+
+// Test client: recognizes the same wav file 50 times on one recognizer
+// instance and prints each result.
+//
+// Usage: <program> <model_file> <word_symbol_file> <wav_file>
+// Returns 0 on success, 1 on bad arguments or failed initialization.
+int main(int argc, char* argv[]) {
+
+        if (argc < 4) {
+                std::cerr << "Usage: " << (argc > 0 ? argv[0] : "client")
+                          << " <model_file> <word_symbol_file> <wav_file>" << std::endl;
+                return 1;
+        }
+
+        std::string model_file = argv[1];
+        std::string word_symbol_file = argv[2];
+        std::string wav_audio = argv[3];
+        // presumably InitRecognizer returns true on success - confirm against its definition
+        if (!InitRecognizer(model_file, word_symbol_file)) {
+                std::cerr << "Fail to init recognizer with model: " << model_file << std::endl;
+                return 1;
+        }
+        int idx = AddRecognizerInstance(); // idx == 0
+        for (int i = 0; i < 50; i++){
+                AcceptWav(wav_audio, idx);
+                std::string result = GetResult(idx);
+                std::cout << "idx:" << idx << "result :" << result << std::endl;
+                Reset(idx);
+        }
+        return 0;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/main.cc
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/main.cc
@ -0,0 +1,33 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "recognizer.h"
+#include <iostream>
+#include <string>
+
+using std::vector;
+
+// Recognizes one wav file with "model.onnx" / "words.txt" from the working
+// directory and prints the result.
+//
+// Usage: <program> <wav_file>
+// Returns 0 on success, 1 on bad arguments or failed initialization.
+//
+// recognizer.h declares AcceptWav/GetResult/Reset with a mandatory instance id,
+// so an instance is created first and its id is passed to every call.
+int main(int argc, char* argv[]) {
+
+        if (argc < 2) {
+                std::cerr << "Usage: " << (argc > 0 ? argv[0] : "main")
+                          << " <wav_file>" << std::endl;
+                return 1;
+        }
+
+        std::string model_file = "model.onnx";
+        std::string word_symbol_file = "words.txt";
+        std::string wav_audio = argv[1];
+        // presumably InitRecognizer returns true on success - confirm against its definition
+        if (!InitRecognizer(model_file, word_symbol_file)) {
+                std::cerr << "Fail to init recognizer with model: " << model_file << std::endl;
+                return 1;
+        }
+        int idx = AddRecognizerInstance();
+
+        AcceptWav(wav_audio, idx);
+
+        std::string result = GetResult(idx);
+        std::cout << "result :" << result << std::endl;
+        Reset(idx);
+        return 0;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/recognizer.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/recognizer.h
@ -0,0 +1,28 @@
+
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <string>
+#include <vector>
+
+// C-style facade over the recognizer. Instances are addressed by an integer
+// id; the implementation is not part of this header.
+
+// Loads the model and the word symbol table.
+// NOTE(review): the meaning of the bool result is not visible here -
+// presumably true on success.
+bool InitRecognizer(const std::string& model_file, 
+                    const std::string& word_symbol_table_file);
+int AddRecognizerInstance(); // instance id is from 0 to size - 1
+int GetRecognizerInstanceSize(); // return size
+// Feeds samples (Accept) or a wav file (AcceptWav) to the given instance.
+void Accept(const std::vector<float>& waves, int instance_id); 
+void AcceptWav(const std::string wav_file, int instance_id);
+// Returns the recognition text of the given instance.
+std::string GetResult(int instance_id);
+// Callers in this change invoke Reset after GetResult, before reusing the instance.
+void Reset(int instance_id);
--- a/runtime/engine/asr/server/brpc/paraformerCPP/run.sh
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/run.sh
@ -0,0 +1,2 @@
+#!/bin/bash
+# Starts the ASR brpc server on port 8765 with the bundled libraries in
+# ./fdlib/lib on the loader path.
+# The ${VAR:+...} form avoids a trailing ':' when LD_LIBRARY_PATH is unset;
+# an empty path entry would make the loader search the current directory.
+export LD_LIBRARY_PATH=./fdlib/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}
+../bin/asrcpuserver --port 8765
--- a/runtime/engine/asr/server/brpc/paraformerCPP/server.cpp
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/server.cpp
@ -0,0 +1,243 @@
+// Copyright (c) 2014 baidu-rpc authors.
+// 
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// 
+//     http://www.apache.org/licenses/LICENSE-2.0
+// 
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// A server to receive EchoRequest and send back EchoResponse.
+
+#include <iostream>
+#ifndef _WIN32
+#include <sys/time.h>
+#else
+#include <win_func.h>
+#endif
+
+
+#include <gflags/gflags.h>
+#include <baas-lib-c/baas.h>
+#include <baas-lib-c/giano_mock_helper.h>
+#include <base/logging.h>
+#include "base/base64.h"
+#include <baidu/rpc/server.h>
+#include <baidu/rpc/policy/giano_authenticator.h>
+#include "echo.pb.h"
+#include <typeinfo>
+#include <stdexcept>
+#include <fstream>
+#include <string>
+#include <com_log.h>
+#include <vector>
+#include "rapidjson/document.h"
+#include "rapidjson/stringbuffer.h"
+#include "rapidjson/writer.h"
+#include "recognizer.h"
+#include <AudioFile.h>
+#include <Audio.h>
+#include <cstdlib>
+
+using namespace std;
+
+// Command-line flags (gflags).
+DEFINE_int32(port, 8857, "TCP Port of this server");
+// NOTE(review): FLAGS_modelpath is not read by the code in this file's main(),
+// which passes the literal "model.onnx" to InitRecognizer.
+DEFINE_string(modelpath, "/home/bae/sourcecode/paddlespeech_cli", "the path of model");
+// Copied into ServerOptions::max_concurrency in main().
+DEFINE_int32(max_concurrency, 2, "Limit of request processing in parallel");
+
+
+namespace example {
+// One recognized segment of the request audio.
+struct subRes{
+    int s; // start time in milliseconds
+    int e; // end time in milliseconds
+    string t; // text
+};
+
+// brpc service that decodes a base64 WAV payload, splits it into voice
+// segments and returns the recognized text of every segment as JSON.
+class AudioServiceImpl : public EchoService {
+public:
+    AudioServiceImpl() {};
+    virtual ~AudioServiceImpl() {};
+
+    // Serializes a JSON document to a compact string.
+    string to_string(rapidjson::Document& out_doc) {
+        rapidjson::StringBuffer out_buffer;
+        rapidjson::Writer<rapidjson::StringBuffer> out_writer(out_buffer);
+        out_doc.Accept(out_writer);
+        std::string out_params = out_buffer.GetString();
+        return out_params;
+    }
+
+    // Serializes the segments as a JSON array: [{"s":..,"e":..,"t":".."}, ...].
+    string convertvector(vector<subRes> result){
+        rapidjson::Document document;
+        document.SetArray();
+        rapidjson::Document::AllocatorType& allocator = document.GetAllocator();
+        for (const auto& sub : result) {
+           rapidjson::Value obj(rapidjson::kObjectType);
+           // Copy the text into the document: a bare pointer could be stored
+           // by reference and would dangle once the source string is gone.
+           rapidjson::Value text(sub.t.c_str(),
+                                 static_cast<rapidjson::SizeType>(sub.t.size()),
+                                 allocator);
+           obj.AddMember("s", sub.s, allocator);
+           obj.AddMember("e", sub.e, allocator);
+           obj.AddMember("t", text, allocator);
+           document.PushBack(obj, allocator);
+        }
+        rapidjson::StringBuffer strbuf;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(strbuf);
+        document.Accept(writer);
+        return strbuf.GetString();
+    }
+
+    // Same as convertvector, wrapped in an object: {"AllTrans":[...]}.
+    string convertvectorV2(vector<subRes> result){
+        rapidjson::Document document;
+        document.SetObject();
+        rapidjson::Document::AllocatorType& allocator = document.GetAllocator();
+        rapidjson::Value ObjectArray(rapidjson::kArrayType);
+        for (const auto& sub : result) {
+           rapidjson::Value obj(rapidjson::kObjectType);
+           rapidjson::Value text(sub.t.c_str(),
+                                 static_cast<rapidjson::SizeType>(sub.t.size()),
+                                 allocator);
+           obj.AddMember("s", sub.s, allocator);
+           obj.AddMember("e", sub.e, allocator);
+           obj.AddMember("t", text, allocator);
+           ObjectArray.PushBack(obj, allocator);
+        }
+        document.AddMember("AllTrans", ObjectArray, allocator);
+        rapidjson::StringBuffer strbuf;
+        rapidjson::Writer<rapidjson::StringBuffer> writer(strbuf);
+        document.Accept(writer);
+        return strbuf.GetString();
+    }
+
+    // RPC entry point.
+    //
+    // request->audio() is expected to hold a base64-encoded WAV file. On
+    // success the response carries err_no 0 and the JSON array produced by
+    // convertvector(). When the payload cannot be decoded or parsed, err_no is
+    // -1, err_msg describes the failure and result is empty.
+    virtual void audiorecognition(google::protobuf::RpcController* cntl_base,
+                      const AudioRequest* request,
+                      AudioResponse* response,
+                      google::protobuf::Closure* done) {
+        // This object helps you to call done->Run() in RAII style. If you need
+        // to process the request asynchronously, pass done_guard.release().
+        baidu::rpc::ClosureGuard done_guard(done);
+
+        string decode_audio_buffer;
+        if (!base::Base64Decode(request->audio(), &decode_audio_buffer)) {
+            com_writelog(COMLOG_WARNING, "audio payload is not valid base64");
+            response->set_err_no(-1);
+            response->set_err_msg("audio is not valid base64");
+            response->set_result("");
+            response->set_cost_time(0);
+            return;
+        }
+        vector<uint8_t> vec(decode_audio_buffer.begin(), decode_audio_buffer.end());
+
+        AudioFile<float> a;
+        if (!a.loadFromMemory(vec)) {
+            com_writelog(COMLOG_WARNING, "audio payload is not a readable audio file");
+            response->set_err_no(-1);
+            response->set_err_msg("audio cannot be parsed");
+            response->set_result("");
+            response->set_cost_time(0);
+            return;
+        }
+
+        Audio audi(0);
+        audi.loadwavfrommem(a);
+        audi.split();
+
+        vector<float> buff;
+        int len = 0;
+        int flag = 1;
+        vector<subRes> results;
+        int tmp_len = 0; // samples consumed so far, used to derive timestamps
+        while (audi.fetch(buff, len, flag) > 0) {
+            // Picks recognizer instance 0 or 1 at random.
+            // NOTE(review): two requests running in parallel can pick the same
+            // instance; confirm the recognizer tolerates that or hand out
+            // instances exclusively.
+            int do_idx = rand() % 2;
+            Accept(buff, do_idx);
+            std::string subtxt = GetResult(do_idx);
+            Reset(do_idx);
+            buff.clear();
+            int start_time = (int)(tmp_len/16000.0 * 1000);
+            int end_time = (int)((tmp_len + len)/16000.0 * 1000);
+            subRes subres = {start_time, end_time, subtxt};
+            tmp_len += len;
+            results.push_back(subres);
+            com_writelog(COMLOG_NOTICE, "using process: %d, start: %d, end: %d, result: %s", do_idx, start_time, end_time, subtxt.c_str());
+        }
+
+        response->set_err_no(0);
+        response->set_err_msg("");
+        string jsonresult = convertvector(results);
+        response->set_result(jsonresult);
+        response->set_cost_time(0);
+    }
+};
+}  // namespace example
+
+
+
+// Server entry point: loads the logging configuration, registers the audio
+// service under /v1/audiorecognition, initializes the recognizer with two
+// instances and serves until asked to quit.
+// Returns 0 on clean shutdown, -1 when any start-up step fails.
+int main(int argc, char* argv[]) {
+    // Parse gflags. We recommend you to use gflags as well.
+    google::ParseCommandLineFlags(&argc, &argv, true);
+    bool flag_auth = false;
+    // Setup for `GianoAuthenticator'.
+    // The verifier lives at function scope because the authenticator is handed
+    // its address; a block-local object would be destroyed while the
+    // authenticator may still refer to it.
+    std::unique_ptr<baas::CredentialVerifier> verifier;
+    std::unique_ptr<baidu::rpc::policy::GianoAuthenticator> auth;
+    if (flag_auth) {
+        if (baas::BAAS_Init() != 0) {
+            LOG(ERROR) << "Fail to init BAAS";
+            return -1;
+        }
+        verifier.reset(new baas::CredentialVerifier(
+                baas::ServerUtility::CreateCredentialVerifier()));
+        auth.reset(new baidu::rpc::policy::GianoAuthenticator(NULL, verifier.get()));
+    }
+
+    int ret = com_loadlog("./", "asr.conf");
+    if (ret != 0)
+    {
+        fprintf(stderr, "load err\n");
+        return -1;
+    }
+    // Write a log line (thread-safe).
+    com_writelog(COMLOG_NOTICE, "server start1");
+
+
+    // Generally you only need one Server.
+    baidu::rpc::Server server;
+
+
+    // Instance of your service.
+    example::AudioServiceImpl audio_service_impl;
+
+    // Add the service into server. Notice the second parameter, because the
+    // service is put on stack, we don't want server to delete it, otherwise
+    // use baidu::rpc::SERVER_OWNS_SERVICE.
+    if (server.AddService(&audio_service_impl,
+                          baidu::rpc::SERVER_DOESNT_OWN_SERVICE,
+                          "/v1/audiorecognition => audiorecognition") != 0) {
+        LOG(ERROR) << "Fail to add service";
+        return -1;
+    }
+
+    // presumably InitRecognizer returns true on success - confirm against its definition
+    if (!InitRecognizer("model.onnx", "words.txt")) {
+        LOG(ERROR) << "Fail to init recognizer";
+        return -1;
+    }
+    // Two instances: the service picks instance 0 or 1 for every segment.
+    for (int i = 0; i < 2; i++) {
+        AddRecognizerInstance();
+    }
+
+    srand((unsigned)time(NULL));
+    // Start the server.
+    baidu::rpc::ServerOptions options;
+    options.idle_timeout_sec = -1;
+    options.auth = auth.get();
+    options.max_concurrency = FLAGS_max_concurrency;
+    if (server.Start(FLAGS_port, &options) != 0) {
+        LOG(ERROR) << "Fail to start EchoServer";
+        return -1;
+    }
+
+    // Wait until Ctrl-C is pressed, then Stop() and Join() the server.
+    server.RunUntilAskedToQuit();
+    return 0;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/CMakeLists.txt
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/CMakeLists.txt
@ -0,0 +1,16 @@
+
+
+# Select the platform define expected by the WebRTC sources.
+if(WIN32)
+    add_definitions(-DWEBRTC_WIN)
+else()
+    add_definitions(-DWEBRTC_POSIX)
+endif()
+
+
+# Add the parent directory to the include search path.
+include_directories("..")
+
+# Collect every .c file below this directory plus rtc_base/checks.cc.
+file(GLOB_RECURSE files "*.c" "rtc_base/checks.cc")
+
+# Print the collected source list at configure time.
+message("${files}")
+
+# Build the collected sources into the webrtcvad library.
+add_library(webrtcvad ${files})
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_bit_reverse.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_bit_reverse.c
@ -0,0 +1,108 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* Tables for data buffer indexes that are bit reversed and thus need to be
+ * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap
+ * operations, while index_7[{1, 3, 5, ...}] are for the right side of the
+ * operation. Same for index_8.
+ */
+
+/* Indexes for the case of stages == 7: 112 entries forming 56 (left, right)
+ * swap pairs, read two at a time by WebRtcSpl_ComplexBitReverse(). */
+static const int16_t index_7[112] = {
+  1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104,
+  12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52,
+  23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98,
+  37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70,
+  51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69,
+  81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125,
+  103, 115, 111, 123
+};
+
+/* Indexes for the case of stages == 8: 240 entries forming 120 (left, right)
+ * swap pairs, read two at a time by WebRtcSpl_ComplexBitReverse(). */
+static const int16_t index_8[240] = {
+  1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80,
+  11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20,
+  40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184,
+  30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41,
+  148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76,
+  51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62,
+  124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82,
+  75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87,
+  234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101,
+  166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142,
+  115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131,
+  193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201,
+  149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171,
+  213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227,
+  203, 211, 207, 243, 215, 235, 223, 251, 239, 247
+};
+/* Reorders an array of complex samples into bit-reversed index order, in place.
+ *
+ * complex_data: interleaved 16-bit (real, imaginary) pairs; every pair is
+ *               moved as one 32-bit word through an int32_t* view.
+ * stages:       selects the transform size; 7 and 8 use the precomputed
+ *               index_7 / index_8 swap tables, any other value derives the
+ *               bit-reversed indexes on the fly for 1 << stages elements.
+ */
+void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) {
+  /* For any specific value of stages, we know exactly the indexes that are
+   * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of
+   * stages are 7 and 8, so we use tables to save unnecessary iterations and
+   * calculations for these two cases.
+   */
+  if (stages == 7 || stages == 8) {
+    int m = 0;
+    int length = 112;
+    const int16_t* index = index_7;
+
+    if (stages == 8) {
+      length = 240;
+      index = index_8;
+    }
+
+    /* Decimation in time. Swap the elements with bit-reversed indexes. */
+    for (m = 0; m < length; m += 2) {
+      /* We declare a int32_t* type pointer, to load both the 16-bit real
+       * and imaginary elements from complex_data in one instruction, reducing
+       * complexity.
+       * NOTE(review): this reads int16_t storage through an int32_t*; it
+       * presumably relies on complex_data being 4-byte aligned - confirm.
+       */
+      int32_t* complex_data_ptr = (int32_t*)complex_data;
+      int32_t temp = 0;
+
+      temp = complex_data_ptr[index[m]];  /* Real and imaginary */
+      complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]];
+      complex_data_ptr[index[m + 1]] = temp;
+    }
+  }
+  else {
+    int m = 0, mr = 0, l = 0;
+    int n = 1 << stages;  /* Number of complex elements. */
+    int nn = n - 1;
+
+    /* Decimation in time - re-order data */
+    for (m = 1; m <= nn; ++m) {
+      int32_t* complex_data_ptr = (int32_t*)complex_data;
+      int32_t temp = 0;
+
+      /* Find out indexes that are bit-reversed: mr is advanced from the
+       * bit-reversed value of m - 1 to the bit-reversed value of m.
+       */
+      l = n;
+      do {
+        l >>= 1;
+      } while (l > nn - mr);
+      mr = (mr & (l - 1)) + l;
+
+      if (mr <= m) {  /* Pair already swapped, or index maps to itself. */
+        continue;
+      }
+
+      /* Swap the elements with bit-reversed indexes.
+       * This is similar to the loop in the stages == 7 or 8 cases.
+       */
+      temp = complex_data_ptr[m];  /* Real and imaginary */
+      complex_data_ptr[m] = complex_data_ptr[mr];
+      complex_data_ptr[mr] = temp;
+    }
+  }
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft.c
@ -0,0 +1,299 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_ComplexFFT().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/complex_fft_tables.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/rtc_base/system/arch.h"
+
+#define CFFTSFT 14  /* Extra fractional bits kept by the mode != 0 forward path. */
+#define CFFTRND 1  /* Rounding term added before the >> (15 - CFFTSFT) shift. */
+#define CFFTRND2 16384  /* Rounding term (1 << CFFTSFT) for the >> (1 + CFFTSFT) shift. */
+
+#define CIFFTSFT 14  /* Same role as CFFTSFT, for the inverse transform. */
+#define CIFFTRND 1  /* Same role as CFFTRND, for the inverse transform. */
+
+/* Runs the radix-2 butterfly stages of a forward FFT in place on interleaved
+ * complex int16_t data.
+ *
+ * frfi:   interleaved buffer, real part at frfi[2 * i] and imaginary part at
+ *         frfi[2 * i + 1]; overwritten with the result.
+ * stages: log2 of the transform length; n = 1 << stages must not exceed 1024.
+ * mode:   0 selects the low-complexity/low-accuracy path, any other value the
+ *         high-accuracy path that keeps CFFTSFT extra fractional bits and
+ *         rounds.
+ *
+ * Every stage shifts its outputs right by one bit, so the result is scaled by
+ * 1 / n overall. No bit-reversal is done here.
+ * NOTE(review): callers presumably run WebRtcSpl_ComplexBitReverse() on the
+ * buffer first - confirm against the call sites.
+ *
+ * Returns 0 on success, or -1 when 1 << stages exceeds 1024.
+ */
+int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode)
+{
+    int i, j, l, k, istep, n, m;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+
+    /* The 1024-value is a constant given from the size of kSinTable1024[],
+     * and should not be changed depending on the input parameter 'stages'
+     */
+    n = 1 << stages;
+    if (n > 1024)
+        return -1;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+         depending on the input parameter 'stages' */
+
+    if (mode == 0)
+    {
+        // mode==0: Low-complexity and Low-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1);
+                }
+            }
+
+            --k;  /* The next stage steps through the sine table half as fast. */
+            l = istep;
+
+        }
+
+    } else
+    {
+        // mode==1: High-complexity and High-accuracy mode
+        while (l < n)
+        {
+            istep = l << 1;
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = -kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                        "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd],"
+                        " lsl #16\n\t"
+                        "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t"
+                        :[frfi_r]"=&r"(frfi_r),
+                         [tr32]"=&r"(tr32),
+                         [ti32]"=r"(ti32)
+                        :[frfi_even]"r"((int32_t)frfi[2*j]),
+                         [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                         [wri]"r"(wri),
+                         [cfftrnd]"r"(CFFTRND));
+#else
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND;
+#endif
+
+                    tr32 >>= 15 - CFFTSFT;  /* Keep CFFTSFT fractional bits. */
+                    ti32 >>= 15 - CFFTSFT;
+
+                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT);
+                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT);
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT));
+                }
+            }
+
+            --k;  /* The next stage steps through the sine table half as fast. */
+            l = istep;
+        }
+    }
+    return 0;
+}
+/* Runs the radix-2 butterfly stages of an inverse FFT in place on interleaved
+ * complex int16_t data, scaling each stage according to the data.
+ *
+ * frfi:   interleaved buffer, real part at frfi[2 * i] and imaginary part at
+ *         frfi[2 * i + 1]; overwritten with the result.
+ * stages: log2 of the transform length; n = 1 << stages must not exceed 1024.
+ * mode:   0 selects the low-complexity/low-accuracy path, any other value the
+ *         high-accuracy path that keeps CIFFTSFT extra fractional bits and
+ *         rounds.
+ *
+ * Before every stage the value returned by WebRtcSpl_MaxAbsValueW16() for
+ * the whole buffer (presumably its largest absolute sample) is compared with
+ * 13573 and 27146; each threshold exceeded adds one bit to that stage's
+ * right shift (0, 1 or 2 bits per stage).
+ *
+ * Returns the total number of right shifts applied over all stages, or -1
+ * when 1 << stages exceeds 1024.
+ */
+int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode)
+{
+    size_t i, j, l, istep, n, m;
+    int k, scale, shift;
+    int16_t wr, wi;
+    int32_t tr32, ti32, qr32, qi32;
+    int32_t tmp32, round2;
+
+    /* The 1024-value is a constant given from the size of kSinTable1024[],
+     * and should not be changed depending on the input parameter 'stages'
+     */
+    n = ((size_t)1) << stages;
+    if (n > 1024)
+        return -1;
+
+    scale = 0;
+
+    l = 1;
+    k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change
+         depending on the input parameter 'stages' */
+
+    while (l < n)
+    {
+        // variable scaling, depending upon data
+        shift = 0;
+        round2 = 8192;  /* Half of 1 << CIFFTSFT; doubled with every extra shift bit. */
+
+        tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n);
+        if (tmp32 > 13573)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+        if (tmp32 > 27146)
+        {
+            shift++;
+            scale++;
+            round2 <<= 1;
+        }
+
+        istep = l << 1;
+
+        if (mode == 0)
+        {
+            // mode==0: Low-complexity and Low-accuracy mode
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+                    tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15;
+
+                    ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15;
+
+                    qr32 = (int32_t)frfi[2 * i];
+                    qi32 = (int32_t)frfi[2 * i + 1];
+                    frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift);
+                    frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift);
+                    frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift);
+                    frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift);
+                }
+            }
+        } else
+        {
+            // mode==1: High-complexity and High-accuracy mode
+
+            for (m = 0; m < l; ++m)
+            {
+                j = m << k;
+
+                /* The 256-value is a constant given as 1/4 of the size of
+                 * kSinTable1024[], and should not be changed depending on the input
+                 * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2
+                 */
+                wr = kSinTable1024[j + 256];
+                wi = kSinTable1024[j];
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                int32_t wri = 0;
+                __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) :
+                    "r"((int32_t)wr), "r"((int32_t)wi));
+#endif
+
+                for (i = m; i < n; i += istep)
+                {
+                    j = i + l;
+
+#ifdef WEBRTC_ARCH_ARM_V7
+                    register int32_t frfi_r;
+                    __asm __volatile(
+                      "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t"
+                      "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                      "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t"
+                      :[frfi_r]"=&r"(frfi_r),
+                       [tr32]"=&r"(tr32),
+                       [ti32]"=r"(ti32)
+                      :[frfi_even]"r"((int32_t)frfi[2*j]),
+                       [frfi_odd]"r"((int32_t)frfi[2*j +1]),
+                       [wri]"r"(wri),
+                       [cifftrnd]"r"(CIFFTRND)
+                    );
+#else
+
+                    tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND;
+
+                    ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND;
+#endif
+                    tr32 >>= 15 - CIFFTSFT;  /* Keep CIFFTSFT fractional bits. */
+                    ti32 >>= 15 - CIFFTSFT;
+
+                    qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT);
+                    qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT);
+
+                    frfi[2 * j] = (int16_t)(
+                        (qr32 - tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * j + 1] = (int16_t)(
+                        (qi32 - ti32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i] = (int16_t)(
+                        (qr32 + tr32 + round2) >> (shift + CIFFTSFT));
+                    frfi[2 * i + 1] = (int16_t)(
+                        (qi32 + ti32 + round2) >> (shift + CIFFTSFT));
+                }
+            }
+
+        }
+        --k;  /* The next stage steps through the sine table half as fast. */
+        l = istep;
+    }
+    return scale;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft_tables.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft_tables.h
@ -0,0 +1,132 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+#define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
+
+#include <stdint.h>
+/* One full period of a sine wave sampled at 1024 points with a peak of 32767
+ * (kSinTable1024[i] ~= 32767 * sin(2 * pi * i / 1024)). The FFT code reads
+ * kSinTable1024[j] and kSinTable1024[j + 256], the same wave a quarter
+ * period later. Declared static in a header, so every translation unit that
+ * includes it gets its own copy.
+ */
+static const int16_t kSinTable1024[] = {
+    0,      201,    402,    603,    804,    1005,   1206,   1406,   1607,
+    1808,   2009,   2209,   2410,   2610,   2811,   3011,   3211,   3411,
+    3611,   3811,   4011,   4210,   4409,   4608,   4807,   5006,   5205,
+    5403,   5601,   5799,   5997,   6195,   6392,   6589,   6786,   6982,
+    7179,   7375,   7571,   7766,   7961,   8156,   8351,   8545,   8739,
+    8932,   9126,   9319,   9511,   9703,   9895,   10087,  10278,  10469,
+    10659,  10849,  11038,  11227,  11416,  11604,  11792,  11980,  12166,
+    12353,  12539,  12724,  12909,  13094,  13278,  13462,  13645,  13827,
+    14009,  14191,  14372,  14552,  14732,  14911,  15090,  15268,  15446,
+    15623,  15799,  15975,  16150,  16325,  16499,  16672,  16845,  17017,
+    17189,  17360,  17530,  17699,  17868,  18036,  18204,  18371,  18537,
+    18702,  18867,  19031,  19194,  19357,  19519,  19680,  19840,  20000,
+    20159,  20317,  20474,  20631,  20787,  20942,  21096,  21249,  21402,
+    21554,  21705,  21855,  22004,  22153,  22301,  22448,  22594,  22739,
+    22883,  23027,  23169,  23311,  23452,  23592,  23731,  23869,  24006,
+    24143,  24278,  24413,  24546,  24679,  24811,  24942,  25072,  25201,
+    25329,  25456,  25582,  25707,  25831,  25954,  26077,  26198,  26318,
+    26437,  26556,  26673,  26789,  26905,  27019,  27132,  27244,  27355,
+    27466,  27575,  27683,  27790,  27896,  28001,  28105,  28208,  28309,
+    28410,  28510,  28608,  28706,  28802,  28897,  28992,  29085,  29177,
+    29268,  29358,  29446,  29534,  29621,  29706,  29790,  29873,  29955,
+    30036,  30116,  30195,  30272,  30349,  30424,  30498,  30571,  30643,
+    30713,  30783,  30851,  30918,  30984,  31049,  31113,  31175,  31236,
+    31297,  31356,  31413,  31470,  31525,  31580,  31633,  31684,  31735,
+    31785,  31833,  31880,  31926,  31970,  32014,  32056,  32097,  32137,
+    32176,  32213,  32249,  32284,  32318,  32350,  32382,  32412,  32441,
+    32468,  32495,  32520,  32544,  32567,  32588,  32609,  32628,  32646,
+    32662,  32678,  32692,  32705,  32717,  32727,  32736,  32744,  32751,
+    32757,  32761,  32764,  32766,  32767,  32766,  32764,  32761,  32757,
+    32751,  32744,  32736,  32727,  32717,  32705,  32692,  32678,  32662,
+    32646,  32628,  32609,  32588,  32567,  32544,  32520,  32495,  32468,
+    32441,  32412,  32382,  32350,  32318,  32284,  32249,  32213,  32176,
+    32137,  32097,  32056,  32014,  31970,  31926,  31880,  31833,  31785,
+    31735,  31684,  31633,  31580,  31525,  31470,  31413,  31356,  31297,
+    31236,  31175,  31113,  31049,  30984,  30918,  30851,  30783,  30713,
+    30643,  30571,  30498,  30424,  30349,  30272,  30195,  30116,  30036,
+    29955,  29873,  29790,  29706,  29621,  29534,  29446,  29358,  29268,
+    29177,  29085,  28992,  28897,  28802,  28706,  28608,  28510,  28410,
+    28309,  28208,  28105,  28001,  27896,  27790,  27683,  27575,  27466,
+    27355,  27244,  27132,  27019,  26905,  26789,  26673,  26556,  26437,
+    26318,  26198,  26077,  25954,  25831,  25707,  25582,  25456,  25329,
+    25201,  25072,  24942,  24811,  24679,  24546,  24413,  24278,  24143,
+    24006,  23869,  23731,  23592,  23452,  23311,  23169,  23027,  22883,
+    22739,  22594,  22448,  22301,  22153,  22004,  21855,  21705,  21554,
+    21402,  21249,  21096,  20942,  20787,  20631,  20474,  20317,  20159,
+    20000,  19840,  19680,  19519,  19357,  19194,  19031,  18867,  18702,
+    18537,  18371,  18204,  18036,  17868,  17699,  17530,  17360,  17189,
+    17017,  16845,  16672,  16499,  16325,  16150,  15975,  15799,  15623,
+    15446,  15268,  15090,  14911,  14732,  14552,  14372,  14191,  14009,
+    13827,  13645,  13462,  13278,  13094,  12909,  12724,  12539,  12353,
+    12166,  11980,  11792,  11604,  11416,  11227,  11038,  10849,  10659,
+    10469,  10278,  10087,  9895,   9703,   9511,   9319,   9126,   8932,
+    8739,   8545,   8351,   8156,   7961,   7766,   7571,   7375,   7179,
+    6982,   6786,   6589,   6392,   6195,   5997,   5799,   5601,   5403,
+    5205,   5006,   4807,   4608,   4409,   4210,   4011,   3811,   3611,
+    3411,   3211,   3011,   2811,   2610,   2410,   2209,   2009,   1808,
+    1607,   1406,   1206,   1005,   804,    603,    402,    201,    0,
+    -201,   -402,   -603,   -804,   -1005,  -1206,  -1406,  -1607,  -1808,
+    -2009,  -2209,  -2410,  -2610,  -2811,  -3011,  -3211,  -3411,  -3611,
+    -3811,  -4011,  -4210,  -4409,  -4608,  -4807,  -5006,  -5205,  -5403,
+    -5601,  -5799,  -5997,  -6195,  -6392,  -6589,  -6786,  -6982,  -7179,
+    -7375,  -7571,  -7766,  -7961,  -8156,  -8351,  -8545,  -8739,  -8932,
+    -9126,  -9319,  -9511,  -9703,  -9895,  -10087, -10278, -10469, -10659,
+    -10849, -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353,
+    -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, -14009,
+    -14191, -14372, -14552, -14732, -14911, -15090, -15268, -15446, -15623,
+    -15799, -15975, -16150, -16325, -16499, -16672, -16845, -17017, -17189,
+    -17360, -17530, -17699, -17868, -18036, -18204, -18371, -18537, -18702,
+    -18867, -19031, -19194, -19357, -19519, -19680, -19840, -20000, -20159,
+    -20317, -20474, -20631, -20787, -20942, -21096, -21249, -21402, -21554,
+    -21705, -21855, -22004, -22153, -22301, -22448, -22594, -22739, -22883,
+    -23027, -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143,
+    -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, -25329,
+    -25456, -25582, -25707, -25831, -25954, -26077, -26198, -26318, -26437,
+    -26556, -26673, -26789, -26905, -27019, -27132, -27244, -27355, -27466,
+    -27575, -27683, -27790, -27896, -28001, -28105, -28208, -28309, -28410,
+    -28510, -28608, -28706, -28802, -28897, -28992, -29085, -29177, -29268,
+    -29358, -29446, -29534, -29621, -29706, -29790, -29873, -29955, -30036,
+    -30116, -30195, -30272, -30349, -30424, -30498, -30571, -30643, -30713,
+    -30783, -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297,
+    -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, -31785,
+    -31833, -31880, -31926, -31970, -32014, -32056, -32097, -32137, -32176,
+    -32213, -32249, -32284, -32318, -32350, -32382, -32412, -32441, -32468,
+    -32495, -32520, -32544, -32567, -32588, -32609, -32628, -32646, -32662,
+    -32678, -32692, -32705, -32717, -32727, -32736, -32744, -32751, -32757,
+    -32761, -32764, -32766, -32767, -32766, -32764, -32761, -32757, -32751,
+    -32744, -32736, -32727, -32717, -32705, -32692, -32678, -32662, -32646,
+    -32628, -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441,
+    -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, -32137,
+    -32097, -32056, -32014, -31970, -31926, -31880, -31833, -31785, -31735,
+    -31684, -31633, -31580, -31525, -31470, -31413, -31356, -31297, -31236,
+    -31175, -31113, -31049, -30984, -30918, -30851, -30783, -30713, -30643,
+    -30571, -30498, -30424, -30349, -30272, -30195, -30116, -30036, -29955,
+    -29873, -29790, -29706, -29621, -29534, -29446, -29358, -29268, -29177,
+    -29085, -28992, -28897, -28802, -28706, -28608, -28510, -28410, -28309,
+    -28208, -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355,
+    -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, -26318,
+    -26198, -26077, -25954, -25831, -25707, -25582, -25456, -25329, -25201,
+    -25072, -24942, -24811, -24679, -24546, -24413, -24278, -24143, -24006,
+    -23869, -23731, -23592, -23452, -23311, -23169, -23027, -22883, -22739,
+    -22594, -22448, -22301, -22153, -22004, -21855, -21705, -21554, -21402,
+    -21249, -21096, -20942, -20787, -20631, -20474, -20317, -20159, -20000,
+    -19840, -19680, -19519, -19357, -19194, -19031, -18867, -18702, -18537,
+    -18371, -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017,
+    -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, -15446,
+    -15268, -15090, -14911, -14732, -14552, -14372, -14191, -14009, -13827,
+    -13645, -13462, -13278, -13094, -12909, -12724, -12539, -12353, -12166,
+    -11980, -11792, -11604, -11416, -11227, -11038, -10849, -10659, -10469,
+    -10278, -10087, -9895,  -9703,  -9511,  -9319,  -9126,  -8932,  -8739,
+    -8545,  -8351,  -8156,  -7961,  -7766,  -7571,  -7375,  -7179,  -6982,
+    -6786,  -6589,  -6392,  -6195,  -5997,  -5799,  -5601,  -5403,  -5205,
+    -5006,  -4807,  -4608,  -4409,  -4210,  -4011,  -3811,  -3611,  -3411,
+    -3211,  -3011,  -2811,  -2610,  -2410,  -2209,  -2009,  -1808,  -1607,
+    -1406,  -1206,  -1005,  -804,   -603,   -402,   -201};
+
+#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/cross_correlation.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/cross_correlation.c
@ -0,0 +1,30 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+/* C version of WebRtcSpl_CrossCorrelation() for generic platforms.
+ *
+ * Writes dim_cross_correlation values to cross_correlation. Each value is the
+ * sum over j in [0, dim_seq) of (seq1[j] * seq2[j]) >> right_shifts, after
+ * which seq2 is advanced by step_seq2 elements (a signed step) for the next
+ * value. Every product is shifted before it is added, and the 32-bit
+ * accumulator is not saturated.
+ */
+void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation,
+                                 const int16_t* seq1,
+                                 const int16_t* seq2,
+                                 size_t dim_seq,
+                                 size_t dim_cross_correlation,
+                                 int right_shifts,
+                                 int step_seq2) {
+  size_t i = 0, j = 0;
+
+  for (i = 0; i < dim_cross_correlation; i++) {
+    int32_t corr = 0;
+    for (j = 0; j < dim_seq; j++)
+      corr += (seq1[j] * seq2[j]) >> right_shifts;
+    seq2 += step_seq2;  /* Slide the seq2 window for the next lag. */
+    *cross_correlation++ = corr;
+  }
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/division_operations.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/division_operations.c
@ -0,0 +1,141 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the divisions
+ * WebRtcSpl_DivU32U16()
+ * WebRtcSpl_DivW32W16()
+ * WebRtcSpl_DivW32W16ResW16()
+ * WebRtcSpl_DivResultInQ31()
+ * WebRtcSpl_DivW32HiLow()
+ *
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/rtc_base/sanitizer.h"
+/* Unsigned division num / den.
+ * Returns the truncated quotient, or 0xFFFFFFFF when den is 0.
+ */
+uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den)
+{
+    // Guard against division with 0
+    if (den != 0)
+    {
+        return (uint32_t)(num / den);
+    } else
+    {
+        return (uint32_t)0xFFFFFFFF;
+    }
+}
+/* Signed division num / den.
+ * Returns the quotient truncated toward zero, or 0x7FFFFFFF when den is 0.
+ * NOTE(review): num == INT32_MIN with den == -1 is not guarded and overflows
+ * int32_t.
+ */
+int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den)
+{
+    // Guard against division with 0
+    if (den != 0)
+    {
+        return (int32_t)(num / den);
+    } else
+    {
+        return (int32_t)0x7FFFFFFF;
+    }
+}
+/* Signed division num / den with the quotient converted to int16_t by a
+ * plain cast (no saturation), so quotients outside the int16_t range are not
+ * preserved. Returns 0x7FFF when den is 0.
+ * NOTE(review): num == INT32_MIN with den == -1 is not guarded and overflows
+ * the 32-bit division.
+ */
+int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den)
+{
+    // Guard against division with 0
+    if (den != 0)
+    {
+        return (int16_t)(num / den);
+    } else
+    {
+        return (int16_t)0x7FFF;
+    }
+}
+/* Computes num / den as a 31-bit fraction by shift-and-subtract long division
+ * on the absolute values, producing one quotient bit per iteration (31
+ * iterations). The result is negated when exactly one of num and den is
+ * negative. Returns 0 immediately when num is 0.
+ * NOTE(review): presumably requires |num| < |den| so the quotient fits in
+ * Q31 and L_num <<= 1 cannot overflow; den == 0 is not checked - confirm
+ * against the callers.
+ */
+int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
+{
+    int32_t L_num = num;
+    int32_t L_den = den;
+    int32_t div = 0;
+    int k = 31;
+    int change_sign = 0;  /* Counts negative operands; 1 means a negative result. */
+
+    if (num == 0)
+        return 0;
+
+    if (num < 0)
+    {
+        change_sign++;
+        L_num = -num;
+    }
+    if (den < 0)
+    {
+        change_sign++;
+        L_den = -den;
+    }
+    while (k--)
+    {
+        div <<= 1;
+        L_num <<= 1;
+        if (L_num >= L_den)
+        {
+            L_num -= L_den;
+            div++;
+        }
+    }
+    if (change_sign == 1)
+    {
+        div = -div;
+    }
+    return div;
+}
+/* Divides num by a denominator given in split hi/low form and returns the
+ * quotient in Q31 (per the final conversion comment in the body).
+ *
+ * The reciprocal of the denominator is first approximated from den_hi alone
+ * (approx = 0x1FFFFFFF / den_hi), refined once with
+ * approx * (2.0 - den * approx), and then multiplied by num using 16-bit
+ * hi/low partial products.
+ *
+ * NOTE(review): den is presumably (den_hi << 16) + (den_low << 1), mirroring
+ * how this function splits its own intermediates - confirm against
+ * signal_processing_library.h. den_hi == 0 makes WebRtcSpl_DivW32W16()
+ * return its 0x7FFFFFFF sentinel, which is then cast to int16_t.
+ *
+ * RTC_NO_SANITIZE("signed-integer-overflow") is applied because the
+ * 0x7fffffff - tmpW32 step can overflow (see the UBSan note in the body).
+ */
+int32_t RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
+WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
+{
+    int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
+    int32_t tmpW32;
+
+    approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi);
+    // result in Q14 (Note: 0x1FFFFFFF ~= 0.5 in Q30)
+
+    // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30)
+    tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
+    // tmpW32 = den * approx
+
+    tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
+    // UBSan: 2147483647 - -2 cannot be represented in type 'int'
+
+    // Store tmpW32 in hi and low format
+    tmp_hi = (int16_t)(tmpW32 >> 16);
+    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // tmpW32 = 1/den in Q29
+    tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1;
+
+    // 1/den in hi and low format
+    tmp_hi = (int16_t)(tmpW32 >> 16);
+    tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1);
+
+    // Store num in hi and low format
+    num_hi = (int16_t)(num >> 16);
+    num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1);
+
+    // num * (1/den) by 32 bit multiplication (result in Q28)
+
+    tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) +
+        (num_low * tmp_hi >> 15);
+
+    // Put result in Q31 (convert from Q28)
+    tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3);
+
+    return tmpW32;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.cc
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.cc
@ -0,0 +1,34 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/dot_product_with_scale.h"
+
+#include "webrtc/rtc_base/numerics/safe_conversions.h"
+/* Returns the sum over i in [0, length) of
+ * (vector1[i] * vector2[i]) >> scaling. Each product is shifted before it is
+ * added to a 64-bit accumulator, and the total is converted to int32_t with
+ * rtc::saturated_cast (presumably clamping to the int32_t range).
+ */
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      size_t length,
+                                      int scaling) {
+  int64_t sum = 0;
+  size_t i = 0;
+
+  /* Unroll the loop to improve performance. */
+  for (i = 0; i + 3 < length; i += 4) {
+    sum += (vector1[i + 0] * vector2[i + 0]) >> scaling;
+    sum += (vector1[i + 1] * vector2[i + 1]) >> scaling;
+    sum += (vector1[i + 2] * vector2[i + 2]) >> scaling;
+    sum += (vector1[i + 3] * vector2[i + 3]) >> scaling;
+  }
+  for (; i < length; i++) {  /* Remaining 0-3 elements. */
+    sum += (vector1[i] * vector2[i]) >> scaling;
+  }
+
+  return rtc::saturated_cast<int32_t>(sum);
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.h
@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
+#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Calculates the dot product between two (int16_t) vectors.
+//
+// Input:
+//      - vector1       : Vector 1
+//      - vector2       : Vector 2
+//      - length        : Number of samples used in the dot product
+//      - scaling       : The number of right bit shifts to apply on each term
+//                        during calculation to avoid overflow, i.e., the
+//                        output will be in Q(-|scaling|)
+//
+// Return value         : The dot product in Q(-scaling)
+int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,
+                                      const int16_t* vector2,
+                                      size_t length,
+                                      int scaling);
+
+#ifdef __cplusplus
+}
+#endif  // __cplusplus
+#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/downsample_fast.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/downsample_fast.c
@ -0,0 +1,65 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+#include "webrtc/rtc_base/checks.h"
+#include "webrtc/rtc_base/sanitizer.h"
+
+// TODO(Bjornv): Change the function parameter order to WebRTC code style.
+// C version of WebRtcSpl_DownsampleFast() for generic platforms.
+int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
+                              size_t data_in_length,
+                              int16_t* data_out,
+                              size_t data_out_length,
+                              const int16_t* __restrict coefficients,
+                              size_t coefficients_length,
+                              int factor,
+                              size_t delay) {
+  // FIR-filters |data_in| with |coefficients| and keeps one output for every
+  // |factor| input positions, starting at input index |delay|.
+  // Returns 0 on success and -1 when the arguments are unusable.
+
+  // One past the newest input index consumed for the last output sample.
+  const size_t input_end = delay + factor * (data_out_length - 1) + 1;
+  int16_t* const original_data_out = data_out;
+  size_t in_pos;
+  size_t tap;
+
+  // Nothing to produce, no filter taps, or not enough input samples.
+  if (data_out_length == 0) {
+    return -1;
+  }
+  if (coefficients_length == 0) {
+    return -1;
+  }
+  if (data_in_length < input_end) {
+    return -1;
+  }
+
+  rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]),
+                           coefficients_length);
+
+  for (in_pos = delay; in_pos < input_end; in_pos += factor) {
+    // Accumulator in Q12, pre-loaded with 0.5 (2048) so that the final shift
+    // rounds instead of truncating.
+    int32_t acc = 2048;
+
+    for (tap = 0; tap < coefficients_length; ++tap) {
+      // The index may be negative for the first outputs; the original code
+      // notes this is intentional (filter state is expected to be stored in
+      // the "negative" positions preceding the buffer).
+      const ptrdiff_t src = (ptrdiff_t) in_pos - (ptrdiff_t) tap;
+
+      rtc_MsanCheckInitialized(&data_in[src], sizeof(data_in[0]), 1);
+      acc += coefficients[tap] * data_in[src];
+    }
+
+    // Back to Q0, then saturate to 16 bits and store.
+    acc >>= 12;
+    *data_out = WebRtcSpl_SatW32ToW16(acc);
+    ++data_out;
+  }
+
+  RTC_DCHECK_EQ(original_data_out + data_out_length, data_out);
+  rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]),
+                           data_out_length);
+
+  return 0;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/energy.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/energy.c
@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Energy().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int32_t WebRtcSpl_Energy(int16_t* vector,
+                         size_t vector_length,
+                         int* scale_factor)
+{
+    // Sum of squared samples, each square shifted right by the amount chosen
+    // by WebRtcSpl_GetScalingSquare(). The shift that was used is reported
+    // through |scale_factor|.
+    const int shift =
+        WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
+    int32_t energy = 0;
+    size_t n;
+
+    for (n = 0; n != vector_length; ++n)
+    {
+        energy += (vector[n] * vector[n]) >> shift;
+    }
+
+    *scale_factor = shift;
+    return energy;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/get_scaling_square.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/get_scaling_square.c
@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_GetScalingSquare().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector,
+                                   size_t in_vector_length,
+                                   size_t times)
+{
+    // Returns the number of right shifts to apply to each squared sample so
+    // that |times| such squares can be accumulated in 32 bits:
+    //   nbits = number of bits needed to represent |times|
+    //   t     = headroom (normalization shift) of the largest square
+    //   result = max(0, nbits - t), or 0 when every sample is zero.
+    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times);
+    size_t i;
+    // Magnitudes are tracked in 32 bits. With int16_t, |-32768| wraps back to
+    // -32768, so that sample was ignored and the scaling came out too small.
+    // -1 is kept as the start value so an empty vector behaves as before.
+    int32_t smax = -1;
+    int32_t sabs;
+    int16_t t;
+
+    for (i = 0; i < in_vector_length; i++)
+    {
+        sabs = in_vector[i];
+        if (sabs < 0)
+        {
+            sabs = -sabs;
+        }
+        if (sabs > smax)
+        {
+            smax = sabs;
+        }
+    }
+    // smax <= 32768, so smax * smax <= 2^30 fits in int32_t.
+    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));
+
+    if (smax == 0)
+    {
+        return 0; // Since norm(0) returns 0
+    } else
+    {
+        return (t > nbits) ? 0 : nbits - t;
+    }
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/real_fft.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/real_fft.h
@ -0,0 +1,96 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
+
+#include <stdint.h>
+
+// For ComplexFFT(), the maximum fft order is 10;
+// WebRTC APM uses orders of only 7 and 8.
+enum { kMaxFFTOrder = 10 };
+
+struct RealFFT;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
+void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
+
+// Compute an FFT for a real-valued signal of length of 2^order,
+// where 1 < order <= kMaxFFTOrder. Transform length is determined by the
+// specification structure, which must be initialized prior to calling the FFT
+// function with WebRtcSpl_CreateRealFFT().
+// The relationship between the input and output sequences can
+// be expressed in terms of the DFT, i.e.:
+//     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+//     n=0,1,2,...N-1
+//     N=2^order.
+// The conjugate-symmetric output sequence is represented using a CCS vector,
+// which is of length N+2, and is organized as follows:
+//     Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
+//     Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
+// where R[n] and I[n], respectively, denote the real and imaginary components
+// for FFT bin 'n'. Bins  are numbered from 0 to N/2, where N is the FFT length.
+// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+// the foldover frequency.
+//
+// Input Arguments:
+//   self - pointer to preallocated and initialized FFT specification structure.
+//   real_data_in - the input signal. For an ARM Neon platform, it must be
+//                  aligned on a 32-byte boundary.
+//
+// Output Arguments:
+//   complex_data_out - the output complex signal with (2^order + 2) 16-bit
+//                      elements. For an ARM Neon platform, it must be different
+//                      from real_data_in, and aligned on a 32-byte boundary.
+//
+// Return Value:
+//   0  - FFT calculation is successful.
+//   -1 - Error with bad arguments (null pointers).
+int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
+                             const int16_t* real_data_in,
+                             int16_t* complex_data_out);
+
+// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
+// 2^order, where 1 < order <= kMaxFFTOrder. Transform length is determined by
+// the specification structure, which must be initialized prior to calling the
+// FFT function with WebRtcSpl_CreateRealFFT().
+// For a transform of length M, the input sequence is represented using a packed
+// CCS vector of length M+2, which is explained in the comments for
+// WebRtcSpl_RealForwardFFT above.
+//
+// Input Arguments:
+//   self - pointer to preallocated and initialized FFT specification structure.
+//   complex_data_in - the input complex signal with (2^order + 2) 16-bit
+//                     elements. For an ARM Neon platform, it must be aligned on
+//                     a 32-byte boundary.
+//
+// Output Arguments:
+//   real_data_out - the output real signal. For an ARM Neon platform, it must
+//                   be different to complex_data_in, and aligned on a 32-byte
+//                   boundary.
+//
+// Return Value:
+//   0 or a positive number - a value that the elements in the |real_data_out|
+//                            should be shifted left with in order to get
+//                            correct physical values.
+//   -1 - Error with bad arguments (null pointers).
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self,
+                             const int16_t* complex_data_in,
+                             int16_t* real_data_out);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/signal_processing_library.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/signal_processing_library.h
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/spl_inl.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/spl_inl.h
@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This header file includes the inline functions in
+// the fix point signal processing library.
+
+#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
+#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
+
+#include "webrtc/rtc_base/compile_assert_c.h"
+
+extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];
+
+// Don't call this directly except in tests!
+static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) {
+  uint32_t smeared = n;
+  uint32_t shift;
+  uint32_t slot;
+
+  // Copy the highest set bit into every lower bit position (shifts of 1, 2,
+  // 4, 8 and 16). The result is a run of 0 bits followed by a run of 1 bits
+  // with the same number of leading zeros as |n|; only 33 such values exist.
+  for (shift = 1; shift <= 16; shift <<= 1) {
+    smeared |= smeared >> shift;
+  }
+
+  // The multiplier was picked (by exhaustive search) so that the top 6 bits
+  // of the product differ for each of those 33 values; they index the table
+  // that holds the answer.
+  slot = (smeared * 0x8c0b2891) >> 26;
+  return kWebRtcSpl_CountLeadingZeros32_Table[slot];
+}
+
+// Don't call this directly except in tests!
+static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) {
+  const uint32_t upper = (uint32_t)(n >> 32);
+
+  if (upper == 0) {
+    // Upper half is empty: 32 zeros plus whatever the lower half contributes.
+    return 32 + WebRtcSpl_CountLeadingZeros32_NotBuiltin((uint32_t)n);
+  }
+  // Otherwise only the upper half matters.
+  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(upper);
+}
+
+// Returns the number of leading zero bits in the argument.
+static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) {
+#ifdef __GNUC__
+  RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t));
+  // Zero is answered explicitly instead of being passed to the builtin.
+  if (n == 0) {
+    return 32;
+  }
+  return __builtin_clz(n);
+#else
+  // No compiler builtin available: use the table-based fallback.
+  return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n);
+#endif
+}
+
+// Returns the number of leading zero bits in the argument.
+static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) {
+#ifdef __GNUC__
+  RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t));  // NOLINT
+  // Zero is answered explicitly instead of being passed to the builtin.
+  if (n == 0) {
+    return 64;
+  }
+  return __builtin_clzll(n);
+#else
+  // No compiler builtin available: use the table-based fallback.
+  return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n);
+#endif
+}
+
+#ifdef WEBRTC_ARCH_ARM_V7
+#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h"
+#else
+
+#if defined(MIPS32_LE)
+#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h"
+#endif
+
+#if !defined(MIPS_DSP_R1_LE)
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) {
+  // Clamp to the int16_t range; in-range values pass through unchanged.
+  if (value32 > 32767) {
+    return 32767;
+  }
+  if (value32 < -32768) {
+    return -32768;
+  }
+  return (int16_t)value32;
+}
+
+static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) {
+  // The addition is carried out on unsigned values because signed overflow
+  // is undefined behavior.
+  const uint32_t wrapped = (uint32_t)a + (uint32_t)b;
+  const int32_t sum = (int32_t)wrapped;
+  const int a_negative = a < 0;
+  const int b_negative = b < 0;
+
+  // Operands of different sign can never overflow.
+  if (a_negative != b_negative) {
+    return sum;
+  }
+  // Same-sign operands overflowed iff the sign of the sum flipped.
+  if (a_negative == (sum < 0)) {
+    return sum;
+  }
+  // Saturate towards the side the operands were on.
+  return a_negative ? INT32_MIN : INT32_MAX;
+}
+
+static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) {
+  // The subtraction is carried out on unsigned values because signed
+  // overflow is undefined behavior.
+  const uint32_t wrapped = (uint32_t)a - (uint32_t)b;
+  const int32_t diff = (int32_t)wrapped;
+  const int a_negative = a < 0;
+  const int b_negative = b < 0;
+
+  // Operands of the same sign can never overflow.
+  if (a_negative == b_negative) {
+    return diff;
+  }
+  // With different signs the result must keep the sign of |a|.
+  if (a_negative == (diff < 0)) {
+    return diff;
+  }
+  // Saturate towards the side |a| was on.
+  return a_negative ? INT32_MIN : INT32_MAX;
+}
+
+static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) {
+  // Add in 32 bits, where two 16-bit operands cannot overflow, then clamp.
+  const int32_t wide_sum = (int32_t)a + (int32_t)b;
+  return WebRtcSpl_SatW32ToW16(wide_sum);
+}
+
+static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) {
+  // Subtract in 32 bits, where two 16-bit operands cannot overflow, then
+  // clamp.
+  const int32_t wide_diff = (int32_t)var1 - (int32_t)var2;
+  return WebRtcSpl_SatW32ToW16(wide_diff);
+}
+#endif  // #if !defined(MIPS_DSP_R1_LE)
+
+#if !defined(MIPS32_LE)
+static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) {
+  // 32 minus the leading-zero count, i.e. the position of the highest set
+  // bit counted from 1 (0 when n == 0).
+  const int leading_zeros = WebRtcSpl_CountLeadingZeros32(n);
+  return 32 - leading_zeros;
+}
+
+// Return the number of steps a can be left-shifted without overflow,
+// or 0 if a == 0.
+static __inline int16_t WebRtcSpl_NormW32(int32_t a) {
+  uint32_t bits;
+
+  if (a == 0) {
+    return 0;
+  }
+  // Negative inputs are bit-inverted so that their redundant sign bits show
+  // up as leading zeros; one is subtracted to leave room for the sign bit.
+  bits = (a < 0) ? ~a : a;
+  return WebRtcSpl_CountLeadingZeros32(bits) - 1;
+}
+
+// Return the number of steps a can be left-shifted without overflow,
+// or 0 if a == 0.
+static __inline int16_t WebRtcSpl_NormU32(uint32_t a) {
+  // Zero is defined to need no shift; otherwise the answer is simply the
+  // number of leading zero bits.
+  if (a == 0) {
+    return 0;
+  }
+  return WebRtcSpl_CountLeadingZeros32(a);
+}
+
+// Return the number of steps a can be left-shifted without overflow,
+// or 0 if a == 0.
+static __inline int16_t WebRtcSpl_NormW16(int16_t a) {
+  int32_t widened;
+
+  if (a == 0) {
+    return 0;
+  }
+  // Work on a 32-bit copy; negative inputs are bit-inverted first. Of the
+  // leading zeros counted, 16 come from the widening and 1 is reserved for
+  // the sign bit, hence the -17.
+  widened = a;
+  if (a < 0) {
+    widened = ~widened;
+  }
+  return WebRtcSpl_CountLeadingZeros32(widened) - 17;
+}
+
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) {
+  // Multiply-accumulate: a * b + c, with the product formed in int.
+  const int32_t product = a * b;
+  return product + c;
+}
+#endif  // #if !defined(MIPS32_LE)
+
+#endif  // WEBRTC_ARCH_ARM_V7
+
+#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/min_max_operations.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/min_max_operations.c
@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file contains the implementation of functions
+ * WebRtcSpl_MaxAbsValueW16C()
+ * WebRtcSpl_MaxAbsValueW32C()
+ * WebRtcSpl_MaxValueW16C()
+ * WebRtcSpl_MaxValueW32C()
+ * WebRtcSpl_MinValueW16C()
+ * WebRtcSpl_MinValueW32C()
+ * WebRtcSpl_MaxAbsIndexW16()
+ * WebRtcSpl_MaxIndexW16()
+ * WebRtcSpl_MaxIndexW32()
+ * WebRtcSpl_MinIndexW16()
+ * WebRtcSpl_MinIndexW32()
+ *
+ */
+
+#include <stdlib.h>
+
+#include "webrtc/rtc_base/checks.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// TODO(bjorn/kma): Consolidate function pairs (e.g. combine
+//   WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.)
+// TODO(kma): Move the next six functions into min_max_operations_c.c.
+
+// Maximum absolute value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) {
+  // Magnitudes are tracked as int because abs(-32768) does not fit in
+  // int16_t.
+  int peak = 0;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  for (k = 0; k < length; ++k) {
+    const int magnitude = abs((int)vector[k]);
+
+    peak = (magnitude > peak) ? magnitude : peak;
+  }
+
+  // A peak of 32768 (from -32768) is reported as WEBRTC_SPL_WORD16_MAX.
+  return (int16_t)((peak > WEBRTC_SPL_WORD16_MAX) ? WEBRTC_SPL_WORD16_MAX
+                                                  : peak);
+}
+
+// Maximum absolute value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
+  // Returns the largest absolute value found in |vector|, limited to
+  // WEBRTC_SPL_WORD32_MAX.
+  //
+  // Magnitudes are kept in uint32_t so that |INT32_MIN| = 0x80000000 is
+  // representable before the final clamp.
+
+  uint32_t absolute = 0, maximum = 0;
+  size_t i = 0;
+
+  RTC_DCHECK_GT(length, 0);
+
+  for (i = 0; i < length; i++) {
+    const int32_t value = vector[i];
+
+    // abs(INT32_MIN) is undefined behavior, so negate in unsigned arithmetic
+    // instead: 0 - (uint32_t)INT32_MIN is exactly 0x80000000.
+    absolute = (value < 0) ? (uint32_t)0 - (uint32_t)value : (uint32_t)value;
+    if (absolute > maximum) {
+      maximum = absolute;
+    }
+  }
+
+  // 0x80000000 cannot be returned as int32_t; report WEBRTC_SPL_WORD32_MAX.
+  maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX);
+
+  return (int32_t)maximum;
+}
+
+// Maximum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) {
+  // Start from the smallest int16_t so that any element can replace it.
+  int16_t best = WEBRTC_SPL_WORD16_MIN;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  for (k = 0; k < length; ++k) {
+    const int16_t candidate = vector[k];
+
+    if (candidate > best) {
+      best = candidate;
+    }
+  }
+  return best;
+}
+
+// Maximum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) {
+  // Start from the smallest int32_t so that any element can replace it.
+  int32_t best = WEBRTC_SPL_WORD32_MIN;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  for (k = 0; k < length; ++k) {
+    const int32_t candidate = vector[k];
+
+    if (candidate > best) {
+      best = candidate;
+    }
+  }
+  return best;
+}
+
+// Minimum value of word16 vector. C version for generic platforms.
+int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) {
+  // Start from the largest int16_t so that any element can replace it.
+  int16_t lowest = WEBRTC_SPL_WORD16_MAX;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  for (k = 0; k < length; ++k) {
+    const int16_t candidate = vector[k];
+
+    if (candidate < lowest) {
+      lowest = candidate;
+    }
+  }
+  return lowest;
+}
+
+// Minimum value of word32 vector. C version for generic platforms.
+int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) {
+  // Start from the largest int32_t so that any element can replace it.
+  int32_t lowest = WEBRTC_SPL_WORD32_MAX;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  for (k = 0; k < length; ++k) {
+    const int32_t candidate = vector[k];
+
+    if (candidate < lowest) {
+      lowest = candidate;
+    }
+  }
+  return lowest;
+}
+
+// Index of maximum absolute value in a word16 vector.
+size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
+  // Magnitudes are compared as int so that abs(-32768) is representable.
+  int peak = 0;
+  size_t peak_pos = 0;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  // Strict comparison: the earliest occurrence of the largest magnitude wins,
+  // and index 0 is returned when every element is zero.
+  for (k = 0; k < length; ++k) {
+    const int magnitude = abs((int)vector[k]);
+
+    if (magnitude <= peak) {
+      continue;
+    }
+    peak = magnitude;
+    peak_pos = k;
+  }
+
+  return peak_pos;
+}
+
+// Index of maximum value in a word16 vector.
+size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
+  int16_t best = WEBRTC_SPL_WORD16_MIN;
+  size_t best_pos = 0;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  // Strict comparison: the earliest occurrence of the maximum wins.
+  for (k = 0; k < length; ++k) {
+    if (vector[k] <= best) {
+      continue;
+    }
+    best = vector[k];
+    best_pos = k;
+  }
+
+  return best_pos;
+}
+
+// Index of maximum value in a word32 vector.
+size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) {
+  int32_t best = WEBRTC_SPL_WORD32_MIN;
+  size_t best_pos = 0;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  // Strict comparison: the earliest occurrence of the maximum wins.
+  for (k = 0; k < length; ++k) {
+    if (vector[k] <= best) {
+      continue;
+    }
+    best = vector[k];
+    best_pos = k;
+  }
+
+  return best_pos;
+}
+
+// Index of minimum value in a word16 vector.
+size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) {
+  int16_t lowest = WEBRTC_SPL_WORD16_MAX;
+  size_t lowest_pos = 0;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  // Strict comparison: the earliest occurrence of the minimum wins.
+  for (k = 0; k < length; ++k) {
+    if (vector[k] >= lowest) {
+      continue;
+    }
+    lowest = vector[k];
+    lowest_pos = k;
+  }
+
+  return lowest_pos;
+}
+
+// Index of minimum value in a word32 vector.
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
+  int32_t lowest = WEBRTC_SPL_WORD32_MAX;
+  size_t lowest_pos = 0;
+  size_t k;
+
+  RTC_DCHECK_GT(length, 0);
+
+  // Strict comparison: the earliest occurrence of the minimum wins.
+  for (k = 0; k < length; ++k) {
+    if (vector[k] >= lowest) {
+      continue;
+    }
+    lowest = vector[k];
+    lowest_pos = k;
+  }
+
+  return lowest_pos;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_48khz.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_48khz.c
@ -0,0 +1,186 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains resampling functions between 48 kHz and nb/wb.
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include <string.h>
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+
+////////////////////////////
+///// 48 kHz -> 16 kHz /////
+////////////////////////////
+
+// 48 -> 16 resampler
+void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem)
+{
+    // Scratch layout in |tmpmem| (int32_t words):
+    //   [16, 496)  480 low-pass filtered samples (stage 1 output)
+    //   [ 8,  16)  8 words of saved 48->32 state placed right before them
+    //   [ 0, 320)  stage 2 output, written over the start of its own input
+    int32_t* const lowpassed = tmpmem + 16;
+    int32_t* const stage2_in = tmpmem + 8;
+    const size_t state_bytes = 8 * sizeof(int32_t);
+
+    // Stage 1: 48 kHz -> 48 kHz low-pass.
+    //   int16_t in[480] -> int32_t out[480]
+    WebRtcSpl_LPBy2ShortToInt(in, 480, lowpassed, state->S_48_48);
+
+    // Stage 2: 48 kHz -> 32 kHz.
+    //   int32_t in[480] -> int32_t out[320]
+    // Prepend the state saved by the previous call, then save the last 8
+    // filtered words (tmpmem[488..495]) for the next call.
+    memcpy(stage2_in, state->S_48_32, state_bytes);
+    memcpy(state->S_48_32, tmpmem + 488, state_bytes);
+    WebRtcSpl_Resample48khzTo32khz(stage2_in, tmpmem, 160);
+
+    // Stage 3: 32 kHz -> 16 kHz.
+    //   int32_t in[320] -> int16_t out[160]
+    WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16);
+}
+
+// initialize state of 48 -> 16 resampler
+void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state)
+{
+    // Zero the filter state of all three stages (16, 8 and 8 words).
+    memset(state->S_48_48, 0, sizeof(int32_t[16]));
+    memset(state->S_48_32, 0, sizeof(int32_t[8]));
+    memset(state->S_32_16, 0, sizeof(int32_t[8]));
+}
+
+////////////////////////////
+///// 16 kHz -> 48 kHz /////
+////////////////////////////
+
+// 16 -> 48 resampler
+void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
+                                    WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem)
+{
+    // Scratch layout in |tmpmem| (int32_t words):
+    //   [16, 336)  320 upsampled samples (stage 1 output)
+    //   [ 8,  16)  8 words of saved 32->24 state placed right before them
+    //   [ 0, 240)  stage 2 output, written over the start of its own input
+    int32_t* const upsampled = tmpmem + 16;
+    int32_t* const stage2_in = tmpmem + 8;
+    const size_t state_bytes = 8 * sizeof(int32_t);
+
+    // Stage 1: 16 kHz -> 32 kHz.
+    //   int16_t in[160] -> int32_t out[320]
+    WebRtcSpl_UpBy2ShortToInt(in, 160, upsampled, state->S_16_32);
+
+    // Stage 2: 32 kHz -> 24 kHz.
+    //   int32_t in[320] -> int32_t out[240]
+    // Prepend the state saved by the previous call, then save the last 8
+    // upsampled words (tmpmem[328..335]) for the next call.
+    memcpy(stage2_in, state->S_32_24, state_bytes);
+    memcpy(state->S_32_24, tmpmem + 328, state_bytes);
+    WebRtcSpl_Resample32khzTo24khz(stage2_in, tmpmem, 80);
+
+    // Stage 3: 24 kHz -> 48 kHz.
+    //   int32_t in[240] -> int16_t out[480]
+    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 16 -> 48 resampler
+void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state)
+{
+    // Zero the filter state of all three stages (8 words each).
+    memset(state->S_16_32, 0, sizeof(int32_t[8]));
+    memset(state->S_32_24, 0, sizeof(int32_t[8]));
+    memset(state->S_24_48, 0, sizeof(int32_t[8]));
+}
+
+////////////////////////////
+///// 48 kHz ->  8 kHz /////
+////////////////////////////
+
+// 48 -> 8 resampler
+void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem)
+{
+    // Scratch layout in |tmpmem| (int32_t words):
+    //   [256, 496)  240 decimated samples (stage 1 output)
+    //   [ 16, 256)  240 low-pass filtered samples (stage 2 output)
+    //   [  8,  16)  8 words of saved 24->16 state placed right before them
+    //   [  0, 160)  stage 3 output, written over the start of its own input
+    int32_t* const decimated = tmpmem + 256;
+    int32_t* const lowpassed = tmpmem + 16;
+    int32_t* const stage3_in = tmpmem + 8;
+    const size_t state_bytes = 8 * sizeof(int32_t);
+
+    // Stage 1: 48 kHz -> 24 kHz.
+    //   int16_t in[480] -> int32_t out[240]
+    WebRtcSpl_DownBy2ShortToInt(in, 480, decimated, state->S_48_24);
+
+    // Stage 2: 24 kHz -> 24 kHz low-pass.
+    //   int32_t in[240] -> int32_t out[240]
+    WebRtcSpl_LPBy2IntToInt(decimated, 240, lowpassed, state->S_24_24);
+
+    // Stage 3: 24 kHz -> 16 kHz.
+    //   int32_t in[240] -> int32_t out[160]
+    // Prepend the state saved by the previous call, then save the last 8
+    // filtered words (tmpmem[248..255]) for the next call.
+    memcpy(stage3_in, state->S_24_16, state_bytes);
+    memcpy(state->S_24_16, tmpmem + 248, state_bytes);
+    WebRtcSpl_Resample48khzTo32khz(stage3_in, tmpmem, 80);
+
+    // Stage 4: 16 kHz -> 8 kHz.
+    //   int32_t in[160] -> int16_t out[80]
+    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
+}
+
+// initialize state of 48 -> 8 resampler
+void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state)
+{
+    // Zero the filter state of all four stages (8, 16, 8 and 8 words).
+    memset(state->S_48_24, 0, sizeof(int32_t[8]));
+    memset(state->S_24_24, 0, sizeof(int32_t[16]));
+    memset(state->S_24_16, 0, sizeof(int32_t[8]));
+    memset(state->S_16_8, 0, sizeof(int32_t[8]));
+}
+
+////////////////////////////
+/////  8 kHz -> 48 kHz /////
+////////////////////////////
+
+// 8 -> 48 resampler
+void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
+                                   WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem)
+{
+    // Scratch layout in |tmpmem| (int32_t words):
+    //   [264, 424)  160 upsampled samples (stage 1 output)
+    //   [256, 264)  8 words of saved 16->12 state placed right before them
+    //   [240, 360)  120 samples (stage 2 output, overlaps its own input)
+    //   [  0, 240)  240 samples (stage 3 output)
+    int32_t* const upsampled = tmpmem + 264;
+    int32_t* const stage2_in = tmpmem + 256;
+    int32_t* const stage2_out = tmpmem + 240;
+    const size_t state_bytes = 8 * sizeof(int32_t);
+
+    // Stage 1: 8 kHz -> 16 kHz.
+    //   int16_t in[80] -> int32_t out[160]
+    WebRtcSpl_UpBy2ShortToInt(in, 80, upsampled, state->S_8_16);
+
+    // Stage 2: 16 kHz -> 12 kHz.
+    //   int32_t in[160] -> int32_t out[120]
+    // Prepend the state saved by the previous call, then save the last 8
+    // upsampled words (tmpmem[416..423]) for the next call.
+    memcpy(stage2_in, state->S_16_12, state_bytes);
+    memcpy(state->S_16_12, tmpmem + 416, state_bytes);
+    WebRtcSpl_Resample32khzTo24khz(stage2_in, stage2_out, 40);
+
+    // Stage 3: 12 kHz -> 24 kHz.
+    //   int32_t in[120] -> int32_t out[240]
+    WebRtcSpl_UpBy2IntToInt(stage2_out, 120, tmpmem, state->S_12_24);
+
+    // Stage 4: 24 kHz -> 48 kHz.
+    //   int32_t in[240] -> int16_t out[480]
+    WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48);
+}
+
+// initialize state of 8 -> 48 resampler
+void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state)
+{
+    // Zero the filter state of all four stages (8 words each).
+    memset(state->S_8_16, 0, sizeof(int32_t[8]));
+    memset(state->S_16_12, 0, sizeof(int32_t[8]));
+    memset(state->S_12_24, 0, sizeof(int32_t[8]));
+    memset(state->S_24_48, 0, sizeof(int32_t[8]));
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.c
@ -0,0 +1,689 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file implements the internal resample-by-2 functions declared in
+ * resample_by_2_internal.h.
+ */
+
+#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h"
+#include "webrtc/rtc_base/sanitizer.h"
+
+// allpass filter coefficients: one per cascaded stage; row 0 is used by the "upper" branches, row 1 by the "lower" branches.
+static const int16_t kResampleAllpass[2][3] = {
+        {821, 6110, 12382},
+        {3050, 9368, 15063}
+};
+
+// WebRtcSpl_DownBy2IntToShort
+//   decimator (2:1); even and odd input samples each pass through their own 3-stage allpass branch
+// input:  int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! (reused as scratch for the branch outputs)
+// output: int16_t (saturated) (of length len/2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch)
+
+void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
+WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
+                            int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;  // from here on len counts output samples
+
+    // lower allpass filter (operates on even input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[1];
+        // UBSan: -1771017321 - 999586185 cannot be represented in type 'int'
+
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily (back into the even input slot)
+        in[i << 1] = (state[3] >> 1);
+    }
+
+    in++;
+
+    // upper allpass filter (operates on odd input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // divide by two and store temporarily (back into the odd input slot)
+        in[i << 1] = (state[7] >> 1);
+    }
+
+    in--;
+
+    // combine allpass outputs
+    for (i = 0; i < len; i += 2)  // two outputs per iteration; NOTE(review): assumes the halved len is even - confirm for all callers
+    {
+        // add both (already halved) allpass outputs and scale down to 16-bit range
+        tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
+        tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
+        if (tmp0 > (int32_t)0x00007FFF)
+            tmp0 = 0x00007FFF;
+        if (tmp0 < (int32_t)0xFFFF8000)
+            tmp0 = 0xFFFF8000;
+        out[i] = (int16_t)tmp0;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i + 1] = (int16_t)tmp1;
+    }
+}
+
+// WebRtcSpl_DownBy2ShortToInt
+//   decimator (2:1); even and odd input samples each pass through their own 3-stage allpass branch
+// input:  int16_t (not modified)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch)
+
+void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
+WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
+                            int32_t len,
+                            int32_t *out,
+                            int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;  // from here on len counts output samples
+
+    // lower allpass filter (operates on even input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // UBSan: -1379909682 - 834099714 cannot be represented in type 'int'
+
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily
+        out[i] = (state[3] >> 1);
+    }
+
+    in++;
+
+    // upper allpass filter (operates on odd input samples)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // divide by two and add to the lower-branch result already in out[i]
+        out[i] += (state[7] >> 1);
+    }
+
+    in--;  // only restores the local pointer; the caller's pointer is unaffected
+}
+
+// WebRtcSpl_UpBy2ShortToInt
+//   interpolator (1:2); every input sample is run through both 3-stage allpass branches
+// input:  int16_t
+// output: int32_t (normalized, not saturated) (of length len*2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch)
+void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out,
+                               int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples; written to out[0], out[2], ... in 0-based indexing)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down (undo the 15-bit left shift applied on input) and store
+        out[i << 1] = state[7] >> 15;
+    }
+
+    out++;
+
+    // lower allpass filter (generates even output samples; written to out[1], out[3], ... of the original buffer)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i] << 15) + (1 << 14);
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down (undo the 15-bit left shift applied on input) and store
+        out[i << 1] = state[3] >> 15;
+    }
+}
+
+// WebRtcSpl_UpBy2IntToInt
+//   interpolator (1:2); every input sample is run through both 3-stage allpass branches
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch)
+void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
+                             int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples; written to out[0], out[2], ... in 0-based indexing)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // store unscaled: the output keeps the shifted input format
+        out[i << 1] = state[7];
+    }
+
+    out++;
+
+    // lower allpass filter (generates even output samples; written to out[1], out[3], ... of the original buffer)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // store unscaled: the output keeps the shifted input format
+        out[i << 1] = state[3];
+    }
+}
+
+// WebRtcSpl_UpBy2IntToShort
+//   interpolator (1:2); every input sample is run through both 3-stage allpass branches
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int16_t (saturated) (of length len*2)
+// state:  filter state array; length = 8 (state[0..3]: lower branch, state[4..7]: upper branch)
+void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out,
+                               int32_t *state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    // upper allpass filter (generates odd output samples; written to out[0], out[2], ... in 0-based indexing)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // scale down, saturate to the int16_t range and store
+        tmp1 = state[7] >> 15;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i << 1] = (int16_t)tmp1;
+    }
+
+    out++;
+
+    // lower allpass filter (generates even output samples; written to out[1], out[3], ... of the original buffer)
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i];
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // scale down, saturate to the int16_t range and store
+        tmp1 = state[3] >> 15;
+        if (tmp1 > (int32_t)0x00007FFF)
+            tmp1 = 0x00007FFF;
+        if (tmp1 < (int32_t)0xFFFF8000)
+            tmp1 = 0xFFFF8000;
+        out[i << 1] = (int16_t)tmp1;
+    }
+}
+
+//   lowpass filter (no rate change): four allpass branches, two averaged per output sample
+// input:  int16_t
+// output: int32_t (normalized, not saturated); 2 * (len / 2) samples are written
+// state:  filter state array; length = 16 (indices 0..15 are used; state[12] doubles as the one-sample delay element)
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out,
+                               int32_t* state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;
+
+    // lower allpass filter: odd input -> even output samples
+    in++;
+    // initial state of polyphase delay element (last odd input sample of the previous call, saved by the final loop)
+    tmp0 = state[12];
+    for (i = 0; i < len; i++)
+    {
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily; then load the odd sample consumed by the next iteration
+        out[i << 1] = state[3] >> 1;
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+    }
+    in--;
+
+    // upper allpass filter: even input -> even output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[5];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+    }
+
+    // switch to odd output samples
+    out++;
+
+    // lower allpass filter: even input -> odd output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[9];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[8] + diff * kResampleAllpass[1][0];
+        state[8] = tmp0;
+        diff = tmp1 - state[10];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[9] + diff * kResampleAllpass[1][1];
+        state[9] = tmp1;
+        diff = tmp0 - state[11];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[11] = state[10] + diff * kResampleAllpass[1][2];
+        state[10] = tmp0;
+
+        // divide by two and store temporarily
+        out[i << 1] = state[11] >> 1;
+    }
+
+    // upper allpass filter: odd input -> odd output samples
+    in++;
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14);
+        diff = tmp0 - state[13];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[12] + diff * kResampleAllpass[0][0];
+        state[12] = tmp0;
+        diff = tmp1 - state[14];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[13] + diff * kResampleAllpass[0][1];
+        state[13] = tmp1;
+        diff = tmp0 - state[15];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[15] = state[14] + diff * kResampleAllpass[0][2];
+        state[14] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+    }
+}
+
+//   lowpass filter (no rate change): four allpass branches, two averaged per output sample
+// input:  int32_t (shifted 15 positions to the left, + offset 16384)
+// output: int32_t (normalized, not saturated); 2 * (len / 2) samples are written
+// state:  filter state array; length = 16 (indices 0..15 are used; state[12] doubles as the one-sample delay element)
+void RTC_NO_SANITIZE("signed-integer-overflow")  // bugs.webrtc.org/5486
+WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out,
+                        int32_t* state)
+{
+    int32_t tmp0, tmp1, diff;
+    int32_t i;
+
+    len >>= 1;
+
+    // lower allpass filter: odd input -> even output samples
+    in++;
+    // initial state of polyphase delay element (last odd input sample of the previous call, saved by the final loop)
+    tmp0 = state[12];
+    for (i = 0; i < len; i++)
+    {
+        diff = tmp0 - state[1];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[0] + diff * kResampleAllpass[1][0];
+        state[0] = tmp0;
+        diff = tmp1 - state[2];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[1] + diff * kResampleAllpass[1][1];
+        state[1] = tmp1;
+        diff = tmp0 - state[3];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[3] = state[2] + diff * kResampleAllpass[1][2];
+        state[2] = tmp0;
+
+        // divide by two and store temporarily; then load the odd sample consumed by the next iteration
+        out[i << 1] = state[3] >> 1;
+        tmp0 = in[i << 1];
+    }
+    in--;
+
+    // upper allpass filter: even input -> even output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[5];
+        // UBSan: -794814117 - 1566149201 cannot be represented in type 'int'
+
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[4] + diff * kResampleAllpass[0][0];
+        state[4] = tmp0;
+        diff = tmp1 - state[6];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[5] + diff * kResampleAllpass[0][1];
+        state[5] = tmp1;
+        diff = tmp0 - state[7];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[7] = state[6] + diff * kResampleAllpass[0][2];
+        state[6] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
+    }
+
+    // switch to odd output samples
+    out++;
+
+    // lower allpass filter: even input -> odd output samples
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[9];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[8] + diff * kResampleAllpass[1][0];
+        state[8] = tmp0;
+        diff = tmp1 - state[10];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[9] + diff * kResampleAllpass[1][1];
+        state[9] = tmp1;
+        diff = tmp0 - state[11];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[11] = state[10] + diff * kResampleAllpass[1][2];
+        state[10] = tmp0;
+
+        // divide by two and store temporarily
+        out[i << 1] = state[11] >> 1;
+    }
+
+    // upper allpass filter: odd input -> odd output samples
+    in++;
+    for (i = 0; i < len; i++)
+    {
+        tmp0 = in[i << 1];
+        diff = tmp0 - state[13];
+        // scale down and round
+        diff = (diff + (1 << 13)) >> 14;
+        tmp1 = state[12] + diff * kResampleAllpass[0][0];
+        state[12] = tmp0;
+        diff = tmp1 - state[14];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        tmp0 = state[13] + diff * kResampleAllpass[0][1];
+        state[13] = tmp1;
+        diff = tmp0 - state[15];
+        // scale down and truncate
+        diff = diff >> 14;
+        if (diff < 0)
+            diff += 1;
+        state[15] = state[14] + diff * kResampleAllpass[0][2];
+        state[14] = tmp0;
+
+        // average the two allpass outputs, scale down and store
+        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
+    }
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.h
@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This header file contains some internal resampling functions.
+ *
+ */
+
+#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
+#define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
+
+#include <stdint.h>
+
+/*******************************************************************
+ * resample_by_2_internal.c
+ * Functions for internal use in the other resample functions
+ ******************************************************************/
+void WebRtcSpl_DownBy2IntToShort(int32_t* in,
+                                 int32_t len,
+                                 int16_t* out,
+                                 int32_t* state);  // 2:1 decimator; |in| is overwritten; state has 8 words
+
+void WebRtcSpl_DownBy2ShortToInt(const int16_t* in,
+                                 int32_t len,
+                                 int32_t* out,
+                                 int32_t* state);  // 2:1 decimator; state has 8 words
+
+void WebRtcSpl_UpBy2ShortToInt(const int16_t* in,
+                               int32_t len,
+                               int32_t* out,
+                               int32_t* state);  // 1:2 interpolator; writes len*2 samples; state has 8 words
+
+void WebRtcSpl_UpBy2IntToInt(const int32_t* in,
+                             int32_t len,
+                             int32_t* out,
+                             int32_t* state);  // 1:2 interpolator; writes len*2 samples; state has 8 words
+
+void WebRtcSpl_UpBy2IntToShort(const int32_t* in,
+                               int32_t len,
+                               int16_t* out,
+                               int32_t* state);  // 1:2 interpolator with int16_t saturation; state has 8 words
+
+void WebRtcSpl_LPBy2ShortToInt(const int16_t* in,
+                               int32_t len,
+                               int32_t* out,
+                               int32_t* state);  // lowpass, no rate change; state has 16 words
+
+void WebRtcSpl_LPBy2IntToInt(const int32_t* in,
+                             int32_t len,
+                             int32_t* out,
+                             int32_t* state);  // lowpass, no rate change; state has 16 words
+
+#endif  // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_fractional.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_fractional.c
@ -0,0 +1,239 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the resampling functions between 48, 44, 32 and 24 kHz.
+ * The description headers can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// interpolation coefficients: one row of filter taps per output phase of each fractional resampler
+static const int16_t kCoefficients48To32[2][8] = {  // 2 phases x 8 taps; row 1 is row 0 reversed
+        {778, -2050, 1087, 23285, 12903, -3783, 441, 222},
+        {222, 441, -3783, 12903, 23285, 1087, -2050, 778}
+};
+
+static const int16_t kCoefficients32To24[3][8] = {  // 3 phases x 8 taps; row 2 is row 0 reversed, row 1 is symmetric
+        {767, -2362, 2434, 24406, 10620, -3838, 721, 90},
+        {386, -381, -2646, 19062, 19062, -2646, -381, 386},
+        {90, 721, -3838, 10620, 24406, 2434, -2362, 767}
+};
+
+static const int16_t kCoefficients44To32[4][9] = {  // 4 rows x 9 taps; rows 0..2 are each applied forwards and backwards
+        {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138},
+        {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91},
+        {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53},
+        {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126}
+};
+
+//   Resampling ratio: 2/3
+// input:  int32_t (normalized, not saturated) :: size 3 * K (the filter reads In[0 .. 3*K+5], i.e. 6 samples past the 3 * K consumed)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
+//      K: number of blocks
+
+void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (3 input samples -> 2 output samples);
+    // process in sub blocks of size 3 samples.
+    int32_t tmp;
+    size_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;  // the output offset of 16384
+        tmp += kCoefficients48To32[0][0] * In[0];
+        tmp += kCoefficients48To32[0][1] * In[1];
+        tmp += kCoefficients48To32[0][2] * In[2];
+        tmp += kCoefficients48To32[0][3] * In[3];
+        tmp += kCoefficients48To32[0][4] * In[4];
+        tmp += kCoefficients48To32[0][5] * In[5];
+        tmp += kCoefficients48To32[0][6] * In[6];
+        tmp += kCoefficients48To32[0][7] * In[7];
+        Out[0] = tmp;
+
+        tmp = 1 << 14;  // second phase: window shifted by one input sample
+        tmp += kCoefficients48To32[1][0] * In[1];
+        tmp += kCoefficients48To32[1][1] * In[2];
+        tmp += kCoefficients48To32[1][2] * In[3];
+        tmp += kCoefficients48To32[1][3] * In[4];
+        tmp += kCoefficients48To32[1][4] * In[5];
+        tmp += kCoefficients48To32[1][5] * In[6];
+        tmp += kCoefficients48To32[1][6] * In[7];
+        tmp += kCoefficients48To32[1][7] * In[8];
+        Out[1] = tmp;
+
+        // update pointers
+        In += 3;
+        Out += 2;
+    }
+}
+
+//   Resampling ratio: 3/4
+// input:  int32_t (normalized, not saturated) :: size 4 * K (the filter reads In[0 .. 4*K+5], i.e. 6 samples past the 4 * K consumed)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K
+//      K: number of blocks
+
+void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (4 input samples -> 3 output samples);
+    // process in sub blocks of size 4 samples.
+    size_t m;
+    int32_t tmp;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;  // the output offset of 16384
+        tmp += kCoefficients32To24[0][0] * In[0];
+        tmp += kCoefficients32To24[0][1] * In[1];
+        tmp += kCoefficients32To24[0][2] * In[2];
+        tmp += kCoefficients32To24[0][3] * In[3];
+        tmp += kCoefficients32To24[0][4] * In[4];
+        tmp += kCoefficients32To24[0][5] * In[5];
+        tmp += kCoefficients32To24[0][6] * In[6];
+        tmp += kCoefficients32To24[0][7] * In[7];
+        Out[0] = tmp;
+
+        tmp = 1 << 14;  // second phase: window shifted by one input sample
+        tmp += kCoefficients32To24[1][0] * In[1];
+        tmp += kCoefficients32To24[1][1] * In[2];
+        tmp += kCoefficients32To24[1][2] * In[3];
+        tmp += kCoefficients32To24[1][3] * In[4];
+        tmp += kCoefficients32To24[1][4] * In[5];
+        tmp += kCoefficients32To24[1][5] * In[6];
+        tmp += kCoefficients32To24[1][6] * In[7];
+        tmp += kCoefficients32To24[1][7] * In[8];
+        Out[1] = tmp;
+
+        tmp = 1 << 14;  // third phase: window shifted by two input samples
+        tmp += kCoefficients32To24[2][0] * In[2];
+        tmp += kCoefficients32To24[2][1] * In[3];
+        tmp += kCoefficients32To24[2][2] * In[4];
+        tmp += kCoefficients32To24[2][3] * In[5];
+        tmp += kCoefficients32To24[2][4] * In[6];
+        tmp += kCoefficients32To24[2][5] * In[7];
+        tmp += kCoefficients32To24[2][6] * In[8];
+        tmp += kCoefficients32To24[2][7] * In[9];
+        Out[2] = tmp;
+
+        // update pointers
+        In += 4;
+        Out += 3;
+    }
+}
+
+//
+// fractional resampling filters
+//   Fout = 11/16 * Fin
+//   Fout =  8/11 * Fin
+//
+
+// compute two 9-tap inner-products with the same coefficients and store them to *out1 / *out2: in1 is read forwards (in1[0..8]), in2 backwards (in2[0..-8])
+static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2,
+                                       const int16_t *coef_ptr, int32_t *out1,
+                                       int32_t *out2)
+{
+    int32_t tmp1 = 16384;  // both sums start at the offset 16384 (1 << 14)
+    int32_t tmp2 = 16384;
+    int16_t coef;
+
+    coef = coef_ptr[0];
+    tmp1 += coef * in1[0];
+    tmp2 += coef * in2[-0];
+
+    coef = coef_ptr[1];
+    tmp1 += coef * in1[1];
+    tmp2 += coef * in2[-1];
+
+    coef = coef_ptr[2];
+    tmp1 += coef * in1[2];
+    tmp2 += coef * in2[-2];
+
+    coef = coef_ptr[3];
+    tmp1 += coef * in1[3];
+    tmp2 += coef * in2[-3];
+
+    coef = coef_ptr[4];
+    tmp1 += coef * in1[4];
+    tmp2 += coef * in2[-4];
+
+    coef = coef_ptr[5];
+    tmp1 += coef * in1[5];
+    tmp2 += coef * in2[-5];
+
+    coef = coef_ptr[6];
+    tmp1 += coef * in1[6];
+    tmp2 += coef * in2[-6];
+
+    coef = coef_ptr[7];
+    tmp1 += coef * in1[7];
+    tmp2 += coef * in2[-7];
+
+    coef = coef_ptr[8];
+    *out1 = tmp1 + coef * in1[8];  // last tap is folded directly into the stored results
+    *out2 = tmp2 + coef * in2[-8];
+}
+
+//   Resampling ratio: 8/11
+// input:  int32_t (normalized, not saturated) :: size 11 * K (the filter reads In[0 .. 11*K+6], i.e. 7 samples past the 11 * K consumed)
+// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size  8 * K
+//      K: number of blocks
+
+void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K)
+{
+    /////////////////////////////////////////////////////////////
+    // Filter operation:
+    //
+    // Perform resampling (11 input samples -> 8 output samples);
+    // process in sub blocks of size 11 samples.
+    int32_t tmp;
+    size_t m;
+
+    for (m = 0; m < K; m++)
+    {
+        tmp = 1 << 14;
+
+        // first output sample: In[3] passed through unfiltered, only shifted and offset
+        Out[0] = ((int32_t)In[3] << 15) + tmp;
+
+        // sum and accumulate filter coefficients and input samples (row 3 over In[5..13] gives Out[4])
+        tmp += kCoefficients44To32[3][0] * In[5];
+        tmp += kCoefficients44To32[3][1] * In[6];
+        tmp += kCoefficients44To32[3][2] * In[7];
+        tmp += kCoefficients44To32[3][3] * In[8];
+        tmp += kCoefficients44To32[3][4] * In[9];
+        tmp += kCoefficients44To32[3][5] * In[10];
+        tmp += kCoefficients44To32[3][6] * In[11];
+        tmp += kCoefficients44To32[3][7] * In[12];
+        tmp += kCoefficients44To32[3][8] * In[13];
+        Out[4] = tmp;
+
+        // sum and accumulate filter coefficients and input samples (row 0: forwards -> Out[1], backwards -> Out[7])
+        WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]);
+
+        // sum and accumulate filter coefficients and input samples (row 1: forwards -> Out[2], backwards -> Out[6])
+        WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]);
+
+        // sum and accumulate filter coefficients and input samples (row 2: forwards -> Out[3], backwards -> Out[5])
+        WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]);
+
+        // update pointers
+        In += 11;
+        Out += 8;
+    }
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_init.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_init.c
@ -0,0 +1,139 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
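+/* The global function contained in this file, WebRtcSpl_Init(), initializes
+ * the SPL function pointers. The implementation set (Neon, MIPS or generic C)
+ * is chosen at compile time from WEBRTC_HAS_NEON and MIPS32_LE.
+ *
+ * Some code came from common/rtcd.c in the WebM project.
+ */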
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
+
+/* Declare function pointers.
+ *
+ * These are the definitions of the SPL dispatch pointers. Within this file
+ * they are only assigned by the InitPointersTo*() functions below, which run
+ * when WebRtcSpl_Init() is called; one target per pointer is chosen at
+ * compile time (generic C, Neon or MIPS).
+ */
+MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
+MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
+MaxValueW16 WebRtcSpl_MaxValueW16;
+MaxValueW32 WebRtcSpl_MaxValueW32;
+MinValueW16 WebRtcSpl_MinValueW16;
+MinValueW32 WebRtcSpl_MinValueW32;
+CrossCorrelation WebRtcSpl_CrossCorrelation;
+DownsampleFast WebRtcSpl_DownsampleFast;
+ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+
+#if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE)
+/* Initialize function pointers to the generic C version.
+ *
+ * Only compiled when neither WEBRTC_HAS_NEON nor MIPS32_LE is defined, which
+ * is also the only configuration in which InitFunctionPointers() calls it.
+ * Every one of the nine SPL pointers is set to its "...C" implementation.
+ */
+static void InitPointersToC(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(WEBRTC_HAS_NEON)
+/* Initialize function pointers to the Neon version.
+ *
+ * Only compiled when WEBRTC_HAS_NEON is defined. Eight of the nine pointers
+ * get a "...Neon" implementation; WebRtcSpl_ScaleAndAddVectorsWithRound is
+ * the exception and is pointed at the generic C implementation.
+ */
+static void InitPointersToNeon(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon;
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+}
+#endif
+
+#if defined(MIPS32_LE)
+/* Initialize function pointers to the MIPS version.
+ *
+ * Only compiled when MIPS32_LE is defined. Seven pointers always get a
+ * "..._mips" implementation. WebRtcSpl_MaxAbsValueW32 and
+ * WebRtcSpl_ScaleAndAddVectorsWithRound use their "..._mips" versions only
+ * when MIPS_DSP_R1_LE is also defined; otherwise they fall back to the
+ * generic C implementations.
+ */
+static void InitPointersToMIPS(void) {
+  WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips;
+  WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips;
+  WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips;
+  WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips;
+  WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips;
+  WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips;
+  WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
+#if defined(MIPS_DSP_R1_LE)
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRound_mips;
+#else
+  WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C;
+  WebRtcSpl_ScaleAndAddVectorsWithRound =
+      WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+#endif
+}
+#endif
+
+static void InitFunctionPointers(void) {  /* Calls exactly one InitPointersTo*() function, picked by the preprocessor. */
+#if defined(WEBRTC_HAS_NEON)
+  InitPointersToNeon();
+#elif defined(MIPS32_LE)
+  InitPointersToMIPS();
+#else
+  InitPointersToC();  /* Generic fallback: neither platform macro is defined. */
+#endif  /* WEBRTC_HAS_NEON */
+}
+
+#include <pthread.h>
+
+static void once(void (*func)(void)) {  /* Runs |func| through pthread_once().
+   * The control object below is a single function-local static shared by
+   * every call, so pthread_once() executes only the |func| passed by the
+   * first call; later calls return without running theirs. */
+  static pthread_once_t lock = PTHREAD_ONCE_INIT;
+  pthread_once(&lock, func);
+}
+// #if defined(WEBRTC_POSIX)
+// #include <pthread.h>
+
+// static void once(void (*func)(void)) {
+//   static pthread_once_t lock = PTHREAD_ONCE_INIT;
+//   pthread_once(&lock, func);
+// }
+
+// #elif defined(_WIN32)
+// #include <windows.h>
+
+// static void once(void (*func)(void)) {
+//   /* Didn't use InitializeCriticalSection() since there's no race-free context
+//    * in which to execute it.
+//    *
+//    * TODO(kma): Change to different implementation (e.g.
+//    * InterlockedCompareExchangePointer) to avoid issues similar to
+//    * http://code.google.com/p/webm/issues/detail?id=467.
+//    */
+//   static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0};
+//   static int done = 0;
+
+//   EnterCriticalSection(&lock);
+//   if (!done) {
+//     func();
+//     done = 1;
+//   }
+//   LeaveCriticalSection(&lock);
+// }
+
+// /* There's no fallback version as an #else block here to ensure thread safety.
+//  * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build
+//  * system should pick it up.
+//  */
+// #endif  /* WEBRTC_POSIX */
+
+void WebRtcSpl_Init(void) {  /* Assigns the SPL function pointers; routed through once(), so repeated calls are allowed. */
+  once(InitFunctionPointers);
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_inl.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_inl.c
@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+#include "webrtc/common_audio/signal_processing/include/spl_inl.h"
+
+// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n
+// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at
+// index (n * 0x8c0b2891) >> 26 gives the number of zero bits in n. The values
+// 0..32 each appear exactly once; the other 31 slots hold -1 (presumably never indexed -- TODO confirm).
+const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = {
+    32, 8,  17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18,
+    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  26, 25, 24,
+    4,  11, 23, 31, 3,  7,  10, 16, 22, 30, -1, -1, 2,  6,  13, 9,
+    -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1,  27, 5,  12,
+};
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_sqrt.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_sqrt.c
@ -0,0 +1,194 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains the function WebRtcSpl_Sqrt().
+ * The description header can be found in signal_processing_library.h
+ *
+ */
+
+#include "webrtc/rtc_base/checks.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+int32_t WebRtcSpl_SqrtLocal(int32_t in);
+
+int32_t WebRtcSpl_SqrtLocal(int32_t in)
+{
+
+    int16_t x_half, t16;
+    int32_t A, B, x2;
+
+    /* Helper for WebRtcSpl_Sqrt(): evaluates the polynomial below on |in|
+     using 16-bit by 16-bit products and returns the accumulated value B
+     (the code comments treat B as a Q31 number) with a rounding bit added.
+
+     The following block performs:
+     y=in/2
+     x=y-2^30
+     x_half=x/2^31
+     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+         + 0.875*((x_half)^5)
+     */
+
+    B = in / 2;
+
+    B = B - ((int32_t)0x40000000); // B = in/2 - 1/2
+    x_half = (int16_t)(B >> 16);  // x_half = x/2 = (in-1)/2
+    B = B + ((int32_t)0x40000000); // B = 1 + x/2
+    B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31)
+
+    x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // x2 = (x/2)^2
+    A = -x2; // A = -(x/2)^2
+    B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2
+
+    A >>= 16;
+    A = A * A * 2; // A = (x/2)^4
+    t16 = (int16_t)(A >> 16);
+    B += -20480 * t16 * 2;  // B = B - 0.625*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4
+
+    A = x_half * t16 * 2;  // A = (x/2)^5
+    t16 = (int16_t)(A >> 16);
+    B += 28672 * t16 * 2;  // B = B + 0.875*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+    t16 = (int16_t)(x2 >> 16);
+    A = x_half * t16 * 2;  // A = (x/2)^3
+
+    B = B + (A >> 1); // B = B + 0.5*A
+    // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5
+
+    B = B + ((int32_t)32768); // Round off bit
+
+    return B;
+}
+
+int32_t WebRtcSpl_Sqrt(int32_t value)
+{
+    /*
+     Purpose:
+
+     Fixed-point approximation of sqrt(abs(value)). An input of 0 returns 0,
+     and WEBRTC_SPL_WORD32_MIN is treated as WEBRTC_SPL_WORD32_MAX because it
+     cannot be negated. The input is normalized with WebRtcSpl_NormW32(), the
+     series is evaluated by WebRtcSpl_SqrtLocal(), and the result is shifted
+     back down by half of the normalization shift.
+
+     Algorithm:
+
+     Six term Taylor Series is used here to compute the square root of a number
+     y^0.5 = (1+x)^0.5 where x = y-1
+     = 1+(x/2)-0.5*((x/2)^2)+0.5*((x/2)^3)-0.625*((x/2)^4)+0.875*((x/2)^5)
+     0.5 <= y < 1
+
+     Example of how the algorithm works, with ut=sqrt(in), and
+     with in=73632 and ut=271 (even shift value case):
+
+     in=73632
+     y= in/131072
+     x=y-1
+     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+     ut=t*(1/sqrt(2))*512
+
+     or:
+
+     in=73632
+     in2=73632*2^14
+     y= in2/2^31
+     x=y-1
+     t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5)
+     ut=t*(1/sqrt(2))
+     ut2=ut*2^9
+
+     which gives:
+
+     in  = 73632
+     in2 = 1206386688
+     y   = 0.56176757812500
+     x   = -0.43823242187500
+     t   = 0.74973506527313
+     ut  = 0.53014274874797
+     ut2 = 2.714330873589594e+002
+
+     or:
+
+     in=73632
+     in2=73632*2^14
+     y=in2/2
+     x=y-2^30
+     x_half=x/2^31
+     t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4)
+         + 0.875*((x_half)^5)
+     ut=t*(1/sqrt(2))
+     ut2=ut*2^9
+
+     which gives:
+
+     in  = 73632
+     in2 = 1206386688
+     y   = 603193344
+     x   = -470548480
+     x_half =  -0.21911621093750
+     t   = 0.74973506527313
+     ut  = 0.53014274874797
+     ut2 = 2.714330873589594e+002
+
+     */
+
+    int16_t x_norm, nshift, t16, sh;
+    int32_t A;
+
+    int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82)
+
+    A = value;
+
+    // The convention in this function is to calculate sqrt(abs(A)). Negate the
+    // input if it is negative.
+    if (A < 0) {
+        if (A == WEBRTC_SPL_WORD32_MIN) {
+            // This number cannot be held in an int32_t after negating.
+            // Map it to the maximum positive value.
+            A = WEBRTC_SPL_WORD32_MAX;
+        } else {
+            A = -A;
+        }
+    } else if (A == 0) {
+        return 0;  // sqrt(0) = 0
+    }
+
+    sh = WebRtcSpl_NormW32(A); // # shifts to normalize A
+    A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A
+    if (A < (WEBRTC_SPL_WORD32_MAX - 32767))
+    {
+        A = A + ((int32_t)32768); // Round off bit
+    } else
+    {
+        A = WEBRTC_SPL_WORD32_MAX; // Adding the round off bit would overflow; clamp instead
+    }
+
+    x_norm = (int16_t)(A >> 16);  // x_norm = AH
+
+    nshift = (sh / 2); // Half of the normalization shift, undone at the end
+    RTC_DCHECK_GE(nshift, 0);
+
+    A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16);
+    A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16)
+    A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A)
+
+    if (2 * nshift == sh) {
+        // Even shift value case
+
+        t16 = (int16_t)(A >> 16);  // t16 = AH
+
+        A = k_sqrt_2 * t16 * 2;  // A = 1/sqrt(2)*t16
+        A = A + ((int32_t)32768); // Round off
+        A = A & ((int32_t)0x7fff0000); // Keep bits 16..30 only
+
+        A >>= 15;  // A = A>>15
+
+    } else
+    {
+        A >>= 16;  // A = A>>16
+    }
+
+    A = A & ((int32_t)0x0000ffff); // Keep the low 16 bits
+    A >>= nshift;  // De-normalize the result.
+
+    return A;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/vector_scaling_operations.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/vector_scaling_operations.c
@ -0,0 +1,165 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * This file contains implementations of the functions
+ * WebRtcSpl_VectorBitShiftW16()
+ * WebRtcSpl_VectorBitShiftW32()
+ * WebRtcSpl_VectorBitShiftW32ToW16()
+ * WebRtcSpl_ScaleVector()
+ * WebRtcSpl_ScaleVectorWithSat()
+ * WebRtcSpl_ScaleAndAddVectors()
+ * WebRtcSpl_ScaleAndAddVectorsWithRoundC()
+ */
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length,
+                                 const int16_t *in, int16_t right_shifts)
+{   /* Copies |length| int16_t samples from |in| to |res|, shifting each one
+     * by |right_shifts| bits: to the right when the count is positive,
+     * otherwise to the left by -|right_shifts| (a count of 0 is a plain
+     * copy). Results are stored to int16_t without saturation. */
+    size_t i;
+
+    if (right_shifts > 0)
+    {
+        for (i = length; i > 0; i--)
+        {
+            (*res++) = ((*in++) >> right_shifts);
+        }
+    } else
+    {
+        for (i = length; i > 0; i--)
+        {
+            (*res++) = ((*in++) * (1 << (-right_shifts)));  /* left shift,
+             * written as a multiplication by 2^(-right_shifts) */
+        }
+    }
+}
+
+void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector,
+                                 size_t vector_length,
+                                 const int32_t *in_vector,
+                                 int16_t right_shifts)
+{   /* Copies |vector_length| int32_t samples from |in_vector| to
+     * |out_vector|, shifting each one by |right_shifts| bits: to the right
+     * when the count is positive, otherwise to the left by -|right_shifts|
+     * (a count of 0 is a plain copy). No saturation is applied. */
+    size_t i;
+
+    if (right_shifts > 0)
+    {
+        for (i = vector_length; i > 0; i--)
+        {
+            (*out_vector++) = ((*in_vector++) >> right_shifts);
+        }
+    } else
+    {
+        for (i = vector_length; i > 0; i--)
+        {
+            (*out_vector++) = ((*in_vector++) << (-right_shifts));  /* NOTE(review):
+             * << is applied to a signed value here, while the W16 variant
+             * multiplies instead; in C a left shift of a negative value is
+             * undefined -- confirm callers only pass non-negative samples. */
+        }
+    }
+}
+
+void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length,
+                                      const int32_t* in, int right_shifts) {  /* Shifts each of the
+   * |length| int32_t samples in |in| right by |right_shifts| (left by
+   * -|right_shifts| when the count is negative) and stores the value returned
+   * by WebRtcSpl_SatW32ToW16() in |out|; judging by its name that helper
+   * saturates to the int16_t range -- TODO confirm. */
+  size_t i;
+  int32_t tmp_w32;
+
+  if (right_shifts >= 0) {
+    for (i = length; i > 0; i--) {
+      tmp_w32 = (*in++) >> right_shifts;
+      (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32);
+    }
+  } else {
+    int left_shifts = -right_shifts;
+    for (i = length; i > 0; i--) {
+      tmp_w32 = (*in++) << left_shifts;  /* the shift happens in 32 bits, before the W16 conversion */
+      (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32);
+    }
+  }
+}
+
+void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector,
+                           int16_t gain, size_t in_vector_length,
+                           int16_t right_shifts)
+{
+    /* Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
+     *
+     * Applied to each of the |in_vector_length| samples. The product is
+     * formed in int, shifted, and then converted to int16_t with a plain
+     * cast, i.e. without saturation.
+     */
+    size_t i;
+    const int16_t *inptr;
+    int16_t *outptr;
+
+    inptr = in_vector;
+    outptr = out_vector;
+
+    for (i = 0; i < in_vector_length; i++)
+    {
+      *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector,
+                                 int16_t gain, size_t in_vector_length,
+                                 int16_t right_shifts)
+{
+    /* Performs vector operation: out_vector = (gain*in_vector)>>right_shifts
+     *
+     * Applied to each of the |in_vector_length| samples. Unlike a plain
+     * cast, the shifted product is passed through WebRtcSpl_SatW32ToW16()
+     * before it is stored.
+     */
+    size_t i;
+    const int16_t *inptr;
+    int16_t *outptr;
+
+    inptr = in_vector;
+    outptr = out_vector;
+
+    for (i = 0; i < in_vector_length; i++) {
+      *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts);
+    }
+}
+
+void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1,
+                                  const int16_t *in2, int16_t gain2, int shift2,
+                                  int16_t *out, size_t vector_length)
+{
+    /* Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2
+     *
+     * Applied to each of the |vector_length| sample pairs. Each scaled term
+     * is cast to int16_t on its own before the two are added, and the sum is
+     * stored to int16_t without saturation.
+     */
+    size_t i;
+    const int16_t *in1ptr;
+    const int16_t *in2ptr;
+    int16_t *outptr;
+
+    in1ptr = in1;
+    in2ptr = in2;
+    outptr = out;
+
+    for (i = 0; i < vector_length; i++)
+    {
+      *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) +
+          (int16_t)((gain2 * *in2ptr++) >> shift2);
+    }
+}
+
+// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms.
+//
+// For every i in [0, length) computes
+//   out_vector[i] = (in_vector1[i] * in_vector1_scale +
+//                    in_vector2[i] * in_vector2_scale +
+//                    round_value) >> right_shifts
+// where round_value is (1 << right_shifts) >> 1. The result is converted to
+// int16_t with a plain cast (no saturation).
+//
+// Returns 0 on success. Returns -1, without writing to |out_vector|, when any
+// of the three pointers is NULL, |length| is 0 or |right_shifts| is negative.
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1,
+                                           int16_t in_vector1_scale,
+                                           const int16_t* in_vector2,
+                                           int16_t in_vector2_scale,
+                                           int right_shifts,
+                                           int16_t* out_vector,
+                                           size_t length) {
+  size_t i = 0;
+  int round_value = 0;
+
+  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
+      length == 0 || right_shifts < 0) {
+    return -1;
+  }
+
+  // Derive the rounding term only after |right_shifts| has been validated:
+  // shifting by a negative count is undefined behaviour in C, so it must not
+  // be evaluated for arguments that are about to be rejected.
+  round_value = (1 << right_shifts) >> 1;
+
+  for (i = 0; i < length; i++) {
+    out_vector[i] = (int16_t)((
+        in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale +
+        round_value) >> right_shifts);
+  }
+
+  return 0;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c
@ -0,0 +1,77 @@
+/*
+ * Written by Wilco Dijkstra, 1996. The following email exchange establishes the
+ * license.
+ *
+ * From: Wilco Dijkstra <Wilco.Dijkstra@ntlworld.com>
+ * Date: Fri, Jun 24, 2011 at 3:20 AM
+ * Subject: Re: sqrt routine
+ * To: Kevin Ma <kma@google.com>
+ * Hi Kevin,
+ * Thanks for asking. Those routines are public domain (originally posted to
+ * comp.sys.arm a long time ago), so you can use them freely for any purpose.
+ * Cheers,
+ * Wilco
+ *
+ * ----- Original Message -----
+ * From: "Kevin Ma" <kma@google.com>
+ * To: <Wilco.Dijkstra@ntlworld.com>
+ * Sent: Thursday, June 23, 2011 11:44 PM
+ * Subject: Fwd: sqrt routine
+ * Hi Wilco,
+ * I saw your sqrt routine from several web sites, including
+ * http://www.finesse.demon.co.uk/steven/sqrt.html.
+ * Just wonder if there's any copyright information with your Successive
+ * approximation routines, or if I can freely use it for any purpose.
+ * Thanks.
+ * Kevin
+ */
+
+// Minor modifications in code style for WebRTC, 2012.
+
+#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h"
+
+/*
+ * Algorithm:
+ * Successive approximation of the equation (root + delta) ^ 2 = N
+ * until delta < 1. If delta < 1 we have the integer part of SQRT (N).
+ * Use delta = 2^i for i = 15 .. 0.
+ *
+ * Output precision is 16 bits. Note for large input values (close to
+ * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word)
+ * contains the MSB information (a non-sign value). Do with caution
+ * if you need to cast the output to int16_t type.
+ *
+ * If the input value is negative, it returns 0.
+ *
+ * Implementation notes:
+ * WEBRTC_SPL_SQRT_ITER(N) is one approximation step. It reads and writes the
+ * local variables |value|, |root| and |try1| of WebRtcSpl_SqrtFloor(), so it
+ * can only be expanded inside that function. An accepted step ORs 2 << N
+ * into |root|, i.e. |root| carries the result shifted left by one bit, which
+ * is why the function returns root >> 1.
+ */
+
+#define WEBRTC_SPL_SQRT_ITER(N)                 \
+  try1 = root + (1 << (N));                     \
+  if (value >= try1 << (N))                     \
+  {                                             \
+    value -= try1 << (N);                       \
+    root |= 2 << (N);                           \
+  }
+
+int32_t WebRtcSpl_SqrtFloor(int32_t value)
+{
+  int32_t root = 0, try1;
+
+  WEBRTC_SPL_SQRT_ITER (15);
+  WEBRTC_SPL_SQRT_ITER (14);
+  WEBRTC_SPL_SQRT_ITER (13);
+  WEBRTC_SPL_SQRT_ITER (12);
+  WEBRTC_SPL_SQRT_ITER (11);
+  WEBRTC_SPL_SQRT_ITER (10);
+  WEBRTC_SPL_SQRT_ITER ( 9);
+  WEBRTC_SPL_SQRT_ITER ( 8);
+  WEBRTC_SPL_SQRT_ITER ( 7);
+  WEBRTC_SPL_SQRT_ITER ( 6);
+  WEBRTC_SPL_SQRT_ITER ( 5);
+  WEBRTC_SPL_SQRT_ITER ( 4);
+  WEBRTC_SPL_SQRT_ITER ( 3);
+  WEBRTC_SPL_SQRT_ITER ( 2);
+  WEBRTC_SPL_SQRT_ITER ( 1);
+  WEBRTC_SPL_SQRT_ITER ( 0);
+
+  return root >> 1;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h
@ -0,0 +1,29 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdint.h>
+
+//
+// WebRtcSpl_SqrtFloor(...)
+//
+// Returns the square root of the input value |value|. The precision of this
+// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
+// If |value| is a negative number then 0 is returned.
+//
+// Algorithm:
+//
+// An iterative 4 cycle/bit routine
+//
+// Input:
+//      - value     : Value to calculate sqrt of
+//
+// Return value     : Result of the sqrt calculation
+//
+int32_t WebRtcSpl_SqrtFloor(int32_t value);
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/include/webrtc_vad.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/include/webrtc_vad.h
@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This header file includes the VAD API calls. Specific function calls are
+ * given below.
+ */
+
+#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
+#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct WebRtcVadInst VadInst;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates an instance of the VAD structure.
+VadInst* WebRtcVad_Create(void);
+
+// Frees the dynamic memory of a specified VAD instance.
+//
+// - handle [i] : Pointer to VAD instance that should be freed.
+void WebRtcVad_Free(VadInst* handle);
+
+// Initializes a VAD instance.
+//
+// - handle [i/o] : Instance that should be initialized.
+//
+// returns        : 0 - (OK),
+//                 -1 - (null pointer or Default mode could not be set).
+int WebRtcVad_Init(VadInst* handle);
+
+// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
+// restrictive in reporting speech. Put in other words the probability of being
+// speech when the VAD returns 1 is increased with increasing mode. As a
+// consequence also the missed detection rate goes up.
+//
+// - handle [i/o] : VAD instance.
+// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
+//
+// returns        : 0 - (OK),
+//                 -1 - (null pointer, mode could not be set or the VAD instance
+//                       has not been initialized).
+int WebRtcVad_set_mode(VadInst* handle, int mode);
+
+// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
+// and frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength().
+//
+// - handle       [i/o] : VAD Instance. Needs to be initialized by
+//                        WebRtcVad_Init() before call.
+// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
+// - audio_frame  [i]   : Audio frame buffer.
+// - frame_length [i]   : Length of audio frame buffer in number of samples.
+//
+// returns              : 1 - (Active Voice),
+//                        0 - (Non-active Voice),
+//                       -1 - (Error)
+int WebRtcVad_Process(VadInst* handle,
+                      int fs,
+                      const int16_t* audio_frame,
+                      size_t frame_length);
+
+// Checks for valid combinations of |rate| and |frame_length|. We support 10,
+// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
+//
+// - rate         [i] : Sampling frequency (Hz).
+// - frame_length [i] : Speech frame buffer length in number of samples.
+//
+// returns            : 0 - (valid combination), -1 - (invalid combination)
+int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.c
@ -0,0 +1,685 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/vad/vad_core.h"
+
+#include "webrtc/rtc_base/sanitizer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/vad/vad_filterbank.h"
+#include "webrtc/common_audio/vad/vad_gmm.h"
+#include "webrtc/common_audio/vad/vad_sp.h"
+
+// Spectrum Weighting
+// Per-channel weights applied to the log likelihood ratios when the global
+// VAD decision is formed in GmmProbability().
+static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 };
+// Update factors used in GmmProbability(): noise mean update, speech mean
+// update and the long term (feature minimum) correction of the noise mean.
+static const int16_t kNoiseUpdateConst = 655; // Q15
+static const int16_t kSpeechUpdateConst = 6554; // Q15
+static const int16_t kBackEta = 154; // Q8
+// Minimum difference between the two models, Q5
+static const int16_t kMinimumDifference[kNumChannels] = {
+    544, 544, 576, 576, 576, 576 };
+// Upper limit of mean value for speech model, Q7
+static const int16_t kMaximumSpeech[kNumChannels] = {
+    11392, 11392, 11520, 11520, 11520, 11520 };
+// Minimum value for mean value
+// Indexed by Gaussian (not by channel); lower bound for the speech means.
+static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 };
+// Upper limit of mean value for noise model, Q7
+static const int16_t kMaximumNoise[kNumChannels] = {
+    9216, 9088, 8960, 8832, 8704, 8576 };
+// Start values for the Gaussian models, Q7
+//
+// Layout of every |kTableSize| table below: the entry for Gaussian |k| of
+// channel |channel| is stored at index |channel + k * kNumChannels|, i.e. the
+// first |kNumChannels| entries belong to the first Gaussian and the remaining
+// entries to the second one. The means and stds are copied into the VAD
+// instance by WebRtcVad_InitCore(); the weights are read directly from these
+// tables.
+//
+// Weights for the two Gaussians for the six channels (noise)
+static const int16_t kNoiseDataWeights[kTableSize] = {
+    34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };
+// Weights for the two Gaussians for the six channels (speech)
+static const int16_t kSpeechDataWeights[kTableSize] = {
+    48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };
+// Means for the two Gaussians for the six channels (noise)
+static const int16_t kNoiseDataMeans[kTableSize] = {
+    6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };
+// Means for the two Gaussians for the six channels (speech)
+static const int16_t kSpeechDataMeans[kTableSize] = {
+    8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483
+};
+// Stds for the two Gaussians for the six channels (noise)
+static const int16_t kNoiseDataStds[kTableSize] = {
+    378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };
+// Stds for the two Gaussians for the six channels (speech)
+static const int16_t kSpeechDataStds[kTableSize] = {
+    555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };
+
+// Constants used in GmmProbability().
+//
+// Maximum number of counted speech (VAD = 1) frames in a row.
+// |num_of_speech| saturates at this value; once it has been exceeded the
+// longer hang-over (|over_hang_max_2|) is used instead of the shorter one.
+static const int16_t kMaxSpeechFrames = 6;
+// Minimum standard deviation for both speech and noise.
+static const int16_t kMinStd = 384;
+
+// Constants in WebRtcVad_InitCore().
+// Default aggressiveness mode.
+static const short kDefaultMode = 0;
+// Marker value stored in |init_flag| after WebRtcVad_InitCore() has succeeded.
+static const int kInitCheck = 42;
+
+// Constants used in WebRtcVad_set_mode_core().
+//
+// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms).
+//
+// Each table holds one entry per frame length. GmmProbability() uses entry 0
+// for 80-sample frames, entry 1 for 160-sample frames and entry 2 for every
+// other frame length.
+//  - kOverHangMax1*   : hang-over length set while at most |kMaxSpeechFrames|
+//                       consecutive speech frames have been counted.
+//  - kOverHangMax2*   : hang-over length set after more than
+//                       |kMaxSpeechFrames| consecutive speech frames.
+//  - kLocalThreshold* : per-channel threshold, compared against four times
+//                       the channel's log likelihood ratio.
+//  - kGlobalThreshold*: threshold for the spectrum weighted sum of the log
+//                       likelihood ratios.
+//
+// Mode 0, Quality.
+static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 };
+static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 };
+static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 };
+static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 };
+// Mode 1, Low bitrate.
+static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 };
+static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 };
+static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 };
+static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 };
+// Mode 2, Aggressive.
+static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 };
+static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 };
+static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 };
+static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 };
+// Mode 3, Very aggressive.
+static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 };
+static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 };
+static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 };
+static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
+
+// Shifts the Gaussian means of one channel by |offset| and returns their
+// weighted sum. The entries belonging to one channel are |kNumChannels|
+// elements apart, so |data| and |weights| must point at the channel's first
+// Gaussian inside a |kTableSize| table.
+//
+// - data     [i/o] : Means to average; every visited entry is incremented by
+//                    |offset| in place.
+// - offset   [i]   : Value added to each visited entry of |data|.
+// - weights  [i]   : Weights used for averaging.
+//
+// returns          : The weighted average (sum of mean * weight).
+static int32_t WeightedAverage(int16_t* data, int16_t offset,
+                               const int16_t* weights) {
+  int32_t acc = 0;
+  int idx = 0;
+  int remaining = kNumGaussians;
+
+  while (remaining-- > 0) {
+    int16_t* const mean = &data[idx];
+
+    // Apply the offset first; the updated mean is what gets averaged.
+    *mean += offset;
+    acc += *mean * weights[idx];
+    idx += kNumChannels;
+  }
+  return acc;
+}
+
+// An s16 x s32 -> s32 multiplication that's allowed to overflow. (It's still
+// undefined behavior, so not a good idea; this just makes UBSan ignore the
+// violation, so that our old code can continue to do what it's always been
+// doing.)
+//
+// - a [i] : 16-bit factor.
+// - b [i] : 32-bit factor.
+//
+// returns : |a| * |b|; the result is not checked for overflow.
+static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
+    OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) {
+  return a * b;
+}
+
+// Calculates the probabilities for both speech and background noise using
+// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which
+// type of signal is most probable.
+//
+// Besides making the decision, the function updates the GMM parameters stored
+// in |self| (means and standard deviations) as well as the hang-over state
+// (|over_hang|, |num_of_speech|) and |frame_counter|.
+//
+// - self           [i/o] : Pointer to VAD instance
+// - features       [i]   : Feature vector of length |kNumChannels|
+//                          = log10(energy in frequency band)
+// - total_power    [i]   : Total power in audio frame.
+// - frame_length   [i]   : Number of input samples
+//
+// - returns              : the VAD decision:
+//                          0 - noise,
+//                          1 - speech detected in this frame,
+//                          2 + |over_hang| - the frame itself was classified
+//                              as noise, but the hang-over from earlier speech
+//                              frames is still active.
+static int16_t GmmProbability(VadInstT* self, int16_t* features,
+                              int16_t total_power, size_t frame_length) {
+  int channel, k;
+  int16_t feature_minimum;
+  int16_t h0, h1;
+  int16_t log_likelihood_ratio;
+  int16_t vadflag = 0;
+  int16_t shifts_h0, shifts_h1;
+  int16_t tmp_s16, tmp1_s16, tmp2_s16;
+  int16_t diff;
+  int gaussian;
+  int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk;
+  int16_t delt, ndelt;
+  int16_t maxspe, maxmu;
+  int16_t deltaN[kTableSize], deltaS[kTableSize];
+  int16_t ngprvec[kTableSize] = { 0 };  // Conditional probability = 0.
+  int16_t sgprvec[kTableSize] = { 0 };  // Conditional probability = 0.
+  int32_t h0_test, h1_test;
+  int32_t tmp1_s32, tmp2_s32;
+  int32_t sum_log_likelihood_ratios = 0;
+  int32_t noise_global_mean, speech_global_mean;
+  int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians];
+  int16_t overhead1, overhead2, individualTest, totalTest;
+
+  // Set various thresholds based on frame lengths (80, 160 or 240 samples).
+  // Any frame length other than 80 or 160 falls through to the last entry.
+  if (frame_length == 80) {
+    overhead1 = self->over_hang_max_1[0];
+    overhead2 = self->over_hang_max_2[0];
+    individualTest = self->individual[0];
+    totalTest = self->total[0];
+  } else if (frame_length == 160) {
+    overhead1 = self->over_hang_max_1[1];
+    overhead2 = self->over_hang_max_2[1];
+    individualTest = self->individual[1];
+    totalTest = self->total[1];
+  } else {
+    overhead1 = self->over_hang_max_1[2];
+    overhead2 = self->over_hang_max_2[2];
+    individualTest = self->individual[2];
+    totalTest = self->total[2];
+  }
+
+  // Frames whose power does not exceed |kMinEnergy| skip both the detection
+  // and the model update; |vadflag| then stays 0 and only the hang-over
+  // smoothing at the end of the function is applied.
+  if (total_power > kMinEnergy) {
+    // The signal power of current frame is large enough for processing. The
+    // processing consists of two parts:
+    // 1) Calculating the likelihood of speech and thereby a VAD decision.
+    // 2) Updating the underlying model, w.r.t., the decision made.
+
+    // The detection scheme is an LRT with hypothesis
+    // H0: Noise
+    // H1: Speech
+    //
+    // We combine a global LRT with local tests, for each frequency sub-band,
+    // here defined as |channel|.
+    for (channel = 0; channel < kNumChannels; channel++) {
+      // For each channel we model the probability with a GMM consisting of
+      // |kNumGaussians|, with different means and standard deviations depending
+      // on H0 or H1.
+      h0_test = 0;
+      h1_test = 0;
+      for (k = 0; k < kNumGaussians; k++) {
+        gaussian = channel + k * kNumChannels;
+        // Probability under H0, that is, probability of frame being noise.
+        // Value given in Q27 = Q7 * Q20.
+        tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
+                                                 self->noise_means[gaussian],
+                                                 self->noise_stds[gaussian],
+                                                 &deltaN[gaussian]);
+        noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32;
+        h0_test += noise_probability[k];  // Q27
+
+        // Probability under H1, that is, probability of frame being speech.
+        // Value given in Q27 = Q7 * Q20.
+        tmp1_s32 = WebRtcVad_GaussianProbability(features[channel],
+                                                 self->speech_means[gaussian],
+                                                 self->speech_stds[gaussian],
+                                                 &deltaS[gaussian]);
+        speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32;
+        h1_test += speech_probability[k];  // Q27
+      }
+
+      // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H0}).
+      // Approximation:
+      // log2(Pr{X|H1} / Pr{X|H0}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H0}*2^Q)
+      //                           = log2(h1_test) - log2(h0_test)
+      //                           = log2(2^(31-shifts_h1)*(1+b1))
+      //                             - log2(2^(31-shifts_h0)*(1+b0))
+      //                           = shifts_h0 - shifts_h1
+      //                             + log2(1+b1) - log2(1+b0)
+      //                          ~= shifts_h0 - shifts_h1
+      //
+      // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1.
+      // Further, b0 and b1 are independent and on the average the two terms
+      // cancel.
+      shifts_h0 = WebRtcSpl_NormW32(h0_test);
+      shifts_h1 = WebRtcSpl_NormW32(h1_test);
+      if (h0_test == 0) {
+        shifts_h0 = 31;
+      }
+      if (h1_test == 0) {
+        shifts_h1 = 31;
+      }
+      log_likelihood_ratio = shifts_h0 - shifts_h1;
+
+      // Update |sum_log_likelihood_ratios| with spectrum weighting. This is
+      // used for the global VAD decision.
+      sum_log_likelihood_ratios +=
+          (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);
+
+      // Local VAD decision.
+      if ((log_likelihood_ratio * 4) > individualTest) {
+        vadflag = 1;
+      }
+
+      // TODO(bjornv): The conditional probabilities below are applied on the
+      // hard coded number of Gaussians set to two. Find a way to generalize.
+      // Calculate local noise probabilities used later when updating the GMM.
+      h0 = (int16_t) (h0_test >> 12);  // Q15
+      if (h0 > 0) {
+        // High probability of noise. Assign conditional probabilities for each
+        // Gaussian in the GMM.
+        tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2;  // Q29
+        ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0);  // Q14
+        ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel];
+      } else {
+        // Low noise probability. Assign conditional probability 1 to the first
+        // Gaussian and 0 to the rest (which is already set at initialization).
+        ngprvec[channel] = 16384;
+      }
+
+      // Calculate local speech probabilities used later when updating the GMM.
+      h1 = (int16_t) (h1_test >> 12);  // Q15
+      if (h1 > 0) {
+        // High probability of speech. Assign conditional probabilities for each
+        // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0.
+        tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2;  // Q29
+        sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1);  // Q14
+        sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel];
+      }
+    }
+
+    // Make a global VAD decision.
+    vadflag |= (sum_log_likelihood_ratios >= totalTest);
+
+    // Update the model parameters.
+    // NOTE(review): |maxspe| is refreshed from |kMaximumSpeech[channel]| only
+    // at the end of each channel iteration, so the speech mean limit |maxmu|
+    // of a channel is derived from the previous channel's value (12800 for
+    // the first channel) - confirm that this is intended.
+    maxspe = 12800;
+    for (channel = 0; channel < kNumChannels; channel++) {
+
+      // Get minimum value in past which is used for long term correction in Q4.
+      feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel);
+
+      // Compute the "global" mean, that is the sum of the two means weighted.
+      // With an offset of 0 the means themselves are left unchanged.
+      noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
+                                          &kNoiseDataWeights[channel]);
+      tmp1_s16 = (int16_t) (noise_global_mean >> 6);  // Q8
+
+      for (k = 0; k < kNumGaussians; k++) {
+        gaussian = channel + k * kNumChannels;
+
+        nmk = self->noise_means[gaussian];
+        smk = self->speech_means[gaussian];
+        nsk = self->noise_stds[gaussian];
+        ssk = self->speech_stds[gaussian];
+
+        // Update noise mean vector if the frame consists of noise only.
+        nmk2 = nmk;
+        if (!vadflag) {
+          // deltaN = (x-mu)/sigma^2
+          // ngprvec[k] = |noise_probability[k]| /
+          //   (|noise_probability[0]| + |noise_probability[1]|)
+
+          // (Q14 * Q11 >> 11) = Q14.
+          delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11);
+          // Q7 + (Q14 * Q15 >> 22) = Q7.
+          nmk2 = nmk + (int16_t)((delt * kNoiseUpdateConst) >> 22);
+        }
+
+        // Long term correction of the noise mean.
+        // Q8 - Q8 = Q8.
+        ndelt = (feature_minimum << 4) - tmp1_s16;
+        // Q7 + (Q8 * Q8) >> 9 = Q7.
+        nmk3 = nmk2 + (int16_t)((ndelt * kBackEta) >> 9);
+
+        // Control that the noise mean does not drift too much.
+        tmp_s16 = (int16_t) ((k + 5) << 7);
+        if (nmk3 < tmp_s16) {
+          nmk3 = tmp_s16;
+        }
+        tmp_s16 = (int16_t) ((72 + k - channel) << 7);
+        if (nmk3 > tmp_s16) {
+          nmk3 = tmp_s16;
+        }
+        self->noise_means[gaussian] = nmk3;
+
+        if (vadflag) {
+          // Update speech mean vector:
+          // |deltaS| = (x-mu)/sigma^2
+          // sgprvec[k] = |speech_probability[k]| /
+          //   (|speech_probability[0]| + |speech_probability[1]|)
+
+          // (Q14 * Q11) >> 11 = Q14.
+          delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11);
+          // Q14 * Q15 >> 21 = Q8.
+          tmp_s16 = (int16_t)((delt * kSpeechUpdateConst) >> 21);
+          // Q7 + (Q8 >> 1) = Q7. With rounding.
+          smk2 = smk + ((tmp_s16 + 1) >> 1);
+
+          // Control that the speech mean does not drift too much.
+          maxmu = maxspe + 640;
+          if (smk2 < kMinimumMean[k]) {
+            smk2 = kMinimumMean[k];
+          }
+          if (smk2 > maxmu) {
+            smk2 = maxmu;
+          }
+          self->speech_means[gaussian] = smk2;  // Q7.
+
+          // (Q7 >> 3) = Q4. With rounding.
+          tmp_s16 = ((smk + 4) >> 3);
+
+          tmp_s16 = features[channel] - tmp_s16;  // Q4
+          // (Q11 * Q4 >> 3) = Q12.
+          tmp1_s32 = (deltaS[gaussian] * tmp_s16) >> 3;
+          tmp2_s32 = tmp1_s32 - 4096;
+          tmp_s16 = sgprvec[gaussian] >> 2;
+          // (Q14 >> 2) * Q12 = Q24.
+          tmp1_s32 = tmp_s16 * tmp2_s32;
+
+          tmp2_s32 = tmp1_s32 >> 4;  // Q20
+
+          // 0.1 * Q20 / Q7 = Q13.
+          // The division is done on the magnitude and the sign is restored
+          // afterwards.
+          if (tmp2_s32 > 0) {
+            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10);
+          } else {
+            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10);
+            tmp_s16 = -tmp_s16;
+          }
+          // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4).
+          // Note that division by 4 equals shift by 2, hence,
+          // (Q13 >> 8) = (Q13 >> 6) / 4 = Q7.
+          tmp_s16 += 128;  // Rounding.
+          ssk += (tmp_s16 >> 8);
+          if (ssk < kMinStd) {
+            ssk = kMinStd;
+          }
+          self->speech_stds[gaussian] = ssk;
+        } else {
+          // Update GMM variance vectors.
+          // deltaN * (features[channel] - nmk) - 1
+          // Q4 - (Q7 >> 3) = Q4.
+          tmp_s16 = features[channel] - (nmk >> 3);
+          // (Q11 * Q4 >> 3) = Q12.
+          tmp1_s32 = (deltaN[gaussian] * tmp_s16) >> 3;
+          tmp1_s32 -= 4096;
+
+          // (Q14 >> 2) * Q12 = Q24.
+          tmp_s16 = (ngprvec[gaussian] + 2) >> 2;
+          tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32);
+          // Q20  * approx 0.001 (2^-10=0.0009766), hence,
+          // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20.
+          tmp1_s32 = tmp2_s32 >> 14;
+
+          // Q20 / Q7 = Q13.
+          // The division is done on the magnitude and the sign is restored
+          // afterwards.
+          if (tmp1_s32 > 0) {
+            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk);
+          } else {
+            tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk);
+            tmp_s16 = -tmp_s16;
+          }
+          tmp_s16 += 32;  // Rounding
+          nsk += tmp_s16 >> 6;  // Q13 >> 6 = Q7.
+          if (nsk < kMinStd) {
+            nsk = kMinStd;
+          }
+          self->noise_stds[gaussian] = nsk;
+        }
+      }
+
+      // Separate models if they are too close.
+      // |noise_global_mean| in Q14 (= Q7 * Q7).
+      noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
+                                          &kNoiseDataWeights[channel]);
+
+      // |speech_global_mean| in Q14 (= Q7 * Q7).
+      speech_global_mean = WeightedAverage(&self->speech_means[channel], 0,
+                                           &kSpeechDataWeights[channel]);
+
+      // |diff| = "global" speech mean - "global" noise mean.
+      // (Q14 >> 9) - (Q14 >> 9) = Q5.
+      diff = (int16_t) (speech_global_mean >> 9) -
+          (int16_t) (noise_global_mean >> 9);
+      if (diff < kMinimumDifference[channel]) {
+        tmp_s16 = kMinimumDifference[channel] - diff;
+
+        // |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
+        // |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
+        tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2);
+        tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2);
+
+        // Move Gaussian means for speech model by |tmp1_s16| and update
+        // |speech_global_mean|. Note that |self->speech_means[channel]| is
+        // changed after the call.
+        speech_global_mean = WeightedAverage(&self->speech_means[channel],
+                                             tmp1_s16,
+                                             &kSpeechDataWeights[channel]);
+
+        // Move Gaussian means for noise model by -|tmp2_s16| and update
+        // |noise_global_mean|. Note that |self->noise_means[channel]| is
+        // changed after the call.
+        noise_global_mean = WeightedAverage(&self->noise_means[channel],
+                                            -tmp2_s16,
+                                            &kNoiseDataWeights[channel]);
+      }
+
+      // Control that the speech & noise means do not drift too much.
+      maxspe = kMaximumSpeech[channel];
+      tmp2_s16 = (int16_t) (speech_global_mean >> 7);
+      if (tmp2_s16 > maxspe) {
+        // Upper limit of speech model.
+        tmp2_s16 -= maxspe;
+
+        for (k = 0; k < kNumGaussians; k++) {
+          self->speech_means[channel + k * kNumChannels] -= tmp2_s16;
+        }
+      }
+
+      tmp2_s16 = (int16_t) (noise_global_mean >> 7);
+      if (tmp2_s16 > kMaximumNoise[channel]) {
+        // Upper limit of noise model.
+        tmp2_s16 -= kMaximumNoise[channel];
+
+        for (k = 0; k < kNumGaussians; k++) {
+          self->noise_means[channel + k * kNumChannels] -= tmp2_s16;
+        }
+      }
+    }
+    self->frame_counter++;
+  }
+
+  // Smooth with respect to transition hysteresis.
+  if (!vadflag) {
+    // Noise frame: while hang-over frames remain, still report activity
+    // (a value greater than 1) and consume one hang-over frame.
+    if (self->over_hang > 0) {
+      vadflag = 2 + self->over_hang;
+      self->over_hang--;
+    }
+    self->num_of_speech = 0;
+  } else {
+    // Speech frame: count it (saturating at |kMaxSpeechFrames|) and re-arm the
+    // hang-over; the longer hang-over is used once the count has saturated.
+    self->num_of_speech++;
+    if (self->num_of_speech > kMaxSpeechFrames) {
+      self->num_of_speech = kMaxSpeechFrames;
+      self->over_hang = overhead2;
+    } else {
+      self->over_hang = overhead1;
+    }
+  }
+  return vadflag;
+}
+
+// Resets every field of the core VAD instance to its start value and selects
+// the default aggressiveness mode (|kDefaultMode|).
+//
+// - self [i/o] : Instance to initialize.
+//
+// returns      : 0 on success, -1 if |self| is NULL or the default mode could
+//                not be set.
+int WebRtcVad_InitCore(VadInstT* self) {
+  int slot;
+
+  if (!self) {
+    return -1;
+  }
+
+  // General decision state; the VAD starts out as "speech active".
+  self->vad = 1;
+  self->frame_counter = 0;
+  self->over_hang = 0;
+  self->num_of_speech = 0;
+
+  // Downsampling filter state and the 48 -> 8 kHz resampler state.
+  memset(self->downsampling_filter_states, 0,
+         sizeof(self->downsampling_filter_states));
+  WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8);
+
+  // Start values of the GMM parameters (all tables hold |kTableSize| entries).
+  memcpy(self->noise_means, kNoiseDataMeans, sizeof(self->noise_means));
+  memcpy(self->speech_means, kSpeechDataMeans, sizeof(self->speech_means));
+  memcpy(self->noise_stds, kNoiseDataStds, sizeof(self->noise_stds));
+  memcpy(self->speech_stds, kSpeechDataStds, sizeof(self->speech_stds));
+
+  // Index and minimum value vectors.
+  memset(self->index_vector, 0, sizeof(self->index_vector));
+  for (slot = 0; slot < 16 * kNumChannels; ++slot) {
+    self->low_value_vector[slot] = 10000;
+  }
+
+  // Splitting filter and high pass filter states.
+  memset(self->upper_state, 0, sizeof(self->upper_state));
+  memset(self->lower_state, 0, sizeof(self->lower_state));
+  memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));
+
+  // Mean value memory, for WebRtcVad_FindMinimum().
+  for (slot = 0; slot < kNumChannels; ++slot) {
+    self->mean_value[slot] = 1600;
+  }
+
+  // Thresholds of the default mode; the instance only counts as initialized
+  // once this has succeeded.
+  if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
+    return -1;
+  }
+  self->init_flag = kInitCheck;
+
+  return 0;
+}
+
+// Selects the threshold tables of the requested aggressiveness mode and copies
+// them into the instance.
+//
+// - self [i/o] : VAD instance whose thresholds are replaced.
+// - mode [i]   : 0 (quality), 1 (low bitrate), 2 (aggressive) or
+//                3 (very aggressive).
+//
+// returns      : 0 on success, -1 for any other |mode|; in that case |self| is
+//                left untouched.
+int WebRtcVad_set_mode_core(VadInstT* self, int mode) {
+  const int16_t* hang_over_1 = NULL;
+  const int16_t* hang_over_2 = NULL;
+  const int16_t* local_thresholds = NULL;
+  const int16_t* global_thresholds = NULL;
+
+  // Pick the four tables belonging to the mode first, copy afterwards.
+  switch (mode) {
+    case 0:  // Quality mode.
+      hang_over_1 = kOverHangMax1Q;
+      hang_over_2 = kOverHangMax2Q;
+      local_thresholds = kLocalThresholdQ;
+      global_thresholds = kGlobalThresholdQ;
+      break;
+    case 1:  // Low bitrate mode.
+      hang_over_1 = kOverHangMax1LBR;
+      hang_over_2 = kOverHangMax2LBR;
+      local_thresholds = kLocalThresholdLBR;
+      global_thresholds = kGlobalThresholdLBR;
+      break;
+    case 2:  // Aggressive mode.
+      hang_over_1 = kOverHangMax1AGG;
+      hang_over_2 = kOverHangMax2AGG;
+      local_thresholds = kLocalThresholdAGG;
+      global_thresholds = kGlobalThresholdAGG;
+      break;
+    case 3:  // Very aggressive mode.
+      hang_over_1 = kOverHangMax1VAG;
+      hang_over_2 = kOverHangMax2VAG;
+      local_thresholds = kLocalThresholdVAG;
+      global_thresholds = kGlobalThresholdVAG;
+      break;
+    default:
+      return -1;
+  }
+
+  memcpy(self->over_hang_max_1, hang_over_1, sizeof(self->over_hang_max_1));
+  memcpy(self->over_hang_max_2, hang_over_2, sizeof(self->over_hang_max_2));
+  memcpy(self->individual, local_thresholds, sizeof(self->individual));
+  memcpy(self->total, global_thresholds, sizeof(self->total));
+
+  return 0;
+}
+
+// Calculate VAD decision by first extracting feature values and then calculate
+// probability for both speech and background noise.
+
+// Calculates the VAD decision for a 48 kHz frame: the frame is resampled to
+// 8 kHz in blocks of 10 ms (480 input samples -> 80 output samples) and the
+// result is passed on to WebRtcVad_CalcVad8khz().
+//
+// - inst         [i/o] : VAD instance; the resampler state and the VAD state
+//                        are updated.
+// - speech_frame [i]   : Input frame sampled at 48 kHz.
+// - frame_length [i]   : Number of samples in |speech_frame|. Assumed to be at
+//                        most 1440 (30 ms), since |speech_nb| only holds 240
+//                        samples; the caller is expected to validate this.
+//
+// returns              : The VAD decision of WebRtcVad_CalcVad8khz().
+int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
+                           size_t frame_length) {
+  int vad;
+  size_t i;
+  int16_t speech_nb[240];  // 30 ms in 8 kHz.
+  // |tmp_mem| is a temporary memory used by resample function, length is
+  // frame length in 10 ms (480 samples) + 256 extra.
+  int32_t tmp_mem[480 + 256] = { 0 };
+  const size_t kFrameLen10ms48khz = 480;
+  const size_t kFrameLen10ms8khz = 80;
+  size_t num_10ms_frames = frame_length / kFrameLen10ms48khz;
+
+  for (i = 0; i < num_10ms_frames; i++) {
+    // Step through the input in 10 ms blocks as well. Without the input
+    // offset every iteration would resample the first 10 ms again, so 20 ms
+    // and 30 ms frames would be judged on repeated copies of their first
+    // block only.
+    WebRtcSpl_Resample48khzTo8khz(&speech_frame[i * kFrameLen10ms48khz],
+                                  &speech_nb[i * kFrameLen10ms8khz],
+                                  &inst->state_48_to_8,
+                                  tmp_mem);
+  }
+
+  // Do VAD on an 8 kHz signal
+  vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);
+
+  return vad;
+}
+
+// Calculates the VAD decision for a 32 kHz frame by downsampling it in two
+// stages (32 -> 16 -> 8 kHz) and running the 8 kHz VAD on the result.
+//
+// - inst         [i/o] : VAD instance; filter states and VAD state are updated.
+// - speech_frame [i]   : Input frame sampled at 32 kHz.
+// - frame_length [i]   : Number of samples in |speech_frame|.
+//
+// returns              : The VAD decision of WebRtcVad_CalcVad8khz().
+int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
+                           size_t frame_length) {
+  const size_t wideband_length = frame_length / 2;
+  const size_t narrowband_length = wideband_length / 2;
+  int16_t wideband[480];    // Output of the first stage (16 kHz).
+  int16_t narrowband[240];  // Output of the second stage (8 kHz).
+
+  // First stage, 32 -> 16 kHz; uses the filter states from index 2 onwards.
+  WebRtcVad_Downsampling(speech_frame, wideband,
+                         &(inst->downsampling_filter_states[2]), frame_length);
+
+  // Second stage, 16 -> 8 kHz; uses the filter states from index 0 onwards.
+  WebRtcVad_Downsampling(wideband, narrowband,
+                         inst->downsampling_filter_states, wideband_length);
+
+  // Do VAD on an 8 kHz signal.
+  return WebRtcVad_CalcVad8khz(inst, narrowband, narrowband_length);
+}
+
+// Calculates the VAD decision for a 16 kHz frame by downsampling it to 8 kHz
+// and running the 8 kHz VAD on the result.
+//
+// - inst         [i/o] : VAD instance; filter states and VAD state are updated.
+// - speech_frame [i]   : Input frame sampled at 16 kHz.
+// - frame_length [i]   : Number of samples in |speech_frame|.
+//
+// returns              : The VAD decision of WebRtcVad_CalcVad8khz().
+int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
+                           size_t frame_length) {
+  const size_t narrowband_length = frame_length / 2;
+  int16_t narrowband[240];  // Downsampled frame (8 kHz).
+
+  // Wideband: downsample the signal before doing VAD.
+  WebRtcVad_Downsampling(speech_frame, narrowband,
+                         inst->downsampling_filter_states, frame_length);
+
+  return WebRtcVad_CalcVad8khz(inst, narrowband, narrowband_length);
+}
+
+// Calculates the VAD decision for an 8 kHz frame: the per-band features are
+// extracted and then evaluated by GmmProbability(). The decision is also
+// stored in |inst->vad|.
+//
+// - inst         [i/o] : VAD instance; its state is updated.
+// - speech_frame [i]   : Input frame sampled at 8 kHz.
+// - frame_length [i]   : Number of samples in |speech_frame|.
+//
+// returns              : The VAD decision made by GmmProbability().
+int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
+                          size_t frame_length) {
+  int16_t band_features[kNumChannels];
+  // Get power in the bands.
+  const int16_t frame_power = WebRtcVad_CalculateFeatures(
+      inst, speech_frame, frame_length, band_features);
+
+  // Make a VAD decision and remember it in the instance.
+  inst->vad = GmmProbability(inst, band_features, frame_power, frame_length);
+
+  return inst->vad;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.h
@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This header file includes the descriptions of the core VAD calls.
+ */
+
+#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
+#define COMMON_AUDIO_VAD_VAD_CORE_H_
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+enum { kNumChannels = 6 };   // Number of frequency bands (named channels).
+enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
+enum { kTableSize = kNumChannels * kNumGaussians };
+enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
+
+// State of one core VAD instance. All fields are set up by
+// WebRtcVad_InitCore().
+typedef struct VadInstT_ {
+  // Most recent VAD decision, stored by WebRtcVad_CalcVad8khz().
+  int vad;
+  // Downsampling filter states. WebRtcVad_CalcVad32khz() hands the elements
+  // from index 2 onwards to its 32 -> 16 kHz stage; the 16 -> 8 kHz stage
+  // uses the elements from index 0 onwards.
+  int32_t downsampling_filter_states[4];
+  // Resampler state used by WebRtcVad_CalcVad48khz().
+  WebRtcSpl_State48khzTo8khz state_48_to_8;
+  // GMM parameters. The entry for Gaussian |k| of channel |channel| is stored
+  // at index |channel + k * kNumChannels|.
+  int16_t noise_means[kTableSize];
+  int16_t speech_means[kTableSize];
+  int16_t noise_stds[kTableSize];
+  int16_t speech_stds[kTableSize];
+  // Number of frames processed with a total power above |kMinEnergy|.
+  // TODO(bjornv): Change to |frame_count|.
+  int32_t frame_counter;
+  // Remaining hang-over frames that are still reported as active.
+  int16_t over_hang;  // Over Hang
+  // Number of consecutive speech frames (saturated in vad_core.c).
+  int16_t num_of_speech;
+  // Index and minimum value vectors (16 entries per channel), initialized to
+  // 0 and 10000 respectively.
+  // TODO(bjornv): Change to |age_vector|.
+  int16_t index_vector[16 * kNumChannels];
+  int16_t low_value_vector[16 * kNumChannels];
+  // Mean value memory, for WebRtcVad_FindMinimum().
+  // TODO(bjornv): Change to |median|.
+  int16_t mean_value[kNumChannels];
+  // Splitting filter states.
+  int16_t upper_state[5];
+  int16_t lower_state[5];
+  // High pass filter states.
+  int16_t hp_filter_state[4];
+  // Thresholds of the selected aggressiveness mode, set by
+  // WebRtcVad_set_mode_core(). One entry per frame length: index 0 is used
+  // for 80-sample frames, index 1 for 160-sample frames and index 2 for all
+  // other frame lengths.
+  int16_t over_hang_max_1[3];
+  int16_t over_hang_max_2[3];
+  int16_t individual[3];
+  int16_t total[3];
+
+  // Set to the init marker of vad_core.c once WebRtcVad_InitCore() succeeded.
+  int init_flag;
+} VadInstT;
+
+// Initializes the core VAD component. The default aggressiveness mode is
+// controlled by |kDefaultMode| in vad_core.c.
+//
+// - self [i/o] : Instance that should be initialized
+//
+// returns      : 0 (OK), -1 (null pointer in or if the default mode can't be
+//                set)
+int WebRtcVad_InitCore(VadInstT* self);
+
+/****************************************************************************
+ * WebRtcVad_set_mode_core(...)
+ *
+ * This function changes the VAD settings
+ *
+ * Input:
+ *      - inst      : VAD instance
+ *      - mode      : Aggressiveness degree
+ *                    0 (High quality) - 3 (Highly aggressive)
+ *
+ * Output:
+ *      - inst      : Changed  instance
+ *
+ * Return value     :  0 - Ok
+ *                    -1 - Error
+ */
+
+int WebRtcVad_set_mode_core(VadInstT* self, int mode);
+
+/****************************************************************************
+ * WebRtcVad_CalcVad48khz(...)
+ * WebRtcVad_CalcVad32khz(...)
+ * WebRtcVad_CalcVad16khz(...)
+ * WebRtcVad_CalcVad8khz(...)
+ *
+ * Calculate probability for active speech and make VAD decision.
+ *
+ * Input:
+ *      - inst          : Instance that should be initialized
+ *      - speech_frame  : Input speech frame
+ *      - frame_length  : Number of input samples
+ *
+ * Output:
+ *      - inst          : Updated filter states etc.
+ *
+ * Return value         : VAD decision
+ *                        0 - No active speech
+ *                        > 0 - Active speech: 1 when speech is detected in
+ *                              the frame itself, 2 + the remaining hang-over
+ *                              count while only the hang-over is active.
+ */
+int WebRtcVad_CalcVad48khz(VadInstT* inst,
+                           const int16_t* speech_frame,
+                           size_t frame_length);
+int WebRtcVad_CalcVad32khz(VadInstT* inst,
+                           const int16_t* speech_frame,
+                           size_t frame_length);
+int WebRtcVad_CalcVad16khz(VadInstT* inst,
+                           const int16_t* speech_frame,
+                           size_t frame_length);
+int WebRtcVad_CalcVad8khz(VadInstT* inst,
+                          const int16_t* speech_frame,
+                          size_t frame_length);
+
+#endif  // COMMON_AUDIO_VAD_VAD_CORE_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.c
@ -0,0 +1,329 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/vad/vad_filterbank.h"
+
+#include "webrtc/rtc_base/checks.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+// Constants used in LogOfEnergy().
+static const int16_t kLogConst = 24660;  // 160*log10(2) in Q9.
+static const int16_t kLogEnergyIntPart = 14336;  // 14 in Q10
+
+// Coefficients used by HighPassFilter, Q14.
+static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
+static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };  // [0] is 1.0 in Q14; HighPassFilter() only reads [1] and [2].
+
+// Allpass filter coefficients, upper and lower, in Q15.
+// Upper: 0.64, Lower: 0.17
+static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
+
+// Adjustment for division with two in SplitFilter. One entry per feature; passed as |offset| to LogOfEnergy().
+static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
+
+// High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is
+// sampled at 500 Hz.
+//
+// - data_in      [i]   : Input audio data sampled at 500 Hz.
+// - data_length  [i]   : Length of input and output data.
+// - filter_state [i/o] : State of the filter (4 values: 2 past inputs, 2 past outputs).
+// - data_out     [o]   : Output audio data in the frequency interval
+//                        80 - 250 Hz.
+static void HighPassFilter(const int16_t* data_in, size_t data_length,
+                           int16_t* filter_state, int16_t* data_out) {
+  size_t i;
+  const int16_t* in_ptr = data_in;
+  int16_t* out_ptr = data_out;
+  int32_t tmp32 = 0;  // Accumulator in Q14 (Q14 coefficients times Q0 samples).
+
+
+  // The sum of the absolute values of the impulse response:
+  // The zero/pole-filter has a max amplification of a single sample of: 1.4546
+  // Impulse response: 0.4047 -0.6179 -0.0266  0.1993  0.1035  -0.0194
+  // The all-zero section has a max amplification of a single sample of: 1.6189
+  // Impulse response: 0.4047 -0.8094  0.4047  0       0        0
+  // The all-pole section has a max amplification of a single sample of: 1.9931
+  // Impulse response: 1.0000  0.4734 -0.1189 -0.2187 -0.0627   0.04532
+
+  for (i = 0; i < data_length; i++) {
+    // All-zero section (filter coefficients in Q14).
+    tmp32 = kHpZeroCoefs[0] * *in_ptr;
+    tmp32 += kHpZeroCoefs[1] * filter_state[0];
+    tmp32 += kHpZeroCoefs[2] * filter_state[1];
+    filter_state[1] = filter_state[0];  // [0] and [1] hold the two previous inputs.
+    filter_state[0] = *in_ptr++;
+
+    // All-pole section (filter coefficients in Q14).
+    tmp32 -= kHpPoleCoefs[1] * filter_state[2];
+    tmp32 -= kHpPoleCoefs[2] * filter_state[3];
+    filter_state[3] = filter_state[2];  // [2] and [3] hold the two previous outputs.
+    filter_state[2] = (int16_t) (tmp32 >> 14);  // Q14 -> Q0.
+    *out_ptr++ = filter_state[2];
+  }
+}
+
+// All pass filtering of |data_in|, used before splitting the signal into two
+// frequency bands (low pass vs high pass).
+// Note that |data_in| and |data_out| can NOT correspond to the same address.
+//
+// - data_in            [i]   : Input audio signal given in Q0.
+// - data_length        [i]   : Number of output samples; |data_in| is read with a stride of 2.
+// - filter_coefficient [i]   : Given in Q15.
+// - filter_state       [i/o] : State of the filter given in Q(-1).
+// - data_out           [o]   : Output audio signal given in Q(-1).
+static void AllPassFilter(const int16_t* data_in, size_t data_length,
+                          int16_t filter_coefficient, int16_t* filter_state,
+                          int16_t* data_out) {
+  // The filter can only cause overflow (in the w16 output variable)
+  // if more than 4 consecutive input numbers are of maximum value and
+  // have the same sign as the impulse response's first taps.
+  // First 6 taps of the impulse response:
+  // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990
+
+  size_t i;
+  int16_t tmp16 = 0;
+  int32_t tmp32 = 0;
+  int32_t state32 = ((int32_t) (*filter_state) * (1 << 16));  // Q15
+
+  for (i = 0; i < data_length; i++) {
+    tmp32 = state32 + filter_coefficient * *data_in;
+    tmp16 = (int16_t) (tmp32 >> 16);  // Q(-1)
+    *data_out++ = tmp16;
+    state32 = (*data_in * (1 << 14)) - filter_coefficient * tmp16;  // Q14
+    state32 *= 2;  // Q15.
+    data_in += 2;  // Advance two input samples per output sample.
+  }
+
+  *filter_state = (int16_t) (state32 >> 16);  // Q(-1)
+}
+
+// Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to
+// an upper (high pass) part and a lower (low pass) part respectively.
+//
+// - data_in      [i]   : Input audio data to be split into two frequency bands.
+// - data_length  [i]   : Length of |data_in|.
+// - upper_state  [i/o] : State of the upper filter, given in Q(-1).
+// - lower_state  [i/o] : State of the lower filter, given in Q(-1).
+// - hp_data_out  [o]   : Output audio data of the upper half of the spectrum.
+//                        The length is |data_length| / 2.
+// - lp_data_out  [o]   : Output audio data of the lower half of the spectrum.
+//                        The length is |data_length| / 2.
+static void SplitFilter(const int16_t* data_in, size_t data_length,
+                        int16_t* upper_state, int16_t* lower_state,
+                        int16_t* hp_data_out, int16_t* lp_data_out) {
+  size_t i;
+  size_t half_length = data_length >> 1;  // Downsampling by 2.
+  int16_t tmp_out;  // Keeps the upper-branch sample while |hp_data_out| is overwritten.
+
+  // All-pass filtering upper branch.
+  AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,
+                hp_data_out);
+
+  // All-pass filtering lower branch.
+  AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,
+                lp_data_out);
+
+  // Make LP and HP signals.
+  for (i = 0; i < half_length; i++) {
+    tmp_out = *hp_data_out;
+    *hp_data_out++ -= *lp_data_out;  // HP = upper branch - lower branch.
+    *lp_data_out++ += tmp_out;  // LP = lower branch + upper branch.
+  }
+}
+
+// Calculates the energy of |data_in| in dB, and also updates an overall
+// |total_energy| if necessary.
+//
+// - data_in      [i]   : Input audio data for energy calculation.
+// - data_length  [i]   : Length of input data.
+// - offset       [i]   : Offset value added to |log_energy|.
+// - total_energy [i/o] : An external energy updated with the energy of
+//                        |data_in|.
+//                        NOTE: |total_energy| is only updated if
+//                        |total_energy| <= |kMinEnergy|.
+// - log_energy   [o]   : 10 * log10("energy of |data_in|") given in Q4.
+static void LogOfEnergy(const int16_t* data_in, size_t data_length,
+                        int16_t offset, int16_t* total_energy,
+                        int16_t* log_energy) {
+  // |tot_rshifts| accumulates the number of right shifts performed on |energy|.
+  int tot_rshifts = 0;
+  // The |energy| will be normalized to 15 bits. We use unsigned integer because
+  // we eventually will mask out the fractional part.
+  uint32_t energy = 0;
+
+  RTC_DCHECK(data_in);
+  RTC_DCHECK_GT(data_length, 0);
+
+  energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length,
+                                       &tot_rshifts);
+
+  if (energy != 0) {
+    // By construction, normalizing to 15 bits is equivalent with 17 leading
+    // zeros of an unsigned 32 bit value.
+    int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
+    // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
+    // (14 << 10), which is what we initialize |log2_energy| with. For a more
+    // detailed derivations, see below.
+    int16_t log2_energy = kLogEnergyIntPart;
+
+    tot_rshifts += normalizing_rshifts;
+    // Normalize |energy| to 15 bits.
+    // |tot_rshifts| is now the total number of right shifts performed on
+    // |energy| after normalization. This means that |energy| is in
+    // Q(-tot_rshifts).
+    if (normalizing_rshifts < 0) {
+      energy <<= -normalizing_rshifts;
+    } else {
+      energy >>= normalizing_rshifts;
+    }
+
+    // Calculate the energy of |data_in| in dB, in Q4.
+    //
+    // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
+    // 160 * log10(|energy| * 2^|tot_rshifts|) =
+    // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
+    // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
+    // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
+    // |kLogConst| * (|log2_energy| + |tot_rshifts|)
+    //
+    // We know by construction that |energy| is normalized to 15 bits. Hence,
+    // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
+    // Further, we'd like |log2_energy| in Q10
+    // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
+    // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
+    // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
+    // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
+    // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
+    //
+    // Note that frac_Q15 = (|energy| & 0x00003FFF)
+
+    // Calculate and add the fractional part to |log2_energy|.
+    log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);
+
+    // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
+    // Note that we in our derivation above have accounted for an output in Q4.
+    *log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) +
+        ((tot_rshifts * kLogConst) >> 9));
+
+    if (*log_energy < 0) {  // Clamp at 0 before |offset| is added below.
+      *log_energy = 0;
+    }
+  } else {
+    *log_energy = offset;  // Zero energy: the output is just the offset.
+    return;  // |total_energy| is left unchanged in this case.
+  }
+
+  *log_energy += offset;
+
+  // Update the approximate |total_energy| with the energy of |data_in|, if
+  // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
+  // energy indicator in WebRtcVad_GmmProbability() in vad_core.c.
+  if (*total_energy <= kMinEnergy) {
+    if (tot_rshifts >= 0) {
+      // We know by construction that the |energy| > |kMinEnergy| in Q0, so add
+      // an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
+      *total_energy += kMinEnergy + 1;
+    } else {
+      // By construction |energy| is represented by 15 bits, hence any number of
+      // right shifted |energy| will fit in an int16_t. In addition, adding the
+      // value to |total_energy| is wrap around safe as long as
+      // |kMinEnergy| < 8192.
+      *total_energy += (int16_t) (energy >> -tot_rshifts);  // Q0.
+    }
+  }
+}
+
+int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
+                                    size_t data_length, int16_t* features) {
+  int16_t total_energy = 0;  // Updated by every LogOfEnergy() call; returned at the end.
+  // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to
+  // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
+  // have at most 120 samples after the first split and at most 60 samples after
+  // the second split.
+  int16_t hp_120[120], lp_120[120];
+  int16_t hp_60[60], lp_60[60];
+  const size_t half_data_length = data_length >> 1;
+  size_t length = half_data_length;  // |data_length| / 2, corresponds to
+                                     // bandwidth = 2000 Hz after downsampling.
+
+  // Initialize variables for the first SplitFilter().
+  int frequency_band = 0;
+  const int16_t* in_ptr = data_in;  // [0 - 4000] Hz.
+  int16_t* hp_out_ptr = hp_120;  // [2000 - 4000] Hz.
+  int16_t* lp_out_ptr = lp_120;  // [0 - 2000] Hz.
+
+  RTC_DCHECK_LE(data_length, 240);
+  RTC_DCHECK_LT(4, kNumChannels - 1);  // Checking maximum |frequency_band|.
+
+  // Split at 2000 Hz and downsample.
+  SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],
+              &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample.
+  frequency_band = 1;
+  in_ptr = hp_120;  // [2000 - 4000] Hz.
+  hp_out_ptr = hp_60;  // [3000 - 4000] Hz.
+  lp_out_ptr = lp_60;  // [2000 - 3000] Hz.
+  SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+              &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 3000 Hz - 4000 Hz.
+  length >>= 1;  // |data_length| / 4 <=> bandwidth = 1000 Hz.
+
+  LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]);
+
+  // Energy in 2000 Hz - 3000 Hz.
+  LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]);
+
+  // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample.
+  frequency_band = 2;
+  in_ptr = lp_120;  // [0 - 2000] Hz.
+  hp_out_ptr = hp_60;  // [1000 - 2000] Hz.
+  lp_out_ptr = lp_60;  // [0 - 1000] Hz.
+  length = half_data_length;  // |data_length| / 2 <=> bandwidth = 2000 Hz.
+  SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+              &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 1000 Hz - 2000 Hz.
+  length >>= 1;  // |data_length| / 4 <=> bandwidth = 1000 Hz.
+  LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]);
+
+  // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample.
+  frequency_band = 3;
+  in_ptr = lp_60;  // [0 - 1000] Hz.
+  hp_out_ptr = hp_120;  // [500 - 1000] Hz.
+  lp_out_ptr = lp_120;  // [0 - 500] Hz.
+  SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+              &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 500 Hz - 1000 Hz.
+  length >>= 1;  // |data_length| / 8 <=> bandwidth = 500 Hz.
+  LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]);
+
+  // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample.
+  frequency_band = 4;
+  in_ptr = lp_120;  // [0 - 500] Hz.
+  hp_out_ptr = hp_60;  // [250 - 500] Hz.
+  lp_out_ptr = lp_60;  // [0 - 250] Hz.
+  SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+              &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+  // Energy in 250 Hz - 500 Hz.
+  length >>= 1;  // |data_length| / 16 <=> bandwidth = 250 Hz.
+  LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);
+
+  // Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
+  HighPassFilter(lp_60, length, self->hp_filter_state, hp_120);  // |hp_120| is reused as the output buffer.
+
+  // Energy in 80 Hz - 250 Hz.
+  LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]);
+
+  return total_energy;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.h
@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This file includes feature calculating functionality used in vad_core.c.
+ */
+
+#ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
+#define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
+
+#include "webrtc/common_audio/vad/vad_core.h"
+
+// Takes |data_length| samples of |data_in| and calculates the logarithm of the
+// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
+//        80 Hz - 250 Hz
+//        250 Hz - 500 Hz
+//        500 Hz - 1000 Hz
+//        1000 Hz - 2000 Hz
+//        2000 Hz - 3000 Hz
+//        3000 Hz - 4000 Hz
+//
+// The values are given in Q4 and written to |features|. Further, an approximate
+// overall energy is returned. The return value is used in
+// WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above
+// the threshold |kMinEnergy|.
+//
+// - self         [i/o] : State information of the VAD.
+// - data_in      [i]   : Input audio data, for feature extraction.
+// - data_length  [i]   : Audio data size, in number of samples (80, 160 or 240 expected).
+// - features     [o]   : 10 * log10(energy in each frequency band), Q4 (6 values).
+// - returns            : Total energy of the signal (NOTE! This value is not
+//                        exact. It is only used in a comparison.)
+int16_t WebRtcVad_CalculateFeatures(VadInstT* self,
+                                    const int16_t* data_in,
+                                    size_t data_length,
+                                    int16_t* features);
+
+#endif  // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.c
@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/vad/vad_gmm.h"
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+static const int32_t kCompVar = 22005;  // Exponent limit (Q10); at or above it the returned probability is 0.
+static const int16_t kLog2Exp = 5909;  // log2(exp(1)) in Q12.
+
+// For a normal distribution, the probability of |input| is calculated and
+// returned (in Q20). The formula for normal distributed probability is
+//
+// 1 / s * exp(-(x - m)^2 / (2 * s^2))
+//
+// where the parameters are given in the following Q domains:
+// m = |mean| (Q7)
+// s = |std| (Q7)
+// x = |input| (Q4)
+// in addition to the probability we output |delta| (in Q11) used when updating
+// the noise/speech model.
+int32_t WebRtcVad_GaussianProbability(int16_t input,
+                                      int16_t mean,
+                                      int16_t std,
+                                      int16_t* delta) {
+  int16_t tmp16, inv_std, inv_std2, exp_value = 0;
+  int32_t tmp32;
+
+  // Calculate |inv_std| = 1 / s, in Q10.
+  // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
+  // Q-domain: Q17 / Q7 = Q10.
+  tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
+  inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
+
+  // Calculate |inv_std2| = 1 / s^2, in Q14.
+  tmp16 = (inv_std >> 2);  // Q10 -> Q8.
+  // Q-domain: (Q8 * Q8) >> 2 = Q14.
+  inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
+  // TODO(bjornv): Investigate if changing to
+  // inv_std2 = (int16_t)((inv_std * inv_std) >> 6);
+  // gives better accuracy.
+
+  tmp16 = (input << 3);  // Q4 -> Q7
+  tmp16 = tmp16 - mean;  // Q7 - Q7 = Q7
+
+  // To be used later, when updating noise/speech model.
+  // |delta| = (x - m) / s^2, in Q11.
+  // Q-domain: (Q14 * Q7) >> 10 = Q11.
+  *delta = (int16_t)((inv_std2 * tmp16) >> 10);
+
+  // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
+  // division by two with one shift.
+  // Q-domain: (Q11 * Q7) >> 8 = Q10.
+  tmp32 = (*delta * tmp16) >> 9;  // 8 shifts for the Q-domain plus 1 for the division by two.
+
+  // If the exponent is small enough to give a non-zero probability we calculate
+  // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
+  //             ~= exp2(-log2(exp(1)) * |tmp32|).
+  if (tmp32 < kCompVar) {
+    // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
+    // Q-domain: (Q12 * Q10) >> 12 = Q10.
+    tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
+    tmp16 = -tmp16;
+    exp_value = (0x0400 | (tmp16 & 0x03FF));  // 1.0 in Q10 OR-ed with the low 10 bits of |tmp16|.
+    tmp16 ^= 0xFFFF;
+    tmp16 >>= 10;
+    tmp16 += 1;  // |tmp16| now holds the right-shift count applied below.
+    // Get |exp_value| = exp(-|tmp32|) in Q10.
+    exp_value >>= tmp16;
+  }
+
+  // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
+  // Q-domain: Q10 * Q10 = Q20.
+  return inv_std * exp_value;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.h
@ -0,0 +1,39 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Gaussian probability calculations internally used in vad_core.c.
+
+#ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
+#define COMMON_AUDIO_VAD_VAD_GMM_H_
+
+#include <stdint.h>
+
+// Calculates the probability for |input|, given that |input| comes from a
+// normal distribution with mean and standard deviation (|mean|, |std|).
+//
+// Inputs:
+//      - input         : input sample in Q4.
+//      - mean          : mean input in the statistical model, Q7.
+//      - std           : standard deviation, Q7.
+//
+// Output:
+//
+//      - delta         : input used when updating the model, Q11.
+//                        |delta| = (|input| - |mean|) / |std|^2.
+//
+// Return (in Q20, per the implementation in vad_gmm.c):
+//   (probability for |input|) =
+//    1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
+int32_t WebRtcVad_GaussianProbability(int16_t input,
+                                      int16_t mean,
+                                      int16_t std,
+                                      int16_t* delta);
+
+#endif  // COMMON_AUDIO_VAD_VAD_GMM_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.c
@ -0,0 +1,176 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/vad/vad_sp.h"
+
+#include "webrtc/rtc_base/checks.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/vad/vad_core.h"
+
+// Allpass filter coefficients, upper and lower, in Q13.
+// Upper: 0.64, Lower: 0.17.
+static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 };  // Q13; used by WebRtcVad_Downsampling().
+static const int16_t kSmoothingDown = 6553;  // 0.2 in Q15; used when the median is below the current mean.
+static const int16_t kSmoothingUp = 32439;  // 0.99 in Q15; used otherwise.
+
+// TODO(bjornv): Move this function to vad_filterbank.c.
+// Downsampling filter based on splitting filter and allpass functions.
+void WebRtcVad_Downsampling(const int16_t* signal_in,
+                            int16_t* signal_out,
+                            int32_t* filter_state,
+                            size_t in_length) {
+  int16_t tmp16_1 = 0, tmp16_2 = 0;
+  int32_t tmp32_1 = filter_state[0];  // State of the upper branch.
+  int32_t tmp32_2 = filter_state[1];  // State of the lower branch.
+  size_t n = 0;
+  // Downsampling by 2 gives half length.
+  size_t half_length = (in_length >> 1);
+
+  // Filter coefficients in Q13, filter state in Q0.
+  for (n = 0; n < half_length; n++) {
+    // All-pass filtering upper branch.
+    tmp16_1 = (int16_t) ((tmp32_1 >> 1) +
+        ((kAllPassCoefsQ13[0] * *signal_in) >> 14));
+    *signal_out = tmp16_1;  // The output sample starts as the upper-branch value.
+    tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12);
+
+    // All-pass filtering lower branch.
+    tmp16_2 = (int16_t) ((tmp32_2 >> 1) +
+        ((kAllPassCoefsQ13[1] * *signal_in) >> 14));
+    *signal_out++ += tmp16_2;  // Add the lower branch: one output per two inputs.
+    tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12);
+  }
+  // Store the filter states.
+  filter_state[0] = tmp32_1;
+  filter_state[1] = tmp32_2;
+}
+
+// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
+// smallest values the last 100 frames. Then calculates and returns the median
+// of the five smallest values.
+int16_t WebRtcVad_FindMinimum(VadInstT* self,
+                              int16_t feature_value,
+                              int channel) {
+  int i = 0, j = 0;
+  int position = -1;  // Stays -1 when |feature_value| is not inserted.
+  // Offset to beginning of the 16 minimum values in memory.
+  const int offset = (channel << 4);
+  int16_t current_median = 1600;  // Default used while |frame_counter| is 0.
+  int16_t alpha = 0;  // Smoothing factor in Q15; stays 0 while |frame_counter| is 0.
+  int32_t tmp32 = 0;
+  // Pointer to memory for the 16 minimum values and the age of each value of
+  // the |channel|.
+  int16_t* age = &self->index_vector[offset];
+  int16_t* smallest_values = &self->low_value_vector[offset];
+
+  RTC_DCHECK_LT(channel, kNumChannels);
+
+  // Each value in |smallest_values| is getting 1 loop older. Update |age|, and
+  // remove old values.
+  for (i = 0; i < 16; i++) {
+    if (age[i] != 100) {
+      age[i]++;
+    } else {
+      // Too old value. Remove from memory and shift larger values downwards.
+      for (j = i; j < 15; j++) {
+        smallest_values[j] = smallest_values[j + 1];
+        age[j] = age[j + 1];
+      }
+      age[15] = 101;  // Placeholder entry: its age is already past the expiry value 100.
+      smallest_values[15] = 10000;
+    }
+  }
+
+  // Check if |feature_value| is smaller than any of the values in
+  // |smallest_values|. If so, find the |position| where to insert the new value
+  // (|feature_value|).
+  if (feature_value < smallest_values[7]) {
+    if (feature_value < smallest_values[3]) {
+      if (feature_value < smallest_values[1]) {
+        if (feature_value < smallest_values[0]) {
+          position = 0;
+        } else {
+          position = 1;
+        }
+      } else if (feature_value < smallest_values[2]) {
+        position = 2;
+      } else {
+        position = 3;
+      }
+    } else if (feature_value < smallest_values[5]) {
+      if (feature_value < smallest_values[4]) {
+        position = 4;
+      } else {
+        position = 5;
+      }
+    } else if (feature_value < smallest_values[6]) {
+      position = 6;
+    } else {
+      position = 7;
+    }
+  } else if (feature_value < smallest_values[15]) {
+    if (feature_value < smallest_values[11]) {
+      if (feature_value < smallest_values[9]) {
+        if (feature_value < smallest_values[8]) {
+          position = 8;
+        } else {
+          position = 9;
+        }
+      } else if (feature_value < smallest_values[10]) {
+        position = 10;
+      } else {
+        position = 11;
+      }
+    } else if (feature_value < smallest_values[13]) {
+      if (feature_value < smallest_values[12]) {
+        position = 12;
+      } else {
+        position = 13;
+      }
+    } else if (feature_value < smallest_values[14]) {
+      position = 14;
+    } else {
+      position = 15;
+    }
+  }
+
+  // If we have detected a new small value, insert it at the correct position
+  // and shift larger values up.
+  if (position > -1) {
+    for (i = 15; i > position; i--) {
+      smallest_values[i] = smallest_values[i - 1];
+      age[i] = age[i - 1];
+    }
+    smallest_values[position] = feature_value;
+    age[position] = 1;
+  }
+
+  // Get |current_median|.
+  if (self->frame_counter > 2) {
+    current_median = smallest_values[2];  // Third smallest value = median of the five smallest.
+  } else if (self->frame_counter > 0) {
+    current_median = smallest_values[0];  // Few frames seen so far: use the smallest value.
+  }
+
+  // Smooth the median value.
+  if (self->frame_counter > 0) {
+    if (current_median < self->mean_value[channel]) {
+      alpha = kSmoothingDown;  // 0.2 in Q15.
+    } else {
+      alpha = kSmoothingUp;  // 0.99 in Q15.
+    }
+  }
+  tmp32 = (alpha + 1) * self->mean_value[channel];  // Weight of the previous mean (Q15).
+  tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median;
+  tmp32 += 16384;  // Rounding: 0.5 in Q15.
+  self->mean_value[channel] = (int16_t) (tmp32 >> 15);
+
+  return self->mean_value[channel];
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.h
@ -0,0 +1,54 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file includes specific signal processing tools used in vad_core.c.
+
+#ifndef COMMON_AUDIO_VAD_VAD_SP_H_
+#define COMMON_AUDIO_VAD_VAD_SP_H_
+
+#include "webrtc/common_audio/vad/vad_core.h"
+
+// Downsamples the signal by a factor 2, e.g. 32->16 or 16->8.
+//
+// Inputs:
+//      - signal_in     : Input signal.
+//      - in_length     : Length of input signal in samples.
+//
+// Input & Output:
+//      - filter_state  : Current filter states of the two all-pass filters. The
+//                        |filter_state| is updated after all samples have been
+//                        processed.
+//
+// Output:
+//      - signal_out    : Downsampled signal (of length |in_length| / 2).
+void WebRtcVad_Downsampling(const int16_t* signal_in,
+                            int16_t* signal_out,
+                            int32_t* filter_state,
+                            size_t in_length);
+
+// Updates and returns the smoothed feature minimum. As minimum we use the
+// median of the five smallest feature values in a 100 frames long window.
+// As long as |handle->frame_counter| is zero, that is, we haven't received any
+// "valid" data, FindMinimum() outputs the default value of 1600.
+//
+// Inputs:
+//      - feature_value : New feature value to update with.
+//      - channel       : Channel number (must be less than |kNumChannels|).
+//
+// Input & Output:
+//      - handle        : State information of the VAD.
+//
+// Returns:
+//                      : Smoothed minimum value for a moving window.
+int16_t WebRtcVad_FindMinimum(VadInstT* handle,
+                              int16_t feature_value,
+                              int channel);
+
+#endif  // COMMON_AUDIO_VAD_VAD_SP_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/webrtc_vad.c
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/webrtc_vad.c
@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/vad/include/webrtc_vad.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/common_audio/vad/vad_core.h"
+
+static const int kInitCheck = 42;  // |init_flag| value that the API functions accept as "initialized".
+static const int kValidRates[] = { 8000, 16000, 32000, 48000 };  // Supported sample rates in Hz.
+static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
+static const int kMaxFrameLengthMs = 30;  // Frames of 10, 20 or 30 ms are accepted.
+
+VadInst* WebRtcVad_Create() {
+  // Allocates a VAD instance with |init_flag| cleared (not yet initialized).
+  // Returns NULL if malloc() fails instead of writing through a NULL pointer.
+  VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
+  WebRtcSpl_Init();
+  if (self != NULL) self->init_flag = 0;  // 0 != kInitCheck: not initialized yet.
+  return (VadInst*)self;
+}
+
+void WebRtcVad_Free(VadInst* handle) {
+  free(handle);  // free(NULL) is a no-op, so a NULL |handle| is harmless here.
+}
+
+// TODO(bjornv): Move WebRtcVad_InitCore() code here.
+int WebRtcVad_Init(VadInst* handle) {
+  // Initialize the core VAD component; its return value is passed through unchanged.
+  return WebRtcVad_InitCore((VadInstT*) handle);
+}
+
+// TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
+int WebRtcVad_set_mode(VadInst* handle, int mode) {
+  VadInstT* self = (VadInstT*) handle;
+
+  if (handle == NULL) {
+    return -1;  // No instance given.
+  }
+  if (self->init_flag != kInitCheck) {
+    return -1;  // Instance has not been initialized.
+  }
+
+  return WebRtcVad_set_mode_core(self, mode);  // Result of the core call is returned as is.
+}
+
+int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
+                      size_t frame_length) {
+  int vad = -1;  // Error value; replaced by the core decision for a supported |fs|.
+  VadInstT* self = (VadInstT*) handle;
+
+  if (handle == NULL) {
+    return -1;
+  }
+
+  if (self->init_flag != kInitCheck) {
+    return -1;  // Instance has not been initialized.
+  }
+  if (audio_frame == NULL) {
+    return -1;
+  }
+  if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
+    return -1;  // Unsupported rate / frame length combination.
+  }
+
+  if (fs == 48000) {
+      vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length);
+  } else if (fs == 32000) {
+    vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length);
+  } else if (fs == 16000) {
+    vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length);
+  } else if (fs == 8000) {
+    vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length);
+  }
+
+  if (vad > 0) {
+    vad = 1;  // Collapse every positive core decision to 1.
+  }
+  return vad;
+}
+
+int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) {
+  int return_value = -1;  // -1 = invalid combination, 0 = valid.
+  size_t i;
+  int valid_length_ms;
+  size_t valid_length;
+
+  // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and
+  // see if we have a matching pair.
+  for (i = 0; i < kRatesSize; i++) {
+    if (kValidRates[i] == rate) {
+      for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs;
+          valid_length_ms += 10) {
+        valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms);  // Samples in |valid_length_ms| ms.
+        if (frame_length == valid_length) {
+          return_value = 0;
+          break;
+        }
+      }
+      break;  // The rate matched; no other table entry needs to be examined.
+    }
+  }
+
+  return return_value;
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.cc
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.cc
@ -0,0 +1,170 @@
+/*
+ *  Copyright 2006 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Most of this was borrowed (with minor modifications) from V8's and Chromium's
+// src/base/logging.cc.
+
+#include <cstdarg>
+#include <cstdio>
+#include <cstdlib>
+
+#if defined(WEBRTC_ANDROID)
+#define RTC_LOG_TAG_ANDROID "rtc"
+#include <android/log.h>  // NOLINT
+#endif
+
+#if defined(WEBRTC_WIN)
+#include <windows.h>
+#endif
+
+#include <errno.h>
+#define LAST_SYSTEM_ERROR (errno)
+
+// #if defined(WEBRTC_WIN)
+// #define LAST_SYSTEM_ERROR (::GetLastError())
+// #elif defined(__native_client__) && __native_client__
+// #define LAST_SYSTEM_ERROR (0)
+// #elif defined(WEBRTC_POSIX)
+// #include <errno.h>
+// #define LAST_SYSTEM_ERROR (errno)
+// #endif  // WEBRTC_WIN
+
+#include "webrtc/rtc_base/checks.h"
+
+namespace {
+// printf-style append: formats |fmt| with the variadic arguments and adds the
+// result to the end of |*s|. Nothing is appended when the formatted length is
+// zero or vsnprintf reports an error.
+#if defined(__GNUC__)
+__attribute__((__format__(__printf__, 2, 3)))
+#endif
+void AppendFormat(std::string* s, const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+
+  // First pass on a copy of the argument list: measure only, write nothing.
+  va_list measure_args;
+  va_copy(measure_args, args);
+  const int needed = std::vsnprintf(nullptr, 0, fmt, measure_args);
+  va_end(measure_args);
+
+  if (needed <= 0) {
+    va_end(args);
+    return;
+  }
+
+  // Second pass: grow the string and format straight into the new tail.
+  const size_t old_length = s->size();
+  s->resize(old_length + needed);
+  // The "+ 1" gives vsnprintf room for the terminating '\0'.
+  std::vsnprintf(&(*s)[old_length], needed + 1, fmt, args);
+  va_end(args);
+}
+}  // namespace
+
+namespace rtc {
+namespace webrtc_checks_impl {
+
+// Reads one argument from args, appends it to s and advances fmt.
+// Returns true iff an argument was successfully parsed.
+bool ParseArg(va_list* args, const CheckArgType** fmt, std::string* s) {
+  // The tag at the cursor decides which C type is pulled from |args| and how
+  // it is rendered into |s|.
+  const CheckArgType tag = **fmt;
+  switch (tag) {
+    case CheckArgType::kEnd:
+      // Terminator: nothing is consumed and the cursor stays where it is.
+      return false;
+    case CheckArgType::kInt:
+      AppendFormat(s, "%d", va_arg(*args, int));
+      break;
+    case CheckArgType::kLong:
+      AppendFormat(s, "%ld", va_arg(*args, long));
+      break;
+    case CheckArgType::kLongLong:
+      AppendFormat(s, "%lld", va_arg(*args, long long));
+      break;
+    case CheckArgType::kUInt:
+      AppendFormat(s, "%u", va_arg(*args, unsigned));
+      break;
+    case CheckArgType::kULong:
+      AppendFormat(s, "%lu", va_arg(*args, unsigned long));
+      break;
+    case CheckArgType::kULongLong:
+      AppendFormat(s, "%llu", va_arg(*args, unsigned long long));
+      break;
+    case CheckArgType::kDouble:
+      AppendFormat(s, "%g", va_arg(*args, double));
+      break;
+    case CheckArgType::kLongDouble:
+      AppendFormat(s, "%Lg", va_arg(*args, long double));
+      break;
+    case CheckArgType::kCharP:
+      s->append(va_arg(*args, const char*));
+      break;
+    case CheckArgType::kStdString:
+      // Strings travel through the varargs as pointers.
+      s->append(*va_arg(*args, const std::string*));
+      break;
+    case CheckArgType::kVoidP:
+      AppendFormat(s, "%p", va_arg(*args, const void*));
+      break;
+    default:
+      // Any other tag is flagged in the output and stops the parse.
+      s->append("[Invalid CheckArgType]");
+      return false;
+  }
+
+  // One argument consumed: step the cursor to the next tag.
+  ++*fmt;
+  return true;
+}
+
+RTC_NORETURN void FatalLog(const char* file,
+                           int line,
+                           const char* message,
+                           const CheckArgType* fmt,
+                           ...) {
+  va_list args;
+  va_start(args, fmt);
+
+  std::string s;
+  AppendFormat(&s,
+               "\n\n"
+               "#\n"
+               "# Fatal error in: %s, line %d\n"
+               "# last system error: %u\n"
+               "# Check failed: %s",
+               file, line, LAST_SYSTEM_ERROR, message);
+
+  if (*fmt == CheckArgType::kCheckOp) {
+    // This log message was generated by RTC_CHECK_OP, so we have to complete
+    // the error message using the operands that have been passed as the first
+    // two arguments.
+    fmt++;
+
+    std::string s1, s2;
+    if (ParseArg(&args, &fmt, &s1) && ParseArg(&args, &fmt, &s2))
+      AppendFormat(&s, " (%s vs. %s)\n# ", s1.c_str(), s2.c_str());
+  } else {
+    s.append("\n# ");
+  }
+
+  // Append all the user-supplied arguments to the message.
+  while (ParseArg(&args, &fmt, &s))
+    ;
+
+  va_end(args);
+
+  const char* output = s.c_str();
+
+#if defined(WEBRTC_ANDROID)
+  __android_log_print(ANDROID_LOG_ERROR, RTC_LOG_TAG_ANDROID, "%s\n", output);
+#endif
+
+  fflush(stdout);
+  fprintf(stderr, "%s", output);
+  fflush(stderr);
+  abort();
+}
+
+}  // namespace webrtc_checks_impl
+}  // namespace rtc
+
+// Function to call from the C version of the RTC_CHECK and RTC_DCHECK macros.
+RTC_NORETURN void rtc_FatalMessage(const char* file, int line,
+                                   const char* msg) {
+  using rtc::webrtc_checks_impl::CheckArgType;
+
+  // A tag array holding only the terminator: no extra operands follow |msg|.
+  static constexpr CheckArgType kNoOperands[] = {CheckArgType::kEnd};
+  rtc::webrtc_checks_impl::FatalLog(file, line, msg, kNoOperands);
+}
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.h
@ -0,0 +1,400 @@
+/*
+ *  Copyright 2006 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RTC_BASE_CHECKS_H_
+#define RTC_BASE_CHECKS_H_
+
+// If you for some reason need to know if DCHECKs are on, test the value of
+// RTC_DCHECK_IS_ON. (Test its value, not if it's defined; it'll always be
+// defined, to either a true or a false value.)
+#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)
+#define RTC_DCHECK_IS_ON 1
+#else
+#define RTC_DCHECK_IS_ON 0
+#endif
+
+// Annotate a function that will not return control flow to the caller.
+#if defined(_MSC_VER)
+#define RTC_NORETURN __declspec(noreturn)
+#elif defined(__GNUC__)
+#define RTC_NORETURN __attribute__ ((__noreturn__))
+#else
+#define RTC_NORETURN
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+RTC_NORETURN void rtc_FatalMessage(const char* file, int line, const char* msg);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#ifdef __cplusplus
+// C++ version.
+
+#include <string>
+
+#include "webrtc/rtc_base/numerics/safe_compare.h"
+#include "webrtc/rtc_base/system/inline.h"
+
+// The macros here print a message to stderr and abort under various
+// conditions. All will accept additional stream messages. For example:
+// RTC_DCHECK_EQ(foo, bar) << "I'm printed when foo != bar.";
+//
+// - RTC_CHECK(x) is an assertion that x is always true, and that if it isn't,
+//   it's better to terminate the process than to continue. During development,
+//   the reason that it's better to terminate might simply be that the error
+//   handling code isn't in place yet; in production, the reason might be that
+//   the author of the code truly believes that x will always be true, but that
+//   she recognizes that if she is wrong, abrupt and unpleasant process
+//   termination is still better than carrying on with the assumption violated.
+//
+//   RTC_CHECK always evaluates its argument, so it's OK for x to have side
+//   effects.
+//
+// - RTC_DCHECK(x) is the same as RTC_CHECK(x)---an assertion that x is always
+//   true---except that x will only be evaluated in debug builds; in production
+//   builds, x is simply assumed to be true. This is useful if evaluating x is
+//   expensive and the expected cost of failing to detect the violated
+//   assumption is acceptable. You should not handle cases where a production
+//   build fails to spot a violated condition, even those that would result in
+//   crashes. If the code needs to cope with the error, make it cope, but don't
+//   call RTC_DCHECK; if the condition really can't occur, but you'd sleep
+//   better at night knowing that the process will suicide instead of carrying
+//   on in case you were wrong, use RTC_CHECK instead of RTC_DCHECK.
+//
+//   RTC_DCHECK only evaluates its argument in debug builds, so if x has visible
+//   side effects, you need to write e.g.
+//     bool w = x; RTC_DCHECK(w);
+//
+// - RTC_CHECK_EQ, _NE, _GT, ..., and RTC_DCHECK_EQ, _NE, _GT, ... are
+//   specialized variants of RTC_CHECK and RTC_DCHECK that print prettier
+//   messages if the condition doesn't hold. Prefer them to raw RTC_CHECK and
+//   RTC_DCHECK.
+//
+// - FATAL() aborts unconditionally.
+//
+// TODO(ajm): Ideally, checks.h would be combined with logging.h, but
+// consolidation with system_wrappers/logging.h should happen first.
+
+namespace rtc {
+namespace webrtc_checks_impl {
+enum class CheckArgType : int8_t {
+  kEnd = 0,     // Terminates a tag array; ParseArg returns false here.
+  kInt,         // Operand is read as int.
+  kLong,        // Operand is read as long.
+  kLongLong,    // Operand is read as long long.
+  kUInt,        // Operand is read as unsigned int.
+  kULong,       // Operand is read as unsigned long.
+  kULongLong,   // Operand is read as unsigned long long.
+  kDouble,      // Operand is read as double.
+  kLongDouble,  // Operand is read as long double.
+  kCharP,       // Operand is read as const char*.
+  kStdString,   // Operand is read as const std::string* (passed by pointer).
+  kVoidP,       // Operand is read as const void*.
+
+  // kCheckOp doesn't represent an argument type. Instead, it is sent as the
+  // first argument from RTC_CHECK_OP to make FatalLog use the next two
+  // arguments to build the special CHECK_OP error message
+  // (the "a == b (1 vs. 2)" bit).
+  kCheckOp,
+};
+
+RTC_NORETURN void FatalLog(const char* file,
+                           int line,
+                           const char* message,
+                           const CheckArgType* fmt,
+                           ...);
+
+// Wrapper for log arguments. Only ever make values of this type with the
+// MakeVal() functions.
+template <CheckArgType N, typename T>
+struct Val {
+  // The wrapped operand. It is the only data member, so Val{x} initializes it.
+  T val;
+
+  // Tag that tells FatalLog how to read this operand back from the varargs.
+  static constexpr CheckArgType Type() { return N; }
+
+  // Returns a copy of the wrapped operand.
+  T GetVal() const { return val; }
+};
+
+// MakeVal overload set: each overload wraps its argument in a Val whose tag
+// names the C type the argument has when it is passed through the varargs.
+
+// Signed integers.
+inline Val<CheckArgType::kInt, int> MakeVal(int x) {
+  return Val<CheckArgType::kInt, int>{x};
+}
+inline Val<CheckArgType::kLong, long> MakeVal(long x) {
+  return Val<CheckArgType::kLong, long>{x};
+}
+inline Val<CheckArgType::kLongLong, long long> MakeVal(long long x) {
+  return Val<CheckArgType::kLongLong, long long>{x};
+}
+
+// Unsigned integers.
+inline Val<CheckArgType::kUInt, unsigned int> MakeVal(unsigned int x) {
+  return Val<CheckArgType::kUInt, unsigned int>{x};
+}
+inline Val<CheckArgType::kULong, unsigned long> MakeVal(unsigned long x) {
+  return Val<CheckArgType::kULong, unsigned long>{x};
+}
+inline Val<CheckArgType::kULongLong, unsigned long long> MakeVal(
+    unsigned long long x) {
+  return Val<CheckArgType::kULongLong, unsigned long long>{x};
+}
+
+// Floating point.
+inline Val<CheckArgType::kDouble, double> MakeVal(double x) {
+  return Val<CheckArgType::kDouble, double>{x};
+}
+inline Val<CheckArgType::kLongDouble, long double> MakeVal(long double x) {
+  return Val<CheckArgType::kLongDouble, long double>{x};
+}
+
+// C strings are stored as the pointer itself.
+inline Val<CheckArgType::kCharP, const char*> MakeVal(const char* x) {
+  return Val<CheckArgType::kCharP, const char*>{x};
+}
+
+// std::string is stored by address, so the referenced string has to stay
+// alive for as long as the Val is used.
+inline Val<CheckArgType::kStdString, const std::string*> MakeVal(
+    const std::string& x) {
+  return Val<CheckArgType::kStdString, const std::string*>{&x};
+}
+
+// Raw pointers.
+inline Val<CheckArgType::kVoidP, const void*> MakeVal(const void* x) {
+  return Val<CheckArgType::kVoidP, const void*>{x};
+}
+
+// Ephemeral type that represents the result of the logging << operator.
+template <typename... Ts>
+class LogStreamer;
+
+// Base case: Before the first << argument.
+template <>
+class LogStreamer<> final {  // Empty chain: no operand captured yet.
+ public:
+  template <
+      typename U,  // Arithmetic U: the operand is taken by value.
+      typename std::enable_if<std::is_arithmetic<U>::value>::type* = nullptr>
+  RTC_FORCE_INLINE LogStreamer<decltype(MakeVal(std::declval<U>()))> operator<<(
+      U arg) const {
+    return LogStreamer<decltype(MakeVal(std::declval<U>()))>(MakeVal(arg),
+                                                             this);
+  }
+
+  template <
+      typename U,  // Non-arithmetic U: the operand is taken by const reference.
+      typename std::enable_if<!std::is_arithmetic<U>::value>::type* = nullptr>
+  RTC_FORCE_INLINE LogStreamer<decltype(MakeVal(std::declval<U>()))> operator<<(
+      const U& arg) const {
+    return LogStreamer<decltype(MakeVal(std::declval<U>()))>(MakeVal(arg),
+                                                             this);
+  }
+
+  template <typename... Us>  // End of the chain: builds the tag array and calls FatalLog.
+  RTC_NORETURN RTC_FORCE_INLINE static void Call(const char* file,
+                                                 const int line,
+                                                 const char* message,
+                                                 const Us&... args) {
+    static constexpr CheckArgType t[] = {Us::Type()..., CheckArgType::kEnd};
+    FatalLog(file, line, message, t, args.GetVal()...);
+  }
+
+  template <typename... Us>  // Same as Call, but the tag array starts with kCheckOp.
+  RTC_NORETURN RTC_FORCE_INLINE static void CallCheckOp(const char* file,
+                                                        const int line,
+                                                        const char* message,
+                                                        const Us&... args) {
+    static constexpr CheckArgType t[] = {CheckArgType::kCheckOp, Us::Type()...,
+                                         CheckArgType::kEnd};
+    FatalLog(file, line, message, t, args.GetVal()...);
+  }
+};
+
+// Inductive case: We've already seen at least one << argument. The most recent
+// one had type `T`, and the earlier ones had types `Ts`.
+template <typename T, typename... Ts>
+class LogStreamer<T, Ts...> final {
+ public:
+  RTC_FORCE_INLINE LogStreamer(T arg, const LogStreamer<Ts...>* prior)
+      : arg_(arg), prior_(prior) {}  // Stores the operand and a pointer to the previous link.
+
+  template <
+      typename U,  // Arithmetic U: the operand is taken by value.
+      typename std::enable_if<std::is_arithmetic<U>::value>::type* = nullptr>
+  RTC_FORCE_INLINE LogStreamer<decltype(MakeVal(std::declval<U>())), T, Ts...>
+  operator<<(U arg) const {
+    return LogStreamer<decltype(MakeVal(std::declval<U>())), T, Ts...>(
+        MakeVal(arg), this);
+  }
+
+  template <
+      typename U,  // Non-arithmetic U: the operand is taken by const reference.
+      typename std::enable_if<!std::is_arithmetic<U>::value>::type* = nullptr>
+  RTC_FORCE_INLINE LogStreamer<decltype(MakeVal(std::declval<U>())), T, Ts...>
+  operator<<(const U& arg) const {
+    return LogStreamer<decltype(MakeVal(std::declval<U>())), T, Ts...>(
+        MakeVal(arg), this);
+  }
+
+  template <typename... Us>
+  RTC_NORETURN RTC_FORCE_INLINE void Call(const char* file,
+                                          const int line,
+                                          const char* message,
+                                          const Us&... args) const {
+    prior_->Call(file, line, message, arg_, args...);  // Puts arg_ ahead of the later operands.
+  }
+
+  template <typename... Us>
+  RTC_NORETURN RTC_FORCE_INLINE void CallCheckOp(const char* file,
+                                                 const int line,
+                                                 const char* message,
+                                                 const Us&... args) const {
+    prior_->CallCheckOp(file, line, message, arg_, args...);  // Puts arg_ ahead of the later operands.
+  }
+
+ private:
+  // The most recent argument.
+  T arg_;
+
+  // Earlier arguments.
+  const LogStreamer<Ts...>* prior_;
+};
+
+// Left-hand operand of the check macros: holds where the check failed and,
+// once combined with a LogStreamer chain through operator&, reports it.
+template <bool isCheckOp>
+class FatalLogCall final {
+ public:
+  FatalLogCall(const char* file, int line, const char* message)
+      : file_(file), line_(line), message_(message) {}
+
+  // Receives the fully streamed operand chain and reports the failure; it
+  // does not return. Any binary operator with precedence lower than << would
+  // serve here.
+  template <typename... Ts>
+  RTC_NORETURN RTC_FORCE_INLINE void operator&(
+      const LogStreamer<Ts...>& streamer) {
+    if (isCheckOp) {
+      streamer.CallCheckOp(file_, line_, message_);
+    } else {
+      streamer.Call(file_, line_, message_);
+    }
+  }
+
+ private:
+  const char* file_;
+  int line_;
+  const char* message_;
+};
+}  // namespace webrtc_checks_impl
+
+// The actual stream used isn't important. We reference |ignored| in the code
+// but don't evaluate it; this is to avoid "unused variable" warnings (we do so
+// in a particularly convoluted way with an extra ?: because that appears to be
+// the simplest construct that keeps Visual Studio from complaining about
+// condition being unused).
+#define RTC_EAT_STREAM_PARAMETERS(ignored)                        \
+  (true ? true : ((void)(ignored), true))                         \
+      ? static_cast<void>(0)                                      \
+      : rtc::webrtc_checks_impl::FatalLogCall<false>("", 0, "") & \
+            rtc::webrtc_checks_impl::LogStreamer<>()
+
+// Call RTC_EAT_STREAM_PARAMETERS with an argument that fails to compile if
+// values of the same types as |a| and |b| can't be compared with the given
+// operation, and that would evaluate |a| and |b| if evaluated.
+#define RTC_EAT_STREAM_PARAMETERS_OP(op, a, b) \
+  RTC_EAT_STREAM_PARAMETERS(((void)rtc::Safe##op(a, b)))
+
+// RTC_CHECK dies with a fatal error if condition is not true. It is *not*
+// controlled by NDEBUG or anything else, so the check will be executed
+// regardless of compilation mode.
+//
+// We make sure RTC_CHECK et al. always evaluates |condition|, as
+// doing RTC_CHECK(FunctionWithSideEffect()) is a common idiom.
+#define RTC_CHECK(condition)                                       \
+  while (!(condition))                                             \
+  rtc::webrtc_checks_impl::FatalLogCall<false>(__FILE__, __LINE__, \
+                                               #condition) &       \
+      rtc::webrtc_checks_impl::LogStreamer<>()
+
+// Helper macro for binary operators.
+// Don't use this macro directly in your code, use RTC_CHECK_EQ et al below.
+#define RTC_CHECK_OP(name, op, val1, val2)                               \
+  while (!rtc::Safe##name((val1), (val2)))                               \
+  rtc::webrtc_checks_impl::FatalLogCall<true>(__FILE__, __LINE__,        \
+                                              #val1 " " #op " " #val2) & \
+      rtc::webrtc_checks_impl::LogStreamer<>() << (val1) << (val2)
+
+#define RTC_CHECK_EQ(val1, val2) RTC_CHECK_OP(Eq, ==, val1, val2)
+#define RTC_CHECK_NE(val1, val2) RTC_CHECK_OP(Ne, !=, val1, val2)
+#define RTC_CHECK_LE(val1, val2) RTC_CHECK_OP(Le, <=, val1, val2)
+#define RTC_CHECK_LT(val1, val2) RTC_CHECK_OP(Lt, <, val1, val2)
+#define RTC_CHECK_GE(val1, val2) RTC_CHECK_OP(Ge, >=, val1, val2)
+#define RTC_CHECK_GT(val1, val2) RTC_CHECK_OP(Gt, >, val1, val2)
+
+// The RTC_DCHECK macro is equivalent to RTC_CHECK except that it only generates
+// code in debug builds. It does reference the condition parameter in all cases,
+// though, so callers won't risk getting warnings about unused variables.
+#if RTC_DCHECK_IS_ON
+#define RTC_DCHECK(condition) RTC_CHECK(condition)
+#define RTC_DCHECK_EQ(v1, v2) RTC_CHECK_EQ(v1, v2)
+#define RTC_DCHECK_NE(v1, v2) RTC_CHECK_NE(v1, v2)
+#define RTC_DCHECK_LE(v1, v2) RTC_CHECK_LE(v1, v2)
+#define RTC_DCHECK_LT(v1, v2) RTC_CHECK_LT(v1, v2)
+#define RTC_DCHECK_GE(v1, v2) RTC_CHECK_GE(v1, v2)
+#define RTC_DCHECK_GT(v1, v2) RTC_CHECK_GT(v1, v2)
+#else
+#define RTC_DCHECK(condition) RTC_EAT_STREAM_PARAMETERS(condition)
+#define RTC_DCHECK_EQ(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Eq, v1, v2)
+#define RTC_DCHECK_NE(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Ne, v1, v2)
+#define RTC_DCHECK_LE(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Le, v1, v2)
+#define RTC_DCHECK_LT(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Lt, v1, v2)
+#define RTC_DCHECK_GE(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Ge, v1, v2)
+#define RTC_DCHECK_GT(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Gt, v1, v2)
+#endif
+
+#define RTC_UNREACHABLE_CODE_HIT false
+#define RTC_NOTREACHED() RTC_DCHECK(RTC_UNREACHABLE_CODE_HIT)
+
+// TODO(bugs.webrtc.org/8454): Add an RTC_ prefix or rename differently.
+#define FATAL()                                                    \
+  rtc::webrtc_checks_impl::FatalLogCall<false>(__FILE__, __LINE__, \
+                                               "FATAL()") &        \
+      rtc::webrtc_checks_impl::LogStreamer<>()
+
+// Performs the integer division a/b and returns the result. CHECKs that the
+// remainder is zero.
+template <typename T>
+inline T CheckedDivExact(T a, T b) {  // Fails RTC_CHECK_EQ unless a % b == 0.
+  RTC_CHECK_EQ(a % b, 0) << a << " is not evenly divisible by " << b;
+  return a / b;  // Reached only when the remainder check passed.
+}
+
+}  // namespace rtc
+
+#else  // __cplusplus not defined
+// C version. Lacks many features compared to the C++ version, but usage
+// guidelines are the same.
+
+#define RTC_CHECK(condition)                                             \
+  do {                                                                   \
+    if (!(condition)) {                                                  \
+      rtc_FatalMessage(__FILE__, __LINE__, "CHECK failed: " #condition); \
+    }                                                                    \
+  } while (0)
+
+#define RTC_CHECK_EQ(a, b) RTC_CHECK((a) == (b))
+#define RTC_CHECK_NE(a, b) RTC_CHECK((a) != (b))
+#define RTC_CHECK_LE(a, b) RTC_CHECK((a) <= (b))
+#define RTC_CHECK_LT(a, b) RTC_CHECK((a) < (b))
+#define RTC_CHECK_GE(a, b) RTC_CHECK((a) >= (b))
+#define RTC_CHECK_GT(a, b) RTC_CHECK((a) > (b))
+
+#define RTC_DCHECK(condition)                                             \
+  do {                                                                    \
+    if (RTC_DCHECK_IS_ON && !(condition)) {                               \
+      rtc_FatalMessage(__FILE__, __LINE__, "DCHECK failed: " #condition); \
+    }                                                                     \
+  } while (0)
+
+#define RTC_DCHECK_EQ(a, b) RTC_DCHECK((a) == (b))
+#define RTC_DCHECK_NE(a, b) RTC_DCHECK((a) != (b))
+#define RTC_DCHECK_LE(a, b) RTC_DCHECK((a) <= (b))
+#define RTC_DCHECK_LT(a, b) RTC_DCHECK((a) < (b))
+#define RTC_DCHECK_GE(a, b) RTC_DCHECK((a) >= (b))
+#define RTC_DCHECK_GT(a, b) RTC_DCHECK((a) > (b))
+
+#endif  // __cplusplus
+
+#endif  // RTC_BASE_CHECKS_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/compile_assert_c.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/compile_assert_c.h
@ -0,0 +1,25 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RTC_BASE_COMPILE_ASSERT_C_H_
+#define RTC_BASE_COMPILE_ASSERT_C_H_
+
+// Use this macro to verify at compile time that certain restrictions are met.
+// The argument is the boolean expression to evaluate.
+// Example:
+//   RTC_COMPILE_ASSERT(sizeof(foo) < 128);
+// Note: In C++, use static_assert instead!
+#define RTC_COMPILE_ASSERT(expression) \
+  switch (0) {                         \
+    case 0:                            \
+    case expression:;                  \
+  }
+
+#endif  // RTC_BASE_COMPILE_ASSERT_C_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/numerics/safe_compare.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/numerics/safe_compare.h
@ -0,0 +1,176 @@
+/*
+ *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file defines six constexpr functions:
+//
+//   rtc::SafeEq  // ==
+//   rtc::SafeNe  // !=
+//   rtc::SafeLt  // <
+//   rtc::SafeLe  // <=
+//   rtc::SafeGt  // >
+//   rtc::SafeGe  // >=
+//
+// They each accept two arguments of arbitrary types, and in almost all cases,
+// they simply call the appropriate comparison operator. However, if both
+// arguments are integers, they don't compare them using C++'s quirky rules,
+// but instead adhere to the true mathematical definitions. It is as if the
+// arguments were first converted to infinite-range signed integers, and then
+// compared, although of course nothing expensive like that actually takes
+// place. In practice, for signed/signed and unsigned/unsigned comparisons and
+// some mixed-signed comparisons with a compile-time constant, the overhead is
+// zero; in the remaining cases, it is just a few machine instructions (no
+// branches).
+
+#ifndef RTC_BASE_NUMERICS_SAFE_COMPARE_H_
+#define RTC_BASE_NUMERICS_SAFE_COMPARE_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <type_traits>
+#include <utility>
+
+#include "webrtc/rtc_base/type_traits.h"
+
+namespace rtc {
+
+namespace safe_cmp_impl {
+
+template <size_t N>
+struct LargerIntImpl : std::false_type {};  // Primary template: no wider type, value is false.
+template <>
+struct LargerIntImpl<sizeof(int8_t)> : std::true_type {
+  using type = int16_t;  // int8_t-sized input widens to int16_t.
+};
+template <>
+struct LargerIntImpl<sizeof(int16_t)> : std::true_type {
+  using type = int32_t;  // int16_t-sized input widens to int32_t.
+};
+template <>
+struct LargerIntImpl<sizeof(int32_t)> : std::true_type {
+  using type = int64_t;  // int32_t-sized input widens to int64_t.
+};
+
+// LargerInt<T1, T2>::value is true iff there's a signed type that's larger
+// than T1 (and no larger than the larger of T2 and int*, for performance
+// reasons); and if there is such a type, LargerInt<T1, T2>::type is an alias
+// for it.
+template <typename T1, typename T2>
+struct LargerInt
+    : LargerIntImpl<sizeof(T1) < sizeof(T2) || sizeof(T1) < sizeof(int*)
+                        ? sizeof(T1)
+                        : 0> {};  // Size 0 has no specialization, so it selects the primary template.
+
+// Converts |a| to the unsigned counterpart of its own integer type.
+template <typename T>
+constexpr typename std::make_unsigned<T>::type MakeUnsigned(T a) {
+  using UnsignedT = typename std::make_unsigned<T>::type;
+  return static_cast<UnsignedT>(a);
+}
+
+// Overload for when both T1 and T2 have the same signedness.
+template <typename Op,
+          typename T1,
+          typename T2,
+          typename std::enable_if<std::is_signed<T1>::value ==
+                                  std::is_signed<T2>::value>::type* = nullptr>
+constexpr bool Cmp(T1 a, T2 b) {  // Operands are compared as they are.
+  return Op::Op(a, b);
+}
+
+// Overload for signed - unsigned comparison that can be promoted to a bigger
+// signed type.
+template <typename Op,
+          typename T1,
+          typename T2,
+          typename std::enable_if<std::is_signed<T1>::value &&
+                                  std::is_unsigned<T2>::value &&
+                                  LargerInt<T2, T1>::value>::type* = nullptr>
+constexpr bool Cmp(T1 a, T2 b) {  // The unsigned operand b is cast to the wider signed type.
+  return Op::Op(a, static_cast<typename LargerInt<T2, T1>::type>(b));
+}
+
+// Overload for unsigned - signed comparison that can be promoted to a bigger
+// signed type.
+template <typename Op,
+          typename T1,
+          typename T2,
+          typename std::enable_if<std::is_unsigned<T1>::value &&
+                                  std::is_signed<T2>::value &&
+                                  LargerInt<T1, T2>::value>::type* = nullptr>
+constexpr bool Cmp(T1 a, T2 b) {  // The unsigned operand a is cast to the wider signed type.
+  return Op::Op(static_cast<typename LargerInt<T1, T2>::type>(a), b);
+}
+
+// Overload for signed - unsigned comparison that can't be promoted to a bigger
+// signed type.
+template <typename Op,
+          typename T1,
+          typename T2,
+          typename std::enable_if<std::is_signed<T1>::value &&
+                                  std::is_unsigned<T2>::value &&
+                                  !LargerInt<T2, T1>::value>::type* = nullptr>
+constexpr bool Cmp(T1 a, T2 b) {  // Negative a is decided by Op(-1, 0); otherwise a is made unsigned.
+  return a < 0 ? Op::Op(-1, 0) : Op::Op(safe_cmp_impl::MakeUnsigned(a), b);
+}
+
+// Overload for unsigned - signed comparison that can't be promoted to a bigger
+// signed type.
+template <typename Op,
+          typename T1,
+          typename T2,
+          typename std::enable_if<std::is_unsigned<T1>::value &&
+                                  std::is_signed<T2>::value &&
+                                  !LargerInt<T1, T2>::value>::type* = nullptr>
+constexpr bool Cmp(T1 a, T2 b) {  // Negative b is decided by Op(0, -1); otherwise b is made unsigned.
+  return b < 0 ? Op::Op(0, -1) : Op::Op(a, safe_cmp_impl::MakeUnsigned(b));
+}
+
+#define RTC_SAFECMP_MAKE_OP(name, op)      \
+  struct name {                            \
+    template <typename T1, typename T2>    \
+    static constexpr bool Op(T1 a, T2 b) { \
+      return a op b;                       \
+    }                                      \
+  };
+RTC_SAFECMP_MAKE_OP(EqOp, ==)
+RTC_SAFECMP_MAKE_OP(NeOp, !=)
+RTC_SAFECMP_MAKE_OP(LtOp, <)
+RTC_SAFECMP_MAKE_OP(LeOp, <=)
+RTC_SAFECMP_MAKE_OP(GtOp, >)
+RTC_SAFECMP_MAKE_OP(GeOp, >=)
+#undef RTC_SAFECMP_MAKE_OP
+
+}  // namespace safe_cmp_impl
+
+#define RTC_SAFECMP_MAKE_FUN(name)                                            \
+  template <typename T1, typename T2>                                         \
+  constexpr                                                                   \
+      typename std::enable_if<IsIntlike<T1>::value && IsIntlike<T2>::value,   \
+                              bool>::type Safe##name(T1 a, T2 b) {            \
+    /* Unary plus here turns enums into real integral types. */               \
+    return safe_cmp_impl::Cmp<safe_cmp_impl::name##Op>(+a, +b);               \
+  }                                                                           \
+  template <typename T1, typename T2>                                         \
+  constexpr                                                                   \
+      typename std::enable_if<!IsIntlike<T1>::value || !IsIntlike<T2>::value, \
+                              bool>::type Safe##name(const T1& a,             \
+                                                     const T2& b) {           \
+    return safe_cmp_impl::name##Op::Op(a, b);                                 \
+  }
+RTC_SAFECMP_MAKE_FUN(Eq)
+RTC_SAFECMP_MAKE_FUN(Ne)
+RTC_SAFECMP_MAKE_FUN(Lt)
+RTC_SAFECMP_MAKE_FUN(Le)
+RTC_SAFECMP_MAKE_FUN(Gt)
+RTC_SAFECMP_MAKE_FUN(Ge)
+#undef RTC_SAFECMP_MAKE_FUN
+
+}  // namespace rtc
+
+#endif  // RTC_BASE_NUMERICS_SAFE_COMPARE_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/sanitizer.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/sanitizer.h
@ -0,0 +1,144 @@
+/*
+ *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RTC_BASE_SANITIZER_H_
+#define RTC_BASE_SANITIZER_H_
+
+#include <stddef.h>  // For size_t.
+
+#ifdef __cplusplus
+#include <type_traits>
+#endif
+
+#if defined(__has_feature)
+#if __has_feature(address_sanitizer)
+#define RTC_HAS_ASAN 1
+#endif
+#if __has_feature(memory_sanitizer)
+#define RTC_HAS_MSAN 1
+#endif
+#endif
+#ifndef RTC_HAS_ASAN
+#define RTC_HAS_ASAN 0
+#endif
+#ifndef RTC_HAS_MSAN
+#define RTC_HAS_MSAN 0
+#endif
+
+#if RTC_HAS_ASAN
+#include <sanitizer/asan_interface.h>
+#endif
+#if RTC_HAS_MSAN
+#include <sanitizer/msan_interface.h>
+#endif
+
+#ifdef __has_attribute
+#if __has_attribute(no_sanitize)
+#define RTC_NO_SANITIZE(what) __attribute__((no_sanitize(what)))
+#endif
+#endif
+#ifndef RTC_NO_SANITIZE
+#define RTC_NO_SANITIZE(what)
+#endif
+
+// Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements)
+// as being unaddressable, so that reads and writes are not allowed. ASan may
+// narrow the range to the nearest alignment boundaries.
+static inline void rtc_AsanPoison(const volatile void* ptr,
+                                  size_t element_size,
+                                  size_t num_elements) {
+#if RTC_HAS_ASAN
+  // Total size in bytes of the region handed to ASan.
+  const size_t region_bytes = element_size * num_elements;
+  ASAN_POISON_MEMORY_REGION(ptr, region_bytes);
+#endif
+}
+
+// Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements)
+// as being addressable, so that reads and writes are allowed. ASan may widen
+// the range to the nearest alignment boundaries.
+static inline void rtc_AsanUnpoison(const volatile void* ptr,
+                                    size_t element_size,
+                                    size_t num_elements) {
+#if RTC_HAS_ASAN
+  // Total size in bytes of the region handed to ASan.
+  const size_t region_bytes = element_size * num_elements;
+  ASAN_UNPOISON_MEMORY_REGION(ptr, region_bytes);
+#endif
+}
+
+// Ask MSan to mark the memory range [ptr, ptr + element_size * num_elements)
+// as being uninitialized.
+static inline void rtc_MsanMarkUninitialized(const volatile void* ptr,
+                                             size_t element_size,
+                                             size_t num_elements) {
+#if RTC_HAS_MSAN
+  // Total size in bytes of the region handed to MSan.
+  const size_t region_bytes = element_size * num_elements;
+  __msan_poison(ptr, region_bytes);
+#endif
+}
+
+// Force an MSan check (if any bits in the memory range [ptr, ptr +
+// element_size * num_elements) are uninitialized the call will crash with an
+// MSan report).
+static inline void rtc_MsanCheckInitialized(const volatile void* ptr,
+                                            size_t element_size,
+                                            size_t num_elements) {
+#if RTC_HAS_MSAN
+  // Total size in bytes of the region handed to MSan.
+  const size_t region_bytes = element_size * num_elements;
+  __msan_check_mem_is_initialized(ptr, region_bytes);
+#endif
+}
+
+#ifdef __cplusplus
+
+namespace rtc {
+namespace sanitizer_impl {
+
+// True when T has a trivial copy constructor and a trivial destructor, and
+// its copy assignment is either trivial or not available at all.
+template <typename T>
+constexpr bool IsTriviallyCopyable() {
+  return std::is_trivially_copy_constructible<T>::value &&
+         std::is_trivially_destructible<T>::value &&
+         (!std::is_copy_assignable<T>::value ||
+          std::is_trivially_copy_assignable<T>::value);
+}
+
+}  // namespace sanitizer_impl
+
+// The container helpers below take any object with data() and size() and
+// forward its element range to the matching rtc_* function.
+
+// Forwards the range of |mem| to rtc_AsanPoison().
+template <typename T>
+inline void AsanPoison(const T& mem) {
+  const auto first = mem.data();
+  rtc_AsanPoison(first, sizeof(*first), mem.size());
+}
+
+// Forwards the range of |mem| to rtc_AsanUnpoison().
+template <typename T>
+inline void AsanUnpoison(const T& mem) {
+  const auto first = mem.data();
+  rtc_AsanUnpoison(first, sizeof(*first), mem.size());
+}
+
+// Forwards the range of |mem| to rtc_MsanMarkUninitialized().
+template <typename T>
+inline void MsanMarkUninitialized(const T& mem) {
+  const auto first = mem.data();
+  rtc_MsanMarkUninitialized(first, sizeof(*first), mem.size());
+}
+
+// Passes the by-value copy |t| to rtc_MsanMarkUninitialized() and returns it.
+template <typename T>
+inline T MsanUninitialized(T t) {
+#if RTC_HAS_MSAN
+  // TODO(bugs.webrtc.org/8762): Switch to std::is_trivially_copyable when it
+  // becomes available in downstream projects.
+  static_assert(sanitizer_impl::IsTriviallyCopyable<T>(), "");
+#endif
+  rtc_MsanMarkUninitialized(&t, sizeof(t), 1);
+  return t;
+}
+
+// Forwards the range of |mem| to rtc_MsanCheckInitialized().
+template <typename T>
+inline void MsanCheckInitialized(const T& mem) {
+  const auto first = mem.data();
+  rtc_MsanCheckInitialized(first, sizeof(*first), mem.size());
+}
+
+}  // namespace rtc
+
+#endif  // __cplusplus
+
+#endif  // RTC_BASE_SANITIZER_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/arch.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/arch.h
@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file contains platform-specific typedefs and defines.
+// Much of it is derived from Chromium's build/build_config.h.
+
+#ifndef RTC_BASE_SYSTEM_ARCH_H_
+#define RTC_BASE_SYSTEM_ARCH_H_
+
+// Processor architecture detection.  For more info on what's defined, see:
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//   or with gcc, run: "echo | gcc -E -dM -"
+// Exactly one branch below is taken. Every recognised target defines a
+// word-size macro (WEBRTC_ARCH_32_BITS or WEBRTC_ARCH_64_BITS) and
+// WEBRTC_ARCH_LITTLE_ENDIAN; x86, ARM and MIPS targets additionally define a
+// WEBRTC_ARCH_*_FAMILY macro. Unrecognised targets stop the build.
+#if defined(_M_X64) || defined(__x86_64__)
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86_64
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__aarch64__)
+#define WEBRTC_ARCH_ARM_FAMILY
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(_M_IX86) || defined(__i386__)
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__ARMEL__)
+#define WEBRTC_ARCH_ARM_FAMILY
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__MIPSEL__)
+#define WEBRTC_ARCH_MIPS_FAMILY
+// MIPS is the only family here whose word size is chosen by the data model.
+#if defined(__LP64__)
+#define WEBRTC_ARCH_64_BITS
+#else
+#define WEBRTC_ARCH_32_BITS
+#endif
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__pnacl__)
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#else
+// The diagnostic names this file, since this is where a new branch belongs.
+#error Please add support for your architecture in rtc_base/system/arch.h
+#endif
+
+// Sanity check: exactly one of the two endianness macros must be defined.
+#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
+#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
+#endif
+
+#endif  // RTC_BASE_SYSTEM_ARCH_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/inline.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/inline.h
@ -0,0 +1,31 @@
+/*
+ *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RTC_BASE_SYSTEM_INLINE_H_
+#define RTC_BASE_SYSTEM_INLINE_H_
+
+// RTC_FORCE_INLINE and RTC_NO_INLINE expand to the compiler-specific spelling
+// of "always inline" and "never inline". On compilers that are neither MSVC
+// nor GCC-compatible, both macros expand to nothing.
+#if defined(_MSC_VER)
+
+#define RTC_FORCE_INLINE __forceinline
+#define RTC_NO_INLINE __declspec(noinline)
+
+#elif defined(__GNUC__)
+
+#define RTC_FORCE_INLINE __attribute__((__always_inline__))
+#define RTC_NO_INLINE __attribute__((__noinline__))
+
+#else
+
+// Unknown compiler: the annotations are dropped.
+#define RTC_FORCE_INLINE
+#define RTC_NO_INLINE
+
+#endif
+
+#endif  // RTC_BASE_SYSTEM_INLINE_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/type_traits.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/type_traits.h
@ -0,0 +1,140 @@
+/*
+ *  Copyright 2016 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef RTC_BASE_TYPE_TRAITS_H_
+#define RTC_BASE_TYPE_TRAITS_H_
+
+#include <cstddef>
+#include <type_traits>
+
+namespace rtc {
+
+// Determines if the given class has zero-argument .data() and .size() methods
+// whose return values are convertible to T* and size_t, respectively.
+template <typename DS, typename T>
+class HasDataAndSize {
+ private:
+  // Constrained overload. It only takes part in overload resolution when
+  // C().data() converts to T* and C().size() converts to std::size_t; when it
+  // does, its `int` parameter makes it a better match for the argument 0 than
+  // the variadic overload below.
+  template <
+      typename C,
+      typename std::enable_if<
+          std::is_convertible<decltype(std::declval<C>().data()), T*>::value &&
+          std::is_convertible<decltype(std::declval<C>().size()),
+                              std::size_t>::value>::type* = nullptr>
+  static int Test(int);
+
+  // Fallback overload, selected whenever the constrained one is not viable.
+  template <typename>
+  static char Test(...);
+
+ public:
+  // True exactly when Test<DS>(0) resolves to the int-returning overload.
+  static constexpr bool value = std::is_same<decltype(Test<DS>(0)), int>::value;
+};
+
+// Compile-time checks of HasDataAndSize against small probe types. The member
+// functions of the probes are only declared, never defined.
+namespace test_has_data_and_size {
+
+// Probe with public data()/size() whose return types are configurable.
+template <typename DR, typename SR>
+struct Test1 {
+  DR data();
+  SR size();
+};
+static_assert(HasDataAndSize<Test1<int*, int>, int>::value, "");
+static_assert(HasDataAndSize<Test1<int*, int>, const int>::value, "");
+static_assert(HasDataAndSize<Test1<const int*, int>, const int>::value, "");
+static_assert(!HasDataAndSize<Test1<const int*, int>, int>::value,
+              "implicit cast of const int* to int*");
+static_assert(!HasDataAndSize<Test1<char*, size_t>, int>::value,
+              "implicit cast of char* to int*");
+
+// Probe where data and size are data members rather than member functions.
+struct Test2 {
+  int* data;
+  size_t size;
+};
+static_assert(!HasDataAndSize<Test2, int>::value,
+              ".data and .size aren't functions");
+
+// Probe that has data() but no size().
+struct Test3 {
+  int* data();
+};
+static_assert(!HasDataAndSize<Test3, int>::value, ".size() is missing");
+
+// Probe declared with `class`, so both members are private by default.
+class Test4 {
+  int* data();
+  size_t size();
+};
+static_assert(!HasDataAndSize<Test4, int>::value,
+              ".data() and .size() are private");
+
+}  // namespace test_has_data_and_size
+
+namespace type_traits_impl {
+
+// Determines if the given type is an enum that converts implicitly to
+// an integral type.
+template <typename T>
+struct IsIntEnum {
+ private:
+  // This overload is used if the type is an enum, and unary plus
+  // compiles and turns it into an integral type.
+  template <typename X,
+            typename std::enable_if<
+                std::is_enum<X>::value &&
+                std::is_integral<decltype(+std::declval<X>())>::value>::type* =
+                nullptr>
+  static int Test(int);
+
+  // Otherwise, this overload is used.
+  template <typename>
+  static char Test(...);
+
+ public:
+  // Any reference qualifier is removed from T before probing. The result is
+  // true exactly when the int-returning overload above was selected.
+  static constexpr bool value =
+      std::is_same<decltype(Test<typename std::remove_reference<T>::type>(0)),
+                   int>::value;
+};
+
+}  // namespace type_traits_impl
+
+// Determines if the given type is integral, or an enum that
+// converts implicitly to an integral type.
+template <typename T>
+struct IsIntlike {
+ private:
+  // T with any reference qualifier removed.
+  using Unref = typename std::remove_reference<T>::type;
+  static constexpr bool kIsIntegral = std::is_integral<Unref>::value;
+  static constexpr bool kIsIntEnum = type_traits_impl::IsIntEnum<Unref>::value;
+
+ public:
+  // True for integral types and for enums that convert implicitly to one.
+  static constexpr bool value = kIsIntegral || kIsIntEnum;
+};
+
+// Compile-time checks of IsIntEnum and IsIntlike against four probe types.
+namespace test_enum_intlike {
+
+enum E1 { e1 };        // Named unscoped enum.
+enum { e2 };           // Unnamed unscoped enum; referred to via decltype(e2).
+enum class E3 { e3 };  // Scoped enum.
+struct S {};           // Not an enum and not integral.
+
+// IsIntEnum accepts only the two unscoped enums.
+static_assert(type_traits_impl::IsIntEnum<E1>::value, "");
+static_assert(type_traits_impl::IsIntEnum<decltype(e2)>::value, "");
+static_assert(!type_traits_impl::IsIntEnum<E3>::value, "");
+static_assert(!type_traits_impl::IsIntEnum<int>::value, "");
+static_assert(!type_traits_impl::IsIntEnum<float>::value, "");
+static_assert(!type_traits_impl::IsIntEnum<S>::value, "");
+
+// IsIntlike additionally accepts plain integral types such as int.
+static_assert(IsIntlike<E1>::value, "");
+static_assert(IsIntlike<decltype(e2)>::value, "");
+static_assert(!IsIntlike<E3>::value, "");
+static_assert(IsIntlike<int>::value, "");
+static_assert(!IsIntlike<float>::value, "");
+static_assert(!IsIntlike<S>::value, "");
+
+}  // namespace test_enum_intlike
+
+}  // namespace rtc
+
+#endif  // RTC_BASE_TYPE_TRAITS_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/system_wrappers/include/cpu_features_wrapper.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/system_wrappers/include/cpu_features_wrapper.h
@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
+#define SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
+
+#include <stdint.h>
+
+#if defined(__cplusplus) || defined(c_plusplus)
+extern "C" {
+#endif
+
+// List of features in x86.
+typedef enum { kSSE2, kSSE3 } CPUFeature;
+
+// List of features in ARM.
+// Each enumerator is a distinct single bit, so several can be combined into
+// one mask.
+enum {
+  kCPUFeatureARMv7 = (1 << 0),
+  kCPUFeatureVFPv3 = (1 << 1),
+  kCPUFeatureNEON = (1 << 2),
+  kCPUFeatureLDREXSTREX = (1 << 3)
+};
+
+// Signature of a CPU-feature query: takes a CPUFeature and returns an int.
+typedef int (*WebRtc_CPUInfo)(CPUFeature feature);
+
+// Returns true if the CPU supports the feature.
+// This is a global function pointer, not a function; it is defined in another
+// translation unit.
+extern WebRtc_CPUInfo WebRtc_GetCPUInfo;
+
+// No CPU feature is available => straight C path.
+extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM;
+
+// Return the features in an ARM device.
+// It detects the features in the hardware platform, and returns supported
+// values in the above enum definition as a bitmask.
+extern uint64_t WebRtc_GetCPUFeaturesARM(void);
+
+#if defined(__cplusplus) || defined(c_plusplus)
+}  // extern "C"
+#endif
+
+#endif  // SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/typedefs.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/typedefs.h
@ -0,0 +1,149 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file contains platform-specific typedefs and defines.
+// Much of it is derived from Chromium's build/build_config.h.
+
+#ifndef WEBRTC_TYPEDEFS_H_
+#define WEBRTC_TYPEDEFS_H_
+
+// Processor architecture detection.  For more info on what's defined, see:
+//   http://msdn.microsoft.com/en-us/library/b0084kay.aspx
+//   http://www.agner.org/optimize/calling_conventions.pdf
+//   or with gcc, run: "echo | gcc -E -dM -"
+// Exactly one branch below is taken. Every recognised target defines a
+// word-size macro (WEBRTC_ARCH_32_BITS or WEBRTC_ARCH_64_BITS) and an
+// endianness macro; only the x86 branches also define a family macro.
+// NOTE(review): rtc_base/system/arch.h carries a near-identical chain. That
+// one defines WEBRTC_ARCH_ARM_FAMILY / WEBRTC_ARCH_MIPS_FAMILY and has no PPC
+// branch, so the two headers do not produce the same macro set.
+#if defined(_M_X64) || defined(__x86_64__)
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86_64
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__aarch64__)
+#define WEBRTC_ARCH_64_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(_M_IX86) || defined(__i386__)
+#define WEBRTC_ARCH_X86_FAMILY
+#define WEBRTC_ARCH_X86
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__ARMEL__)
+// TODO(ajm): We'd prefer to control platform defines here, but this is
+// currently provided by the Android makefiles. Commented to avoid duplicate
+// definition warnings.
+//#define WEBRTC_ARCH_ARM
+// TODO(ajm): Chromium uses the following two defines. Should we switch?
+//#define WEBRTC_ARCH_ARM_FAMILY
+//#define WEBRTC_ARCH_ARMEL
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__MIPSEL__)
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__pnacl__)
+#define WEBRTC_ARCH_32_BITS
+#define WEBRTC_ARCH_LITTLE_ENDIAN
+#elif defined(__PPC__)
+// PPC is the only big-endian branch; its word size depends on __PPC64__.
+#if defined(__PPC64__)
+#define WEBRTC_ARCH_64_BITS
+#else
+#define WEBRTC_ARCH_32_BITS
+#endif
+#define WEBRTC_ARCH_BIG_ENDIAN
+#else
+#error Please add support for your architecture in typedefs.h
+#endif
+
+// Sanity check: exactly one of the two endianness macros must be defined.
+#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN))
+#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN
+#endif
+
+// WEBRTC_CPU_DETECTION is defined for x86 builds compiled without __SSE2__
+// and for ARMv7 builds without WEBRTC_ARCH_ARM_NEON. Neither
+// WEBRTC_ARCH_ARM_V7 nor WEBRTC_ARCH_ARM_NEON is defined in this header;
+// presumably they come from the build system - TODO confirm.
+#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) ||  \
+    (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON))
+#define WEBRTC_CPU_DETECTION
+#endif
+
+// Fixed-width integer types: taken from <stdint.h> everywhere except MSVC,
+// where they are spelled out by hand.
+#if !defined(_MSC_VER)
+#include <stdint.h>
+#else
+// Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h.
+typedef signed char         int8_t;
+typedef signed short        int16_t;
+typedef signed int          int32_t;
+typedef __int64             int64_t;
+typedef unsigned char       uint8_t;
+typedef unsigned short      uint16_t;
+typedef unsigned int        uint32_t;
+typedef unsigned __int64    uint64_t;
+#endif
+
+// Borrowed from Chromium's base/compiler_specific.h.
+// Annotate a virtual method indicating it must be overriding a virtual
+// method in the parent class.
+// Use like:
+//   virtual void foo() OVERRIDE;
+#if defined(_MSC_VER)
+#define OVERRIDE override
+#elif defined(__clang__)
+// Clang defaults to C++03 and warns about using override. Squelch that.
+// Intentionally no push/pop here so all users of OVERRIDE ignore the warning
+// too. This is like passing -Wno-c++11-extensions, except that GCC won't die
+// (because it won't see this pragma).
+#pragma clang diagnostic ignored "-Wc++11-extensions"
+#define OVERRIDE override
+#elif defined(__GNUC__) && __cplusplus >= 201103 && \
+    (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700
+// GCC 4.7 supports explicit virtual overrides when C++11 support is enabled.
+#define OVERRIDE override
+#else
+// Any other compiler, or GCC without C++11: the annotation is dropped.
+#define OVERRIDE
+#endif
+
+// Annotate a function indicating the caller must examine the return value.
+// Use like:
+//   int foo() WARN_UNUSED_RESULT;
+// TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and
+// libjingle are merged.
+#if !defined(WARN_UNUSED_RESULT)
+#if defined(__GNUC__)
+#define WARN_UNUSED_RESULT __attribute__((warn_unused_result))
+#else
+#define WARN_UNUSED_RESULT
+#endif
+#endif  // WARN_UNUSED_RESULT
+
+// Put after a variable that might not be used, to prevent compiler warnings:
+//   int result ATTRIBUTE_UNUSED = DoSomething();
+//   assert(result == 17);
+#ifndef ATTRIBUTE_UNUSED
+#if defined(__GNUC__) || defined(__clang__)
+#define ATTRIBUTE_UNUSED __attribute__((unused))
+#else
+#define ATTRIBUTE_UNUSED
+#endif
+#endif
+
+// Macro to be used for switch-case fallthrough (required for enabling
+// -Wimplicit-fallthrough warning on Clang).
+#ifndef FALLTHROUGH
+#if defined(__clang__)
+#define FALLTHROUGH() [[clang::fallthrough]]
+#else
+// Non-Clang compilers get an empty statement, so `FALLTHROUGH();` still
+// parses.
+#define FALLTHROUGH() do { } while (0)
+#endif
+#endif
+
+// Annotate a function that will not return control flow to the caller.
+#if defined(_MSC_VER)
+#define NO_RETURN __declspec(noreturn)
+#elif defined(__GNUC__)
+#define NO_RETURN __attribute__((noreturn))
+#else
+#define NO_RETURN
+#endif
+
+#endif  // WEBRTC_TYPEDEFS_H_
--- a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc_vad.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc_vad.h
@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * This header file includes the VAD API calls. Specific function calls are
+ * given below.
+ */
+
+#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
+#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct WebRtcVadInst VadInst;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates an instance to the VAD structure.
+VadInst* WebRtcVad_Create(void);
+
+// Frees the dynamic memory of a specified VAD instance.
+//
+// - handle [i] : Pointer to VAD instance that should be freed.
+void WebRtcVad_Free(VadInst* handle);
+
+// Initializes a VAD instance.
+//
+// - handle [i/o] : Instance that should be initialized.
+//
+// returns        : 0 - (OK),
+//                 -1 - (null pointer or Default mode could not be set).
+int WebRtcVad_Init(VadInst* handle);
+
+// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
+// restrictive in reporting speech. Put in other words the probability of being
+// speech when the VAD returns 1 is increased with increasing mode. As a
+// consequence also the missed detection rate goes up.
+//
+// - handle [i/o] : VAD instance.
+// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
+//
+// returns        : 0 - (OK),
+//                 -1 - (null pointer, mode could not be set or the VAD instance
+//                       has not been initialized).
+int WebRtcVad_set_mode(VadInst* handle, int mode);
+
+// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
+// and frame lengths, see the description of
+// WebRtcVad_ValidRateAndFrameLength().
+//
+// - handle       [i/o] : VAD Instance. Needs to be initialized by
+//                        WebRtcVad_Init() before call.
+// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
+// - audio_frame  [i]   : Audio frame buffer.
+// - frame_length [i]   : Length of audio frame buffer in number of samples.
+//
+// returns              : 1 - (Active Voice),
+//                        0 - (Non-active Voice),
+//                       -1 - (Error)
+int WebRtcVad_Process(VadInst* handle,
+                      int fs,
+                      const int16_t* audio_frame,
+                      size_t frame_length);
+
+// Checks for valid combinations of |rate| and |frame_length|. We support 10,
+// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
+//
+// - rate         [i] : Sampling frequency (Hz).
+// - frame_length [i] : Speech frame buffer length in number of samples.
+//
+// returns            : 0 - (valid combination), -1 - (invalid combination)
+int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
--- a/runtime/engine/asr/server/brpc/paraformerCPP/win_func.h
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/win_func.h
@ -0,0 +1,28 @@
+// Portability shim for gettimeofday(). Builds that do not define WIN32 get
+// the function from <sys/time.h>; builds that define WIN32 get the
+// replacement below.
+#ifndef WIN_FUNC_H_
+#define WIN_FUNC_H_
+
+#include <time.h>
+#ifdef WIN32
+#include <windows.h>
+
+// Replacement for POSIX gettimeofday() built on GetLocalTime() and mktime().
+//
+// - tp  [o] : receives whole seconds in tv_sec and the millisecond part of
+//             the current time, scaled to microseconds, in tv_usec. The
+//             effective resolution is therefore one millisecond.
+// - tzp [i] : ignored.
+//
+// returns   : always 0.
+//
+// Declared static inline so that including this header from several
+// translation units does not produce duplicate definitions at link time.
+static inline int gettimeofday(struct timeval* tp, void* tzp)
+{
+	time_t clock;
+	struct tm tm;
+	SYSTEMTIME wtm;
+
+	(void)tzp;
+
+	GetLocalTime(&wtm);
+	tm.tm_year = wtm.wYear - 1900;
+	tm.tm_mon = wtm.wMonth - 1;
+	tm.tm_mday = wtm.wDay;
+	tm.tm_hour = wtm.wHour;
+	tm.tm_min = wtm.wMinute;
+	tm.tm_sec = wtm.wSecond;
+	// A negative tm_isdst asks mktime() to work out daylight saving itself.
+	tm.tm_isdst = -1;
+
+	clock = mktime(&tm);
+	tp->tv_sec = (long)clock;
+	tp->tv_usec = (long)(wtm.wMilliseconds * 1000);
+	return (0);
+}
+#else
+#include <sys/time.h>
+#endif
+
+#endif  // WIN_FUNC_H_
--- a/runtime/engine/asr/server/brpc/proto/echo.proto
+++ b/runtime/engine/asr/server/brpc/proto/echo.proto
@ -0,0 +1,22 @@
+// RPC schema with a single service, EchoService, whose one method takes an
+// AudioRequest and returns an AudioResponse.
+syntax="proto2";
+package example;
+
+// Ask protoc to generate the generic C++ service classes for EchoService.
+option cc_generic_services = true;
+
+
+
+// Request for EchoService.audiorecognition.
+// NOTE(review): `audio` presumably carries the audio payload. If that is raw
+// binary data, `bytes` would be the matching protobuf type - confirm with the
+// server before changing, since it affects generated code.
+message AudioRequest {
+      required string audio = 1;
+      required string extra = 2; 
+};
+
+// Response for EchoService.audiorecognition.
+// NOTE(review): the unit of `cost_time` is not stated anywhere in this
+// file - TODO confirm and document it.
+message AudioResponse {
+      required int32 err_no = 1;
+      required string err_msg = 2;
+      required string result = 3;
+      required float cost_time = 4;
+};
+
+service EchoService {
+      rpc audiorecognition(AudioRequest) returns (AudioResponse);
+};