diff --git a/runtime/engine/asr/server/brpc/BCLOUD b/runtime/engine/asr/server/brpc/BCLOUD
new file mode 100644
index 00000000..423ec880
--- /dev/null
+++ b/runtime/engine/asr/server/brpc/BCLOUD
@@ -0,0 +1,22 @@
+#edit-mode: -*- python -*-
+#coding:gbk
+
+WORKROOT('../../../')
+GCC('gcc82')
+
+CPPFLAGS('-g -DNDEBUG -pipe -W -Wall -Werror -fPIC -Wno-deprecated -Wno-unused-parameter -fno-omit-frame-pointer -D__const__= -std=c++11 -D__STDC_FORMAT_MACROS -DBAIDU_RPC_ENABLE_CPU_PROFILER -DBAIDU_RPC_ENABLE_HEAP_PROFILER')
+LDFLAGS('-lpthread -pthread -lrt -ldl -lz')
+
+CONFIGS('baidu/base/baidu-rpc@stable')
+CONFIGS('baidu/third-party/openssl@openssl_V1.0.2.10_GCC820_6U3_K2_GEN_PD_BL@git_tag')
+CONFIGS('baidu/third-party/tcmalloc@tcmalloc_V2.7.0.7_GCC820_4U3_K3_GEN_PD_BL@git_tag')
+CONFIGS("baidu/third-party/protobuf@protobuf_V2.6.1.1_GCC820_4U3_K3_GEN_PD_BL@git_tag")
+CONFIGS('baidu/base/ullib@stable')
+
+HEADERS(GLOB_GEN_SRCS('./proto/*.pb.h'), '$INC')
+PROTOC(ENV.WorkRoot() + '/third-64/protobuf/bin/protoc')
+PROTOFLAGS('-I../../../third-64/protobuf/include')
+PROTOFLAGS('--proto_path=.', '--cpp_out=.')
+
+
+Directory("paraformerCPP")
\ No newline at end of file
diff --git a/runtime/engine/asr/server/brpc/README.md b/runtime/engine/asr/server/brpc/README.md
new file mode 100644
index 00000000..eba9752f
--- /dev/null
+++ b/runtime/engine/asr/server/brpc/README.md
@@ -0,0 +1,4 @@
+## Extra Description
+This code is built with BCLOUD; that build method does not follow this repository's standard build conventions.
+
+
diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/Audio.cpp b/runtime/engine/asr/server/brpc/paraformerCPP/Audio.cpp
new file mode 100644
index 00000000..dfe28c5c
--- /dev/null
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/Audio.cpp
@@ -0,0 +1,232 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include "ComDefine.h"
+#include "Audio.h"
+
+using namespace std;
+
+class AudioWindow {
+  private:
+    int *window;
+    int in_idx;
+    int out_idx;
+    int sum;
+    int window_size = 0;
+
+  public:
+    AudioWindow(int window_size) : window_size(window_size)
+    {
+        window = (int *)calloc(sizeof(int), window_size + 1);
+        in_idx = 0;
+        out_idx = 1;
+        sum = 0;
+    };
+    ~AudioWindow(){
+        free(window);
+    };
+    int put(int val)
+    {
+        sum = sum + val - window[out_idx];
+        window[in_idx] = val;
+        in_idx = in_idx == window_size ? 0 : in_idx + 1;
+        out_idx = out_idx == window_size ? 0 : out_idx + 1;
+        return sum;
+    };
+};
+
+AudioFrame::AudioFrame(){};
+AudioFrame::AudioFrame(int len) : len(len)
+{
+    start = 0;
+};
+AudioFrame::~AudioFrame(){};
+int AudioFrame::set_start(int val)
+{
+    start = val < 0 ?
0 : val; + return start; +}; + +int AudioFrame::set_end(int val, int max_len) +{ + + float num_samples = val - start; + float frame_length = 400; + float frame_shift = 160; + float num_new_samples = + ceil((num_samples - frame_length) / frame_shift) * frame_shift + frame_length; + + end = start + num_new_samples; + len = (int)num_new_samples; + if (end > max_len){ + printf("frame end > max_len!!!!!!!\n"); + } + + return end; +}; + +int AudioFrame::get_start() +{ + return start; +}; + +int AudioFrame::get_len() +{ + return len; +}; + +int AudioFrame::disp() +{ + printf("not imp!!!!\n"); + + return 0; +}; + +Audio::Audio(int data_type) : data_type(data_type) +{ + speech_buff = NULL; + align_size = 1360; +} + +Audio::Audio(int data_type, int size) : data_type(data_type) +{ + speech_buff = NULL; + align_size = (float)size; +} + +Audio::~Audio() +{ + if (speech_buff != NULL) { + free(speech_buff); + speech_data.clear(); + } +} + +void Audio::disp() +{ + printf("Audio time is %f s. len is %d\n", (float)speech_len / 16000, + speech_len); +} + +void Audio::loadwavfrommem(AudioFileaudio) +{ + if (speech_buff != NULL) { + free(speech_buff); + speech_data.clear(); + } + int wav_length = audio.getNumSamplesPerChannel(); + int channelNum = audio.getNumChannels(); + + speech_len = wav_length * channelNum; + printf("wav_length:%d, channelNum: %d", wav_length, channelNum); + + speech_align_len = (int)(ceil((float)speech_len / align_size) * align_size); + speech_buff = (int16_t *)malloc(sizeof(int16_t) * speech_align_len); + memset(speech_buff, 0, sizeof(int16_t) * speech_align_len); + + for (int i = 0; i < wav_length; i++) + { + for (int channel = 0; channel < channelNum; channel++) + { + speech_buff[i * channelNum + channel] = (int16_t)(audio.samples[channel][i] * 32768); + } + } + + for (int i = 0; i < speech_len; i++) { + float temp = (float)speech_buff[i]; + speech_data.emplace_back(temp); + } + + AudioFrame *frame = new AudioFrame(speech_len); + frame_queue.push(frame); +} + +int Audio::fetch(vector &dout, int &len, int &flag) +{ + if (frame_queue.size() > 0) { + AudioFrame *frame = frame_queue.front(); + frame_queue.pop(); + len = frame->get_len(); + int speech_len = speech_data.size(); + auto last = min(speech_len, frame->get_start() + len); + dout.insert(dout.begin(), speech_data.begin() + frame->get_start(), speech_data.begin() + last); + delete frame; + flag = S_END; + return 1; + } else { + return 0; + } +} + + +#define UNTRIGGERED 0 +#define TRIGGERED 1 + +#define SPEECH_LEN_5S (16000 * 5) +#define SPEECH_LEN_10S (16000 * 10) +#define SPEECH_LEN_15S (16000 * 15) +#define SPEECH_LEN_20S (16000 * 20) +#define SPEECH_LEN_30S (16000 * 30) +#define SPEECH_LEN_60S (16000 * 60) + +void Audio::split() +{ + VadInst *handle = WebRtcVad_Create(); + WebRtcVad_Init(handle); + WebRtcVad_set_mode(handle, 2); + int window_size = 10; + AudioWindow audiowindow(window_size); + int status = UNTRIGGERED; + int offset = 0; + int fs = 16000; + int step = 160; + + AudioFrame *frame; + + frame = frame_queue.front(); + frame_queue.pop(); + delete frame; + + while (offset < speech_len - step) { + int n = WebRtcVad_Process(handle, fs, speech_buff + offset, step); + + if (status == UNTRIGGERED && audiowindow.put(n) >= window_size - 1) { + frame = new AudioFrame(); + int start = offset - step * (window_size - 1); + frame->set_start(start); + status = TRIGGERED; + } else if (status == TRIGGERED) { + int win_weight = audiowindow.put(n); + int voice_len = (offset - frame->get_start()); + int gap = 0; + if (voice_len < 
SPEECH_LEN_5S) { + offset += step; + continue; + } else if (voice_len < SPEECH_LEN_10S) { + gap = 1; + } else if (voice_len < SPEECH_LEN_20S) { + gap = window_size / 5; + } else { + gap = window_size - 1; + } + + if (win_weight < gap || voice_len >= SPEECH_LEN_15S) { + status = UNTRIGGERED; + offset = frame->set_end(offset, speech_align_len); + frame_queue.push(frame); + frame = NULL; + } + } + offset += step; + } + + if (frame != NULL) { + frame->set_end(speech_len, speech_align_len); + frame_queue.push(frame); + frame = NULL; + } + WebRtcVad_Free(handle); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/Audio.h b/runtime/engine/asr/server/brpc/paraformerCPP/Audio.h new file mode 100644 index 00000000..77ebaa69 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/Audio.h @@ -0,0 +1,55 @@ + +#ifndef AUDIO_H +#define AUDIO_H + +#include +#include +#include +#include +using namespace std; + +class AudioFrame { + private: + int start; + int end; + int len; + + public: + AudioFrame(); + AudioFrame(int len); + + ~AudioFrame(); + int set_start(int val); + int set_end(int val, int max_len); + int get_start(); + int get_len(); + int disp(); +}; + +class Audio { + private: + vector speech_data; + int16_t *speech_buff; + int speech_len; + int speech_align_len; + int16_t sample_rate; + int offset; + float align_size; + int data_type; + queue frame_queue; + + public: + vector speech_vec; + Audio(int data_type); + Audio(int data_type, int size); + ~Audio(); + void disp(); + void loadwav(const char *filename); + void loadwavfrommem(AudioFileaudio); + int fetch_chunck(float *&dout, int len); + int fetch(vector &dout, int &len, int &flag); + void padding(); + void split(); +}; + +#endif diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/AudioFile.h b/runtime/engine/asr/server/brpc/paraformerCPP/AudioFile.h new file mode 100644 index 00000000..c60bb945 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/AudioFile.h @@ -0,0 +1,1319 @@ +//======================================================================= +/** @file AudioFile.h + * @author Adam Stark + * @copyright Copyright (C) 2017 Adam Stark + * + * This file is part of the 'AudioFile' library + * + * MIT License + * + * Copyright (c) 2017 Adam Stark + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, + * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A + * PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ +//======================================================================= + +#ifndef _AS_AudioFile_h +#define _AS_AudioFile_h + +#if defined (_MSC_VER) +#undef max +#undef min +#define NOMINMAX +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// disable some warnings on Windows +#if defined (_MSC_VER) + __pragma(warning (push)) + __pragma(warning (disable : 4244)) + __pragma(warning (disable : 4457)) + __pragma(warning (disable : 4458)) + __pragma(warning (disable : 4389)) + __pragma(warning (disable : 4996)) +#elif defined (__GNUC__) + _Pragma("GCC diagnostic push") + _Pragma("GCC diagnostic ignored \"-Wconversion\"") + _Pragma("GCC diagnostic ignored \"-Wsign-compare\"") + _Pragma("GCC diagnostic ignored \"-Wshadow\"") +#endif + +//============================================================= +/** The different types of audio file, plus some other types to + * indicate a failure to load a file, or that one hasn't been + * loaded yet + */ +enum class AudioFileFormat +{ + Error, + NotLoaded, + Wave, + Aiff +}; + +//============================================================= +template +class AudioFile +{ +public: + + //============================================================= + typedef std::vector > AudioBuffer; + + //============================================================= + /** Constructor */ + AudioFile(); + + /** Constructor, using a given file path to load a file */ + AudioFile (std::string filePath); + + //============================================================= + /** Loads an audio file from a given file path. + * @Returns true if the file was successfully loaded + */ + bool load (std::string filePath); + + /** Saves an audio file to a given file path. + * @Returns true if the file was successfully saved + */ + bool save (std::string filePath, AudioFileFormat format = AudioFileFormat::Wave); + + //============================================================= + /** Loads an audio file from data in memory */ + bool loadFromMemory (std::vector& fileData); + + //============================================================= + /** @Returns the sample rate */ + uint32_t getSampleRate() const; + + /** @Returns the number of audio channels in the buffer */ + int getNumChannels() const; + + /** @Returns true if the audio file is mono */ + bool isMono() const; + + /** @Returns true if the audio file is stereo */ + bool isStereo() const; + + /** @Returns the bit depth of each sample */ + int getBitDepth() const; + + /** @Returns the number of samples per channel */ + int getNumSamplesPerChannel() const; + + /** @Returns the length in seconds of the audio file based on the number of samples and sample rate */ + double getLengthInSeconds() const; + + /** Prints a summary of the audio file to the console */ + void printSummary() const; + + //============================================================= + + /** Set the audio buffer for this AudioFile by copying samples from another buffer. + * @Returns true if the buffer was copied successfully. + */ + bool setAudioBuffer (AudioBuffer& newBuffer); + + /** Sets the audio buffer to a given number of channels and number of samples per channel. This will try to preserve + * the existing audio, adding zeros to any new channels or new samples in a given channel. + */ + void setAudioBufferSize (int numChannels, int numSamples); + + /** Sets the number of samples per channel in the audio buffer. 
This will try to preserve + * the existing audio, adding zeros to new samples in a given channel if the number of samples is increased. + */ + void setNumSamplesPerChannel (int numSamples); + + /** Sets the number of channels. New channels will have the correct number of samples and be initialised to zero */ + void setNumChannels (int numChannels); + + /** Sets the bit depth for the audio file. If you use the save() function, this bit depth rate will be used */ + void setBitDepth (int numBitsPerSample); + + /** Sets the sample rate for the audio file. If you use the save() function, this sample rate will be used */ + void setSampleRate (uint32_t newSampleRate); + + //============================================================= + /** Sets whether the library should log error messages to the console. By default this is true */ + void shouldLogErrorsToConsole (bool logErrors); + + //============================================================= + /** A vector of vectors holding the audio samples for the AudioFile. You can + * access the samples by channel and then by sample index, i.e: + * + * samples[channel][sampleIndex] + */ + AudioBuffer samples; + + //============================================================= + /** An optional iXML chunk that can be added to the AudioFile. + */ + std::string iXMLChunk; + +private: + + //============================================================= + enum class Endianness + { + LittleEndian, + BigEndian + }; + + //============================================================= + AudioFileFormat determineAudioFileFormat (std::vector& fileData); + bool decodeWaveFile (std::vector& fileData); + bool decodeAiffFile (std::vector& fileData); + + //============================================================= + bool saveToWaveFile (std::string filePath); + bool saveToAiffFile (std::string filePath); + + //============================================================= + void clearAudioBuffer(); + + //============================================================= + int32_t fourBytesToInt (std::vector& source, int startIndex, Endianness endianness = Endianness::LittleEndian); + int16_t twoBytesToInt (std::vector& source, int startIndex, Endianness endianness = Endianness::LittleEndian); + int getIndexOfString (std::vector& source, std::string s); + int getIndexOfChunk (std::vector& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness = Endianness::LittleEndian); + + //============================================================= + T sixteenBitIntToSample (int16_t sample); + int16_t sampleToSixteenBitInt (T sample); + + //============================================================= + uint8_t sampleToSingleByte (T sample); + T singleByteToSample (uint8_t sample); + + uint32_t getAiffSampleRate (std::vector& fileData, int sampleRateStartIndex); + bool tenByteMatch (std::vector& v1, int startIndex1, std::vector& v2, int startIndex2); + void addSampleRateToAiffData (std::vector& fileData, uint32_t sampleRate); + T clamp (T v1, T minValue, T maxValue); + + //============================================================= + void addStringToFileData (std::vector& fileData, std::string s); + void addInt32ToFileData (std::vector& fileData, int32_t i, Endianness endianness = Endianness::LittleEndian); + void addInt16ToFileData (std::vector& fileData, int16_t i, Endianness endianness = Endianness::LittleEndian); + + //============================================================= + bool writeDataToFile (std::vector& fileData, std::string filePath); + + 
//============================================================= + void reportError (std::string errorMessage); + + //============================================================= + AudioFileFormat audioFileFormat; + uint32_t sampleRate; + int bitDepth; + bool logErrorsToConsole {true}; +}; + + +//============================================================= +// Pre-defined 10-byte representations of common sample rates +static std::unordered_map > aiffSampleRateTable = { + {8000, {64, 11, 250, 0, 0, 0, 0, 0, 0, 0}}, + {11025, {64, 12, 172, 68, 0, 0, 0, 0, 0, 0}}, + {16000, {64, 12, 250, 0, 0, 0, 0, 0, 0, 0}}, + {22050, {64, 13, 172, 68, 0, 0, 0, 0, 0, 0}}, + {32000, {64, 13, 250, 0, 0, 0, 0, 0, 0, 0}}, + {37800, {64, 14, 147, 168, 0, 0, 0, 0, 0, 0}}, + {44056, {64, 14, 172, 24, 0, 0, 0, 0, 0, 0}}, + {44100, {64, 14, 172, 68, 0, 0, 0, 0, 0, 0}}, + {47250, {64, 14, 184, 146, 0, 0, 0, 0, 0, 0}}, + {48000, {64, 14, 187, 128, 0, 0, 0, 0, 0, 0}}, + {50000, {64, 14, 195, 80, 0, 0, 0, 0, 0, 0}}, + {50400, {64, 14, 196, 224, 0, 0, 0, 0, 0, 0}}, + {88200, {64, 15, 172, 68, 0, 0, 0, 0, 0, 0}}, + {96000, {64, 15, 187, 128, 0, 0, 0, 0, 0, 0}}, + {176400, {64, 16, 172, 68, 0, 0, 0, 0, 0, 0}}, + {192000, {64, 16, 187, 128, 0, 0, 0, 0, 0, 0}}, + {352800, {64, 17, 172, 68, 0, 0, 0, 0, 0, 0}}, + {2822400, {64, 20, 172, 68, 0, 0, 0, 0, 0, 0}}, + {5644800, {64, 21, 172, 68, 0, 0, 0, 0, 0, 0}} +}; + +//============================================================= +enum WavAudioFormat +{ + PCM = 0x0001, + IEEEFloat = 0x0003, + ALaw = 0x0006, + MULaw = 0x0007, + Extensible = 0xFFFE +}; + +//============================================================= +enum AIFFAudioFormat +{ + Uncompressed, + Compressed, + Error +}; + +//============================================================= +/* IMPLEMENTATION */ +//============================================================= + +//============================================================= +template +AudioFile::AudioFile() +{ + static_assert(std::is_floating_point::value, "ERROR: This version of AudioFile only supports floating point sample formats"); + + bitDepth = 16; + sampleRate = 44100; + samples.resize (1); + samples[0].resize (0); + audioFileFormat = AudioFileFormat::NotLoaded; +} + +//============================================================= +template +AudioFile::AudioFile (std::string filePath) + : AudioFile() +{ + load (filePath); +} + +//============================================================= +template +uint32_t AudioFile::getSampleRate() const +{ + return sampleRate; +} + +//============================================================= +template +int AudioFile::getNumChannels() const +{ + return (int)samples.size(); +} + +//============================================================= +template +bool AudioFile::isMono() const +{ + return getNumChannels() == 1; +} + +//============================================================= +template +bool AudioFile::isStereo() const +{ + return getNumChannels() == 2; +} + +//============================================================= +template +int AudioFile::getBitDepth() const +{ + return bitDepth; +} + +//============================================================= +template +int AudioFile::getNumSamplesPerChannel() const +{ + if (samples.size() > 0) + return (int) samples[0].size(); + else + return 0; +} + +//============================================================= +template +double AudioFile::getLengthInSeconds() const +{ + return (double)getNumSamplesPerChannel() / (double)sampleRate; +} + 
+//============================================================= +template +void AudioFile::printSummary() const +{ + std::cout << "|======================================|" << std::endl; + std::cout << "Num Channels: " << getNumChannels() << std::endl; + std::cout << "Num Samples Per Channel: " << getNumSamplesPerChannel() << std::endl; + std::cout << "Sample Rate: " << sampleRate << std::endl; + std::cout << "Bit Depth: " << bitDepth << std::endl; + std::cout << "Length in Seconds: " << getLengthInSeconds() << std::endl; + std::cout << "|======================================|" << std::endl; +} + +//============================================================= +template +bool AudioFile::setAudioBuffer (AudioBuffer& newBuffer) +{ + int numChannels = (int)newBuffer.size(); + + if (numChannels <= 0) + { + assert (false && "The buffer your are trying to use has no channels"); + return false; + } + + size_t numSamples = newBuffer[0].size(); + + // set the number of channels + samples.resize (newBuffer.size()); + + for (int k = 0; k < getNumChannels(); k++) + { + assert (newBuffer[k].size() == numSamples); + + samples[k].resize (numSamples); + + for (size_t i = 0; i < numSamples; i++) + { + samples[k][i] = newBuffer[k][i]; + } + } + + return true; +} + +//============================================================= +template +void AudioFile::setAudioBufferSize (int numChannels, int numSamples) +{ + samples.resize (numChannels); + setNumSamplesPerChannel (numSamples); +} + +//============================================================= +template +void AudioFile::setNumSamplesPerChannel (int numSamples) +{ + int originalSize = getNumSamplesPerChannel(); + + for (int i = 0; i < getNumChannels();i++) + { + samples[i].resize (numSamples); + + // set any new samples to zero + if (numSamples > originalSize) + std::fill (samples[i].begin() + originalSize, samples[i].end(), (T)0.); + } +} + +//============================================================= +template +void AudioFile::setNumChannels (int numChannels) +{ + int originalNumChannels = getNumChannels(); + int originalNumSamplesPerChannel = getNumSamplesPerChannel(); + + samples.resize (numChannels); + + // make sure any new channels are set to the right size + // and filled with zeros + if (numChannels > originalNumChannels) + { + for (int i = originalNumChannels; i < numChannels; i++) + { + samples[i].resize (originalNumSamplesPerChannel); + std::fill (samples[i].begin(), samples[i].end(), (T)0.); + } + } +} + +//============================================================= +template +void AudioFile::setBitDepth (int numBitsPerSample) +{ + bitDepth = numBitsPerSample; +} + +//============================================================= +template +void AudioFile::setSampleRate (uint32_t newSampleRate) +{ + sampleRate = newSampleRate; +} + +//============================================================= +template +void AudioFile::shouldLogErrorsToConsole (bool logErrors) +{ + logErrorsToConsole = logErrors; +} + +//============================================================= +template +bool AudioFile::load (std::string filePath) +{ + std::ifstream file (filePath, std::ios::binary); + + // check the file exists + if (! 
file.good()) + { + reportError ("ERROR: File doesn't exist or otherwise can't load file\n" + filePath); + return false; + } + + std::vector fileData; + + file.unsetf (std::ios::skipws); + + file.seekg (0, std::ios::end); + size_t length = file.tellg(); + file.seekg (0, std::ios::beg); + + // allocate + fileData.resize (length); + + file.read(reinterpret_cast (fileData.data()), length); + file.close(); + + if (file.gcount() != length) + { + reportError ("ERROR: Couldn't read entire file\n" + filePath); + return false; + } + + return loadFromMemory (fileData); +} + +//============================================================= +template +bool AudioFile::loadFromMemory (std::vector& fileData) +{ + // get audio file format + audioFileFormat = determineAudioFileFormat (fileData); + + if (audioFileFormat == AudioFileFormat::Wave) + { + return decodeWaveFile (fileData); + } + else if (audioFileFormat == AudioFileFormat::Aiff) + { + return decodeAiffFile (fileData); + } + else + { + reportError ("Audio File Type: Error"); + return false; + } +} + +//============================================================= +template +bool AudioFile::decodeWaveFile (std::vector& fileData) +{ + // ----------------------------------------------------------- + // HEADER CHUNK + std::string headerChunkID (fileData.begin(), fileData.begin() + 4); + //int32_t fileSizeInBytes = fourBytesToInt (fileData, 4) + 8; + std::string format (fileData.begin() + 8, fileData.begin() + 12); + + // ----------------------------------------------------------- + // try and find the start points of key chunks + int indexOfDataChunk = getIndexOfChunk (fileData, "data", 12); + int indexOfFormatChunk = getIndexOfChunk (fileData, "fmt ", 12); + int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12); + + // if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected + // then it is unlikely we'll able to read this file, so abort + if (indexOfDataChunk == -1 || indexOfFormatChunk == -1 || headerChunkID != "RIFF" || format != "WAVE") + { + reportError ("ERROR: this doesn't seem to be a valid .WAV file"); + return false; + } + + // ----------------------------------------------------------- + // FORMAT CHUNK + int f = indexOfFormatChunk; + std::string formatChunkID (fileData.begin() + f, fileData.begin() + f + 4); + //int32_t formatChunkSize = fourBytesToInt (fileData, f + 4); + uint16_t audioFormat = twoBytesToInt (fileData, f + 8); + uint16_t numChannels = twoBytesToInt (fileData, f + 10); + sampleRate = (uint32_t) fourBytesToInt (fileData, f + 12); + uint32_t numBytesPerSecond = fourBytesToInt (fileData, f + 16); + uint16_t numBytesPerBlock = twoBytesToInt (fileData, f + 20); + bitDepth = (int) twoBytesToInt (fileData, f + 22); + + uint16_t numBytesPerSample = static_cast (bitDepth) / 8; + + // check that the audio format is PCM or Float or extensible + if (audioFormat != WavAudioFormat::PCM && audioFormat != WavAudioFormat::IEEEFloat && audioFormat != WavAudioFormat::Extensible) + { + reportError ("ERROR: this .WAV file is encoded in a format that this library does not support at present"); + return false; + } + + // check the number of channels is mono or stereo + if (numChannels < 1 || numChannels > 128) + { + reportError ("ERROR: this WAV file seems to be an invalid number of channels (or corrupted?)"); + return false; + } + + // check header data is consistent + if (numBytesPerSecond != static_cast ((numChannels * sampleRate * bitDepth) / 8) || numBytesPerBlock != (numChannels * 
numBytesPerSample)) + { + reportError ("ERROR: the header data in this WAV file seems to be inconsistent"); + return false; + } + + // check bit depth is either 8, 16, 24 or 32 bit + if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32) + { + reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits"); + return false; + } + + // ----------------------------------------------------------- + // DATA CHUNK + int d = indexOfDataChunk; + std::string dataChunkID (fileData.begin() + d, fileData.begin() + d + 4); + int32_t dataChunkSize = fourBytesToInt (fileData, d + 4); + + int numSamples = dataChunkSize / (numChannels * bitDepth / 8); + int samplesStartIndex = indexOfDataChunk + 8; + + clearAudioBuffer(); + samples.resize (numChannels); + + for (int i = 0; i < numSamples; i++) + { + for (int channel = 0; channel < numChannels; channel++) + { + int sampleIndex = samplesStartIndex + (numBytesPerBlock * i) + channel * numBytesPerSample; + + if ((sampleIndex + (bitDepth / 8) - 1) >= fileData.size()) + { + reportError ("ERROR: read file error as the metadata indicates more samples than there are in the file data"); + return false; + } + + if (bitDepth == 8) + { + T sample = singleByteToSample (fileData[sampleIndex]); + samples[channel].push_back (sample); + } + else if (bitDepth == 16) + { + int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex); + T sample = sixteenBitIntToSample (sampleAsInt); + samples[channel].push_back (sample); + } + else if (bitDepth == 24) + { + int32_t sampleAsInt = 0; + sampleAsInt = (fileData[sampleIndex + 2] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex]; + + if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world + sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float + + T sample = (T)sampleAsInt / (T)8388608.; + samples[channel].push_back (sample); + } + else if (bitDepth == 32) + { + int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex); + T sample; + + if (audioFormat == WavAudioFormat::IEEEFloat) + sample = (T)reinterpret_cast (sampleAsInt); + else // assume PCM + sample = (T) sampleAsInt / static_cast (std::numeric_limits::max()); + + samples[channel].push_back (sample); + } + else + { + assert (false); + } + } + } + + // ----------------------------------------------------------- + // iXML CHUNK + if (indexOfXMLChunk != -1) + { + int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4); + iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize); + } + + return true; +} + +//============================================================= +template +bool AudioFile::decodeAiffFile (std::vector& fileData) +{ + // ----------------------------------------------------------- + // HEADER CHUNK + std::string headerChunkID (fileData.begin(), fileData.begin() + 4); + //int32_t fileSizeInBytes = fourBytesToInt (fileData, 4, Endianness::BigEndian) + 8; + std::string format (fileData.begin() + 8, fileData.begin() + 12); + + int audioFormat = format == "AIFF" ? AIFFAudioFormat::Uncompressed : format == "AIFC" ? 
AIFFAudioFormat::Compressed : AIFFAudioFormat::Error; + + // ----------------------------------------------------------- + // try and find the start points of key chunks + int indexOfCommChunk = getIndexOfChunk (fileData, "COMM", 12, Endianness::BigEndian); + int indexOfSoundDataChunk = getIndexOfChunk (fileData, "SSND", 12, Endianness::BigEndian); + int indexOfXMLChunk = getIndexOfChunk (fileData, "iXML", 12, Endianness::BigEndian); + + // if we can't find the data or format chunks, or the IDs/formats don't seem to be as expected + // then it is unlikely we'll able to read this file, so abort + if (indexOfSoundDataChunk == -1 || indexOfCommChunk == -1 || headerChunkID != "FORM" || audioFormat == AIFFAudioFormat::Error) + { + reportError ("ERROR: this doesn't seem to be a valid AIFF file"); + return false; + } + + // ----------------------------------------------------------- + // COMM CHUNK + int p = indexOfCommChunk; + std::string commChunkID (fileData.begin() + p, fileData.begin() + p + 4); + //int32_t commChunkSize = fourBytesToInt (fileData, p + 4, Endianness::BigEndian); + int16_t numChannels = twoBytesToInt (fileData, p + 8, Endianness::BigEndian); + int32_t numSamplesPerChannel = fourBytesToInt (fileData, p + 10, Endianness::BigEndian); + bitDepth = (int) twoBytesToInt (fileData, p + 14, Endianness::BigEndian); + sampleRate = getAiffSampleRate (fileData, p + 16); + + // check the sample rate was properly decoded + if (sampleRate == 0) + { + reportError ("ERROR: this AIFF file has an unsupported sample rate"); + return false; + } + + // check the number of channels is mono or stereo + if (numChannels < 1 ||numChannels > 2) + { + reportError ("ERROR: this AIFF file seems to be neither mono nor stereo (perhaps multi-track, or corrupted?)"); + return false; + } + + // check bit depth is either 8, 16, 24 or 32-bit + if (bitDepth != 8 && bitDepth != 16 && bitDepth != 24 && bitDepth != 32) + { + reportError ("ERROR: this file has a bit depth that is not 8, 16, 24 or 32 bits"); + return false; + } + + // ----------------------------------------------------------- + // SSND CHUNK + int s = indexOfSoundDataChunk; + std::string soundDataChunkID (fileData.begin() + s, fileData.begin() + s + 4); + int32_t soundDataChunkSize = fourBytesToInt (fileData, s + 4, Endianness::BigEndian); + int32_t offset = fourBytesToInt (fileData, s + 8, Endianness::BigEndian); + //int32_t blockSize = fourBytesToInt (fileData, s + 12, Endianness::BigEndian); + + int numBytesPerSample = bitDepth / 8; + int numBytesPerFrame = numBytesPerSample * numChannels; + int totalNumAudioSampleBytes = numSamplesPerChannel * numBytesPerFrame; + int samplesStartIndex = s + 16 + (int)offset; + + // sanity check the data + if ((soundDataChunkSize - 8) != totalNumAudioSampleBytes || totalNumAudioSampleBytes > static_cast(fileData.size() - samplesStartIndex)) + { + reportError ("ERROR: the metadatafor this file doesn't seem right"); + return false; + } + + clearAudioBuffer(); + samples.resize (numChannels); + + for (int i = 0; i < numSamplesPerChannel; i++) + { + for (int channel = 0; channel < numChannels; channel++) + { + int sampleIndex = samplesStartIndex + (numBytesPerFrame * i) + channel * numBytesPerSample; + + if ((sampleIndex + (bitDepth / 8) - 1) >= fileData.size()) + { + reportError ("ERROR: read file error as the metadata indicates more samples than there are in the file data"); + return false; + } + + if (bitDepth == 8) + { + int8_t sampleAsSigned8Bit = (int8_t)fileData[sampleIndex]; + T sample = (T)sampleAsSigned8Bit / 
(T)128.; + samples[channel].push_back (sample); + } + else if (bitDepth == 16) + { + int16_t sampleAsInt = twoBytesToInt (fileData, sampleIndex, Endianness::BigEndian); + T sample = sixteenBitIntToSample (sampleAsInt); + samples[channel].push_back (sample); + } + else if (bitDepth == 24) + { + int32_t sampleAsInt = 0; + sampleAsInt = (fileData[sampleIndex] << 16) | (fileData[sampleIndex + 1] << 8) | fileData[sampleIndex + 2]; + + if (sampleAsInt & 0x800000) // if the 24th bit is set, this is a negative number in 24-bit world + sampleAsInt = sampleAsInt | ~0xFFFFFF; // so make sure sign is extended to the 32 bit float + + T sample = (T)sampleAsInt / (T)8388608.; + samples[channel].push_back (sample); + } + else if (bitDepth == 32) + { + int32_t sampleAsInt = fourBytesToInt (fileData, sampleIndex, Endianness::BigEndian); + T sample; + + if (audioFormat == AIFFAudioFormat::Compressed) + sample = (T)reinterpret_cast (sampleAsInt); + else // assume uncompressed + sample = (T) sampleAsInt / static_cast (std::numeric_limits::max()); + + samples[channel].push_back (sample); + } + else + { + assert (false); + } + } + } + + // ----------------------------------------------------------- + // iXML CHUNK + if (indexOfXMLChunk != -1) + { + int32_t chunkSize = fourBytesToInt (fileData, indexOfXMLChunk + 4); + iXMLChunk = std::string ((const char*) &fileData[indexOfXMLChunk + 8], chunkSize); + } + + return true; +} + +//============================================================= +template +uint32_t AudioFile::getAiffSampleRate (std::vector& fileData, int sampleRateStartIndex) +{ + for (auto it : aiffSampleRateTable) + { + if (tenByteMatch (fileData, sampleRateStartIndex, it.second, 0)) + return it.first; + } + + return 0; +} + +//============================================================= +template +bool AudioFile::tenByteMatch (std::vector& v1, int startIndex1, std::vector& v2, int startIndex2) +{ + for (int i = 0; i < 10; i++) + { + if (v1[startIndex1 + i] != v2[startIndex2 + i]) + return false; + } + + return true; +} + +//============================================================= +template +void AudioFile::addSampleRateToAiffData (std::vector& fileData, uint32_t sampleRate) +{ + if (aiffSampleRateTable.count (sampleRate) > 0) + { + for (int i = 0; i < 10; i++) + fileData.push_back (aiffSampleRateTable[sampleRate][i]); + } +} + +//============================================================= +template +bool AudioFile::save (std::string filePath, AudioFileFormat format) +{ + if (format == AudioFileFormat::Wave) + { + return saveToWaveFile (filePath); + } + else if (format == AudioFileFormat::Aiff) + { + return saveToAiffFile (filePath); + } + + return false; +} + +//============================================================= +template +bool AudioFile::saveToWaveFile (std::string filePath) +{ + std::vector fileData; + + int32_t dataChunkSize = getNumSamplesPerChannel() * (getNumChannels() * bitDepth / 8); + int16_t audioFormat = bitDepth == 32 ? WavAudioFormat::IEEEFloat : WavAudioFormat::PCM; + int32_t formatChunkSize = audioFormat == WavAudioFormat::PCM ? 
16 : 18; + int32_t iXMLChunkSize = static_cast (iXMLChunk.size()); + + // ----------------------------------------------------------- + // HEADER CHUNK + addStringToFileData (fileData, "RIFF"); + + // The file size in bytes is the header chunk size (4, not counting RIFF and WAVE) + the format + // chunk size (24) + the metadata part of the data chunk plus the actual data chunk size + int32_t fileSizeInBytes = 4 + formatChunkSize + 8 + 8 + dataChunkSize; + if (iXMLChunkSize > 0) + { + fileSizeInBytes += (8 + iXMLChunkSize); + } + + addInt32ToFileData (fileData, fileSizeInBytes); + + addStringToFileData (fileData, "WAVE"); + + // ----------------------------------------------------------- + // FORMAT CHUNK + addStringToFileData (fileData, "fmt "); + addInt32ToFileData (fileData, formatChunkSize); // format chunk size (16 for PCM) + addInt16ToFileData (fileData, audioFormat); // audio format + addInt16ToFileData (fileData, (int16_t)getNumChannels()); // num channels + addInt32ToFileData (fileData, (int32_t)sampleRate); // sample rate + + int32_t numBytesPerSecond = (int32_t) ((getNumChannels() * sampleRate * bitDepth) / 8); + addInt32ToFileData (fileData, numBytesPerSecond); + + int16_t numBytesPerBlock = getNumChannels() * (bitDepth / 8); + addInt16ToFileData (fileData, numBytesPerBlock); + + addInt16ToFileData (fileData, (int16_t)bitDepth); + + if (audioFormat == WavAudioFormat::IEEEFloat) + addInt16ToFileData (fileData, 0); // extension size + + // ----------------------------------------------------------- + // DATA CHUNK + addStringToFileData (fileData, "data"); + addInt32ToFileData (fileData, dataChunkSize); + + for (int i = 0; i < getNumSamplesPerChannel(); i++) + { + for (int channel = 0; channel < getNumChannels(); channel++) + { + if (bitDepth == 8) + { + uint8_t byte = sampleToSingleByte (samples[channel][i]); + fileData.push_back (byte); + } + else if (bitDepth == 16) + { + int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]); + addInt16ToFileData (fileData, sampleAsInt); + } + else if (bitDepth == 24) + { + int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.); + + uint8_t bytes[3]; + bytes[2] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF; + bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF; + bytes[0] = (uint8_t) sampleAsIntAgain & 0xFF; + + fileData.push_back (bytes[0]); + fileData.push_back (bytes[1]); + fileData.push_back (bytes[2]); + } + else if (bitDepth == 32) + { + int32_t sampleAsInt; + + if (audioFormat == WavAudioFormat::IEEEFloat) + sampleAsInt = (int32_t) reinterpret_cast (samples[channel][i]); + else // assume PCM + sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits::max()); + + addInt32ToFileData (fileData, sampleAsInt, Endianness::LittleEndian); + } + else + { + assert (false && "Trying to write a file with unsupported bit depth"); + return false; + } + } + } + + // ----------------------------------------------------------- + // iXML CHUNK + if (iXMLChunkSize > 0) + { + addStringToFileData (fileData, "iXML"); + addInt32ToFileData (fileData, iXMLChunkSize); + addStringToFileData (fileData, iXMLChunk); + } + + // check that the various sizes we put in the metadata are correct + if (fileSizeInBytes != static_cast (fileData.size() - 8) || dataChunkSize != (getNumSamplesPerChannel() * getNumChannels() * (bitDepth / 8))) + { + reportError ("ERROR: couldn't save file to " + filePath); + return false; + } + + // try to write the file + return writeDataToFile (fileData, filePath); +} + 
+//============================================================= +template +bool AudioFile::saveToAiffFile (std::string filePath) +{ + std::vector fileData; + + int32_t numBytesPerSample = bitDepth / 8; + int32_t numBytesPerFrame = numBytesPerSample * getNumChannels(); + int32_t totalNumAudioSampleBytes = getNumSamplesPerChannel() * numBytesPerFrame; + int32_t soundDataChunkSize = totalNumAudioSampleBytes + 8; + int32_t iXMLChunkSize = static_cast (iXMLChunk.size()); + + // ----------------------------------------------------------- + // HEADER CHUNK + addStringToFileData (fileData, "FORM"); + + // The file size in bytes is the header chunk size (4, not counting FORM and AIFF) + the COMM + // chunk size (26) + the metadata part of the SSND chunk plus the actual data chunk size + int32_t fileSizeInBytes = 4 + 26 + 16 + totalNumAudioSampleBytes; + if (iXMLChunkSize > 0) + { + fileSizeInBytes += (8 + iXMLChunkSize); + } + + addInt32ToFileData (fileData, fileSizeInBytes, Endianness::BigEndian); + + addStringToFileData (fileData, "AIFF"); + + // ----------------------------------------------------------- + // COMM CHUNK + addStringToFileData (fileData, "COMM"); + addInt32ToFileData (fileData, 18, Endianness::BigEndian); // commChunkSize + addInt16ToFileData (fileData, getNumChannels(), Endianness::BigEndian); // num channels + addInt32ToFileData (fileData, getNumSamplesPerChannel(), Endianness::BigEndian); // num samples per channel + addInt16ToFileData (fileData, bitDepth, Endianness::BigEndian); // bit depth + addSampleRateToAiffData (fileData, sampleRate); + + // ----------------------------------------------------------- + // SSND CHUNK + addStringToFileData (fileData, "SSND"); + addInt32ToFileData (fileData, soundDataChunkSize, Endianness::BigEndian); + addInt32ToFileData (fileData, 0, Endianness::BigEndian); // offset + addInt32ToFileData (fileData, 0, Endianness::BigEndian); // block size + + for (int i = 0; i < getNumSamplesPerChannel(); i++) + { + for (int channel = 0; channel < getNumChannels(); channel++) + { + if (bitDepth == 8) + { + uint8_t byte = sampleToSingleByte (samples[channel][i]); + fileData.push_back (byte); + } + else if (bitDepth == 16) + { + int16_t sampleAsInt = sampleToSixteenBitInt (samples[channel][i]); + addInt16ToFileData (fileData, sampleAsInt, Endianness::BigEndian); + } + else if (bitDepth == 24) + { + int32_t sampleAsIntAgain = (int32_t) (samples[channel][i] * (T)8388608.); + + uint8_t bytes[3]; + bytes[0] = (uint8_t) (sampleAsIntAgain >> 16) & 0xFF; + bytes[1] = (uint8_t) (sampleAsIntAgain >> 8) & 0xFF; + bytes[2] = (uint8_t) sampleAsIntAgain & 0xFF; + + fileData.push_back (bytes[0]); + fileData.push_back (bytes[1]); + fileData.push_back (bytes[2]); + } + else if (bitDepth == 32) + { + // write samples as signed integers (no implementation yet for floating point, but looking at WAV implementation should help) + int32_t sampleAsInt = (int32_t) (samples[channel][i] * std::numeric_limits::max()); + addInt32ToFileData (fileData, sampleAsInt, Endianness::BigEndian); + } + else + { + assert (false && "Trying to write a file with unsupported bit depth"); + return false; + } + } + } + + // ----------------------------------------------------------- + // iXML CHUNK + if (iXMLChunkSize > 0) + { + addStringToFileData (fileData, "iXML"); + addInt32ToFileData (fileData, iXMLChunkSize, Endianness::BigEndian); + addStringToFileData (fileData, iXMLChunk); + } + + // check that the various sizes we put in the metadata are correct + if (fileSizeInBytes != static_cast 
(fileData.size() - 8) || soundDataChunkSize != getNumSamplesPerChannel() * numBytesPerFrame + 8) + { + reportError ("ERROR: couldn't save file to " + filePath); + return false; + } + + // try to write the file + return writeDataToFile (fileData, filePath); +} + +//============================================================= +template +bool AudioFile::writeDataToFile (std::vector& fileData, std::string filePath) +{ + std::ofstream outputFile (filePath, std::ios::binary); + + if (outputFile.is_open()) + { + for (size_t i = 0; i < fileData.size(); i++) + { + char value = (char) fileData[i]; + outputFile.write (&value, sizeof (char)); + } + + outputFile.close(); + + return true; + } + + return false; +} + +//============================================================= +template +void AudioFile::addStringToFileData (std::vector& fileData, std::string s) +{ + for (size_t i = 0; i < s.length();i++) + fileData.push_back ((uint8_t) s[i]); +} + +//============================================================= +template +void AudioFile::addInt32ToFileData (std::vector& fileData, int32_t i, Endianness endianness) +{ + uint8_t bytes[4]; + + if (endianness == Endianness::LittleEndian) + { + bytes[3] = (i >> 24) & 0xFF; + bytes[2] = (i >> 16) & 0xFF; + bytes[1] = (i >> 8) & 0xFF; + bytes[0] = i & 0xFF; + } + else + { + bytes[0] = (i >> 24) & 0xFF; + bytes[1] = (i >> 16) & 0xFF; + bytes[2] = (i >> 8) & 0xFF; + bytes[3] = i & 0xFF; + } + + for (int i = 0; i < 4; i++) + fileData.push_back (bytes[i]); +} + +//============================================================= +template +void AudioFile::addInt16ToFileData (std::vector& fileData, int16_t i, Endianness endianness) +{ + uint8_t bytes[2]; + + if (endianness == Endianness::LittleEndian) + { + bytes[1] = (i >> 8) & 0xFF; + bytes[0] = i & 0xFF; + } + else + { + bytes[0] = (i >> 8) & 0xFF; + bytes[1] = i & 0xFF; + } + + fileData.push_back (bytes[0]); + fileData.push_back (bytes[1]); +} + +//============================================================= +template +void AudioFile::clearAudioBuffer() +{ + for (size_t i = 0; i < samples.size();i++) + { + samples[i].clear(); + } + + samples.clear(); +} + +//============================================================= +template +AudioFileFormat AudioFile::determineAudioFileFormat (std::vector& fileData) +{ + std::string header (fileData.begin(), fileData.begin() + 4); + + if (header == "RIFF") + return AudioFileFormat::Wave; + else if (header == "FORM") + return AudioFileFormat::Aiff; + else + return AudioFileFormat::Error; +} + +//============================================================= +template +int32_t AudioFile::fourBytesToInt (std::vector& source, int startIndex, Endianness endianness) +{ + int32_t result; + + if (endianness == Endianness::LittleEndian) + result = (source[startIndex + 3] << 24) | (source[startIndex + 2] << 16) | (source[startIndex + 1] << 8) | source[startIndex]; + else + result = (source[startIndex] << 24) | (source[startIndex + 1] << 16) | (source[startIndex + 2] << 8) | source[startIndex + 3]; + + return result; +} + +//============================================================= +template +int16_t AudioFile::twoBytesToInt (std::vector& source, int startIndex, Endianness endianness) +{ + int16_t result; + + if (endianness == Endianness::LittleEndian) + result = (source[startIndex + 1] << 8) | source[startIndex]; + else + result = (source[startIndex] << 8) | source[startIndex + 1]; + + return result; +} + +//============================================================= +template 
+int AudioFile::getIndexOfString (std::vector& source, std::string stringToSearchFor) +{ + int index = -1; + int stringLength = (int)stringToSearchFor.length(); + + for (size_t i = 0; i < source.size() - stringLength;i++) + { + std::string section (source.begin() + i, source.begin() + i + stringLength); + + if (section == stringToSearchFor) + { + index = static_cast (i); + break; + } + } + + return index; +} + +//============================================================= +template +int AudioFile::getIndexOfChunk (std::vector& source, const std::string& chunkHeaderID, int startIndex, Endianness endianness) +{ + constexpr int dataLen = 4; + if (chunkHeaderID.size() != dataLen) + { + assert (false && "Invalid chunk header ID string"); + return -1; + } + + int i = startIndex; + while (i < source.size() - dataLen) + { + if (memcmp (&source[i], chunkHeaderID.data(), dataLen) == 0) + { + return i; + } + + i += dataLen; + auto chunkSize = fourBytesToInt (source, i, endianness); + i += (dataLen + chunkSize); + } + + return -1; +} + +//============================================================= +template +T AudioFile::sixteenBitIntToSample (int16_t sample) +{ + return static_cast (sample) / static_cast (32768.); +} + +//============================================================= +template +int16_t AudioFile::sampleToSixteenBitInt (T sample) +{ + sample = clamp (sample, -1., 1.); + return static_cast (sample * 32767.); +} + +//============================================================= +template +uint8_t AudioFile::sampleToSingleByte (T sample) +{ + sample = clamp (sample, -1., 1.); + sample = (sample + 1.) / 2.; + return static_cast (sample * 255.); +} + +//============================================================= +template +T AudioFile::singleByteToSample (uint8_t sample) +{ + return static_cast (sample - 128) / static_cast (128.); +} + +//============================================================= +template +T AudioFile::clamp (T value, T minValue, T maxValue) +{ + value = std::min (value, maxValue); + value = std::max (value, minValue); + return value; +} + +//============================================================= +template +void AudioFile::reportError (std::string errorMessage) +{ + if (logErrorsToConsole) + std::cout << errorMessage << std::endl; +} + +#if defined (_MSC_VER) + __pragma(warning (pop)) +#elif defined (__GNUC__) + _Pragma("GCC diagnostic pop") +#endif + +#endif /* AudioFile_h */ \ No newline at end of file diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/ComDefine.h b/runtime/engine/asr/server/brpc/paraformerCPP/ComDefine.h new file mode 100644 index 00000000..f131e5ec --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/ComDefine.h @@ -0,0 +1,11 @@ + +#ifndef COMDEFINE_H +#define COMDEFINE_H + +#define S_BEGIN 0 +#define S_MIDDLE 1 +#define S_END 2 +#define S_ALL 3 +#define S_ERR 4 + +#endif diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.cpp b/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.cpp new file mode 100644 index 00000000..bb0c3e9b --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.cpp @@ -0,0 +1,267 @@ +#include "Vocab.h" + +#include +#include +#include +#include +#include + +using namespace std; + +Vocab::Vocab(const char *filename) +{ + ifstream in(filename); + string line; + + if (in) // 有该文件 + { + while (getline(in, line)) // line中不包括每行的换行符 + { + vocab.push_back(line); + } + // cout << vocab[1719] << endl; + } + // else // 没有该文件 + //{ + // cout << "no such file" << endl; + // } +} 
+Vocab::~Vocab() +{ +} + +string Vocab::vector2string(vector in) +{ + stringstream ss; + for (auto it = in.begin(); it != in.end(); it++) { + ss << vocab[*it]; + } + + return ss.str(); +} + +int str2int(string str) +{ + const char *ch_array = str.c_str(); + if (((ch_array[0] & 0xf0) != 0xe0) || ((ch_array[1] & 0xc0) != 0x80) || + ((ch_array[2] & 0xc0) != 0x80)) + return 0; + + int val = ((ch_array[0] & 0x0f) << 12) | ((ch_array[1] & 0x3f) << 6) | + (ch_array[2] & 0x3f); + return val; +} + +bool Vocab::isChinese(string ch) +{ + if (ch.size() != 3) { + return false; + } + + int unicode = str2int(ch); + if (unicode >= 19968 && unicode <= 40959) { + return true; + } + + return false; +} + + +string Vocab::vector2stringV2(vector in) +{ + int i; + list words; + + int is_pre_english = false; + int pre_english_len = 0; + + int is_combining = false; + string combine = ""; + + for (auto it = in.begin(); it != in.end(); it++) { + string word = vocab[*it]; + + // step1 space character skips + if (word == "" || word == "" || word == "") + continue; + + // step2 combie phoneme to full word + { + int sub_word = !(word.find("@@") == string::npos); + + // process word start and middle part + if (sub_word) { + combine += word.erase(word.length() - 2); + is_combining = true; + continue; + } + // process word end part + else if (is_combining) { + combine += word; + is_combining = false; + word = combine; + combine = ""; + } + } + + // step3 process english word deal with space , turn abbreviation to upper case + { + + // input word is chinese, not need process + if (isChinese(word)) { + words.push_back(word); + is_pre_english = false; + } + // input word is english word + else { + + // pre word is chinese + if (!is_pre_english) { + word[0] = word[0] - 32; + words.push_back(word); + pre_english_len = word.size(); + + } + + // pre word is english word + else { + + // single letter turn to upper case + if (word.size() == 1) { + word[0] = word[0] - 32; + } + + if (pre_english_len > 1) { + words.push_back(" "); + words.push_back(word); + pre_english_len = word.size(); + } + else { + if (word.size() > 1) { + words.push_back(" "); + } + words.push_back(word); + pre_english_len = word.size(); + } + } + + is_pre_english = true; + + } + } + } + + // for (auto it = words.begin(); it != words.end(); it++) { + // cout << *it << endl; + // } + + stringstream ss; + for (auto it = words.begin(); it != words.end(); it++) { + ss << *it; + } + + return ss.str(); +} + +string Vocab::vector2stringV3(string in) +{ + int i; + list words; + words.push_back(in.c_str()); + + int is_pre_english = false; + int pre_english_len = 0; + + int is_combining = false; + string combine = ""; + + for (auto it = in.begin(); it != in.end(); it++) { + string word = vocab[*it]; + + // step1 space character skips + if (word == "" || word == "" || word == "") + continue; + + // step2 combie phoneme to full word + { + int sub_word = !(word.find("@@") == string::npos); + + // process word start and middle part + if (sub_word) { + combine += word.erase(word.length() - 2); + is_combining = true; + continue; + } + // process word end part + else if (is_combining) { + combine += word; + is_combining = false; + word = combine; + combine = ""; + } + } + + // step3 process english word deal with space , turn abbreviation to upper case + { + + // input word is chinese, not need process + if (isChinese(word)) { + words.push_back(word); + is_pre_english = false; + } + // input word is english word + else { + + // pre word is chinese + if (!is_pre_english) { + 
word[0] = word[0] - 32; + words.push_back(word); + pre_english_len = word.size(); + + } + + // pre word is english word + else { + + // single letter turn to upper case + if (word.size() == 1) { + word[0] = word[0] - 32; + } + + if (pre_english_len > 1) { + words.push_back(" "); + words.push_back(word); + pre_english_len = word.size(); + } + else { + if (word.size() > 1) { + words.push_back(" "); + } + words.push_back(word); + pre_english_len = word.size(); + } + } + + is_pre_english = true; + + } + } + } + + // for (auto it = words.begin(); it != words.end(); it++) { + // cout << *it << endl; + // } + + stringstream ss; + for (auto it = words.begin(); it != words.end(); it++) { + ss << *it; + } + + return ss.str(); +} + + +int Vocab::size() +{ + return vocab.size(); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.h b/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.h new file mode 100644 index 00000000..59468e41 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/Vocab.h @@ -0,0 +1,24 @@ +#ifndef VOCAB_H +#define VOCAB_H + +#include +#include +#include +using namespace std; + +class Vocab { + private: + vector vocab; + bool isChinese(string ch); + bool isEnglish(string ch); + + public: + Vocab(const char *filename); + ~Vocab(); + int size(); + string vector2string(vector in); + string vector2stringV2(vector in); + string vector2stringV2(string in); +}; + +#endif \ No newline at end of file diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/asr.conf b/runtime/engine/asr/server/brpc/paraformerCPP/asr.conf new file mode 100644 index 00000000..328795bf --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/asr.conf @@ -0,0 +1,49 @@ +# comlog related params +########################################## +# 进程名 +COMLOG_PROCNAME : asr-service + +#日志等级 +#[默认配置(uint)] +COMLOG_LEVEL : 4 + +#设备数目 +#[默认配置(uint), COMLOG_DEVICE_NUM : 2] +COMLOG_DEVICE_NUM : 2 +#设备0 名 +#[默认配置(字符串), COMLOG_DEVICE0 : FILE] +COMLOG_DEVICE0 : FILE +#设备0 名 +#[默认配置(字符串), COMLOG_DEVICE1 : TTY] +COMLOG_DEVICE1 : TTY + +#设备类型, ULLOG +#[默认配置(字符串)] +FILE_TYPE : ULLOG +FILE_SIZE : 2048 +FILE_SPLITE_TYPE : DATECUT +FILE_DATA_CRONOCUT : 1 +FILE_RESERVED1 : %Y%m%d%H +FILE_QUOTA_DAY : 30 +#日志名 +#[默认配置(字符串)] +FILE_NAME : asr-service.log +#日志路径 +#[默认配置(字符串), FILE_PATH : ./log] +FILE_PATH : ./log +#是否打开这个设备 +#[默认配置(uint), FILE_OPEN : 1] +FILE_OPEN : 1 + +#设备类型, TTY +#[默认配置(字符串), TTY_TYPE : TTY] +TTY_TYPE : TTY +#日志名 +#[默认配置(字符串), TTY_NAME : receiver] +TTY_NAME : asr-service +#日志路径 +#[默认配置(字符串), TTY_PATH : ./log] +TTY_PATH : ./log +#是否打开这个设备 +#[默认配置(uint), TTY_OPEN : 1] +TTY_OPEN : 1 \ No newline at end of file diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/client.cc b/runtime/engine/asr/server/brpc/paraformerCPP/client.cc new file mode 100644 index 00000000..560068ee --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/client.cc @@ -0,0 +1,34 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "recognizer.h" +#include +#include + +using std::vector; + +int main(int argc, char* argv[]) { + + std::string model_file = argv[1]; + std::string word_symbol_file = argv[2]; + std::string wav_audio = argv[3]; + InitRecognizer(model_file, word_symbol_file); + int idx = AddRecognizerInstance(); // idx == 0 + for (int i = 0; i < 50; i++){ + AcceptWav(wav_audio, idx); + std::string result = GetResult(idx); + std::cout << "idx:" << idx << "result :" << result << std::endl; + Reset(idx); + } +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/main.cc b/runtime/engine/asr/server/brpc/paraformerCPP/main.cc new file mode 100644 index 00000000..875468b2 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/main.cc @@ -0,0 +1,33 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "recognizer.h" +#include +#include + +using std::vector; + +int main(int argc, char* argv[]) { + + std::string model_file = "model.onnx"; + std::string word_symbol_file = "words.txt"; + std::string wav_audio = argv[1]; + InitRecognizer(model_file, word_symbol_file); + + AcceptWav(wav_audio); + + std::string result = GetResult(); + std::cout << "result :" << result << std::endl; + Reset(); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/recognizer.h b/runtime/engine/asr/server/brpc/paraformerCPP/recognizer.h new file mode 100644 index 00000000..82216357 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/recognizer.h @@ -0,0 +1,28 @@ + +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
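+
+// Typical call sequence, as exercised by client.cc and server.cpp:
+//
+//   InitRecognizer(model_file, word_symbol_table_file);
+//   int id = AddRecognizerInstance();   // ids count up from 0
+//   AcceptWav(wav_path, id);            // or Accept(samples, id) for raw audio
+//   std::string text = GetResult(id);
+//   Reset(id);                          // make the instance reusable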
+ +#pragma once + +#include +#include + +bool InitRecognizer(const std::string& model_file, + const std::string& word_symbol_table_file); +int AddRecognizerInstance(); // instance id is from 0 to size - 1 +int GetRecognizerInstanceSize(); // return size +void Accept(const std::vector& waves, int instance_id); +void AcceptWav(const std::string wav_file, int instance_id); +std::string GetResult(int instance_id); +void Reset(int instance_id); diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/run.sh b/runtime/engine/asr/server/brpc/paraformerCPP/run.sh new file mode 100644 index 00000000..267e73c3 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/run.sh @@ -0,0 +1,2 @@ +export LD_LIBRARY_PATH=./fdlib/lib:$LD_LIBRARY_PATH +../bin/asrcpuserver --port 8765 \ No newline at end of file diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/server.cpp b/runtime/engine/asr/server/brpc/paraformerCPP/server.cpp new file mode 100644 index 00000000..48196f6a --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/server.cpp @@ -0,0 +1,243 @@ +// Copyright (c) 2014 baidu-rpc authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// A server to receive EchoRequest and send back EchoResponse. + +#include +#ifndef _WIN32 +#include +#else +#include +#endif + + +#include +#include +#include +#include +#include "base/base64.h" +#include +#include +#include "echo.pb.h" +#include +#include +#include +#include +#include +#include +#include "rapidjson/document.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" +#include "recognizer.h" +#include +#include +#include + +using namespace std; + +DEFINE_int32(port, 8857, "TCP Port of this server"); +DEFINE_string(modelpath, "/home/bae/sourcecode/paddlespeech_cli", "the path of model"); +DEFINE_int32(max_concurrency, 2, "Limit of request processing in parallel"); + + +namespace example { +struct subRes{ + int s; // start time + int e; // end time + string t; // text +}; +class AudioServiceImpl : public EchoService { +public: + AudioServiceImpl() {}; + virtual ~AudioServiceImpl() {}; + string to_string(rapidjson::Document& out_doc) { + rapidjson::StringBuffer out_buffer; + rapidjson::Writer out_writer(out_buffer); + out_doc.Accept(out_writer); + std::string out_params = out_buffer.GetString(); + return out_params; + } + + string convertvector(vector result){ + rapidjson::Document document; + document.SetArray(); + rapidjson::Document::AllocatorType& allocator = document.GetAllocator(); + for (const auto sub : result) { + rapidjson::Value obj(rapidjson::kObjectType); + obj.AddMember("s", sub.s, allocator); + obj.AddMember("e", sub.e, allocator); + obj.AddMember("t", sub.t.c_str(), allocator); + document.PushBack(obj, allocator); + } + rapidjson::StringBuffer strbuf; + rapidjson::Writer writer(strbuf); + document.Accept(writer); + return strbuf.GetString(); + } + + string convertvectorV2(vector result){ + rapidjson::Document document; + document.SetObject(); + rapidjson::Document::AllocatorType& allocator = 
document.GetAllocator(); + rapidjson::Value ObjectArray(rapidjson::kArrayType); + for (const auto sub : result) { + rapidjson::Value obj(rapidjson::kObjectType); + obj.AddMember("s", sub.s, allocator); + obj.AddMember("e", sub.e, allocator); + obj.AddMember("t", sub.t.c_str(), allocator); + ObjectArray.PushBack(obj, allocator); + } + document.AddMember("AllTrans", ObjectArray, allocator); + rapidjson::StringBuffer strbuf; + rapidjson::Writer writer(strbuf); + document.Accept(writer); + return strbuf.GetString(); + } + + virtual void audiorecognition(google::protobuf::RpcController* cntl_base, + const AudioRequest* request, + AudioResponse* response, + google::protobuf::Closure* done) { + // This object helps you to call done->Run() in RAII style. If you need + // to process the request asynchronously, pass done_guard.release(). + baidu::rpc::ClosureGuard done_guard(done); + + + string decode_audio_buffer; + base::Base64Decode(request->audio(), &decode_audio_buffer); + vector vec; + vec.assign(decode_audio_buffer.begin(), decode_audio_buffer.end()); + + + AudioFile a; + bool res = a.loadFromMemory(vec); + Audio audi(0); + audi.loadwavfrommem(a); + audi.split(); + + vector buff; + int len = 0; + int flag = 1; + vector results; + int tmp_len = 0; + while (audi.fetch(buff, len, flag) > 0) { + int do_idx = rand() % 2; //random number [0,1) + Accept(buff, do_idx); + std::string subtxt = GetResult(do_idx); + Reset(do_idx); + buff.clear(); + int start_time = (int)(tmp_len/16000.0 * 1000); + int end_time = (int)((tmp_len + len)/16000.0 * 1000); + struct subRes subres = { + start_time, + end_time, + subtxt.c_str(), + }; + tmp_len += len; + results.push_back(subres); + com_writelog(COMLOG_NOTICE, "using process: %d, start: %d, end: %d, result: %s", do_idx, start_time, end_time, subtxt.c_str()); + } + + // vector inputAudio; + // for (int i = 0; i < a.getNumSamplesPerChannel(); i++) + // { + // float tempval = 0.0; + // for (int channel = 0; channel < a.getNumChannels(); channel++) + // { + // tempval += a.samples[channel][i] * 32768; + // } + // inputAudio.emplace_back(tempval); + // } + + // int do_idx = rand() % 2; //random number [0,1) + // Accept(inputAudio, do_idx); + // std::string result = GetResult(do_idx); + // Reset(do_idx); + // com_writelog(COMLOG_NOTICE, "using process: %d, result: %s", do_idx, result.c_str()); + // std::cout << "Result: " << result << std::endl; + + response->set_err_no(0); + response->set_err_msg(""); + string jsonresult = convertvector(results); + response->set_result(jsonresult); + response->set_cost_time(0); + results.clear(); + } +}; +} // namespace example + + + +int main(int argc, char* argv[]) { + // Parse gflags. We recommend you to use gflags as well. + google::ParseCommandLineFlags(&argc, &argv, true); + bool flag_auth = false; + // Setup for `GianoAuthenticator'. + std::unique_ptr auth; + if (flag_auth) { + if (baas::BAAS_Init() != 0) { + LOG(ERROR) << "Fail to init BAAS"; + return -1; + } + baas::CredentialVerifier + ver = baas::ServerUtility::CreateCredentialVerifier(); + auth.reset(new baidu::rpc::policy::GianoAuthenticator(NULL, &ver)); + } + + int ret = com_loadlog("./", "asr.conf"); + if (ret != 0) + { + fprintf(stderr, "load err\n"); + return -1; + } + // 打印日志,线程安全 + com_writelog(COMLOG_NOTICE, "server start1"); + + + // Generally you only need one Server. + baidu::rpc::Server server; + + + // Instance of your service. + example::AudioServiceImpl audio_service_impl; + + // Add the service into server. 
Notice the second parameter, because the + // service is put on stack, we don't want server to delete it, otherwise + // use baidu::rpc::SERVER_OWNS_SERVICE. + if (server.AddService(&audio_service_impl, + baidu::rpc::SERVER_DOESNT_OWN_SERVICE, + "/v1/audiorecognition => audiorecognition") != 0) { + LOG(ERROR) << "Fail to add service"; + return -1; + } + InitRecognizer("model.onnx", "words.txt"); + for (int i =0 ;i<= 1; i++){ + int idx = AddRecognizerInstance(); + } + + srand((unsigned)time(NULL)); + // Start the server. + baidu::rpc::ServerOptions options; + options.idle_timeout_sec = -1; + options.auth = auth.get(); + options.max_concurrency = FLAGS_max_concurrency; + if (server.Start(FLAGS_port, &options) != 0) { + LOG(ERROR) << "Fail to start EchoServer"; + return -1; + } + + // Wait until Ctrl-C is pressed, then Stop() and Join() the server. + server.RunUntilAskedToQuit(); + return 0; +} \ No newline at end of file diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/CMakeLists.txt b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/CMakeLists.txt new file mode 100644 index 00000000..51812eb7 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/CMakeLists.txt @@ -0,0 +1,16 @@ + + +if(WIN32) + add_definitions(-DWEBRTC_WIN) +else() + add_definitions(-DWEBRTC_POSIX) +endif() + + +include_directories("..") + +file(GLOB_RECURSE files "*.c" "rtc_base/checks.cc") + +message("${files}") + +add_library(webrtcvad ${files}) diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_bit_reverse.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_bit_reverse.c new file mode 100644 index 00000000..c8bd2dc4 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_bit_reverse.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +/* Tables for data buffer indexes that are bit reversed and thus need to be + * swapped. Note that, index_7[{0, 2, 4, ...}] are for the left side of the swap + * operations, while index_7[{1, 3, 5, ...}] are for the right side of the + * operation. Same for index_8. + */ + +/* Indexes for the case of stages == 7. */ +static const int16_t index_7[112] = { + 1, 64, 2, 32, 3, 96, 4, 16, 5, 80, 6, 48, 7, 112, 9, 72, 10, 40, 11, 104, + 12, 24, 13, 88, 14, 56, 15, 120, 17, 68, 18, 36, 19, 100, 21, 84, 22, 52, + 23, 116, 25, 76, 26, 44, 27, 108, 29, 92, 30, 60, 31, 124, 33, 66, 35, 98, + 37, 82, 38, 50, 39, 114, 41, 74, 43, 106, 45, 90, 46, 58, 47, 122, 49, 70, + 51, 102, 53, 86, 55, 118, 57, 78, 59, 110, 61, 94, 63, 126, 67, 97, 69, + 81, 71, 113, 75, 105, 77, 89, 79, 121, 83, 101, 87, 117, 91, 109, 95, 125, + 103, 115, 111, 123 +}; + +/* Indexes for the case of stages == 8. 
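+ * As a small example of what "bit reversed" means: with stages == 3 (n == 8)
+ * the swapped pairs would be (1, 4) and (3, 6), i.e. 001b <-> 100b and
+ * 011b <-> 110b; index_7 and index_8 simply precompute such pairs for
+ * n == 128 and n == 256.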
*/ +static const int16_t index_8[240] = { + 1, 128, 2, 64, 3, 192, 4, 32, 5, 160, 6, 96, 7, 224, 8, 16, 9, 144, 10, 80, + 11, 208, 12, 48, 13, 176, 14, 112, 15, 240, 17, 136, 18, 72, 19, 200, 20, + 40, 21, 168, 22, 104, 23, 232, 25, 152, 26, 88, 27, 216, 28, 56, 29, 184, + 30, 120, 31, 248, 33, 132, 34, 68, 35, 196, 37, 164, 38, 100, 39, 228, 41, + 148, 42, 84, 43, 212, 44, 52, 45, 180, 46, 116, 47, 244, 49, 140, 50, 76, + 51, 204, 53, 172, 54, 108, 55, 236, 57, 156, 58, 92, 59, 220, 61, 188, 62, + 124, 63, 252, 65, 130, 67, 194, 69, 162, 70, 98, 71, 226, 73, 146, 74, 82, + 75, 210, 77, 178, 78, 114, 79, 242, 81, 138, 83, 202, 85, 170, 86, 106, 87, + 234, 89, 154, 91, 218, 93, 186, 94, 122, 95, 250, 97, 134, 99, 198, 101, + 166, 103, 230, 105, 150, 107, 214, 109, 182, 110, 118, 111, 246, 113, 142, + 115, 206, 117, 174, 119, 238, 121, 158, 123, 222, 125, 190, 127, 254, 131, + 193, 133, 161, 135, 225, 137, 145, 139, 209, 141, 177, 143, 241, 147, 201, + 149, 169, 151, 233, 155, 217, 157, 185, 159, 249, 163, 197, 167, 229, 171, + 213, 173, 181, 175, 245, 179, 205, 183, 237, 187, 221, 191, 253, 199, 227, + 203, 211, 207, 243, 215, 235, 223, 251, 239, 247 +}; + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages) { + /* For any specific value of stages, we know exactly the indexes that are + * bit reversed. Currently (Feb. 2012) in WebRTC the only possible values of + * stages are 7 and 8, so we use tables to save unnecessary iterations and + * calculations for these two cases. + */ + if (stages == 7 || stages == 8) { + int m = 0; + int length = 112; + const int16_t* index = index_7; + + if (stages == 8) { + length = 240; + index = index_8; + } + + /* Decimation in time. Swap the elements with bit-reversed indexes. */ + for (m = 0; m < length; m += 2) { + /* We declare a int32_t* type pointer, to load both the 16-bit real + * and imaginary elements from complex_data in one instruction, reducing + * complexity. + */ + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + temp = complex_data_ptr[index[m]]; /* Real and imaginary */ + complex_data_ptr[index[m]] = complex_data_ptr[index[m + 1]]; + complex_data_ptr[index[m + 1]] = temp; + } + } + else { + int m = 0, mr = 0, l = 0; + int n = 1 << stages; + int nn = n - 1; + + /* Decimation in time - re-order data */ + for (m = 1; m <= nn; ++m) { + int32_t* complex_data_ptr = (int32_t*)complex_data; + int32_t temp = 0; + + /* Find out indexes that are bit-reversed. */ + l = n; + do { + l >>= 1; + } while (l > nn - mr); + mr = (mr & (l - 1)) + l; + + if (mr <= m) { + continue; + } + + /* Swap the elements with bit-reversed indexes. + * This is similar to the loop in the stages == 7 or 8 cases. + */ + temp = complex_data_ptr[m]; /* Real and imaginary */ + complex_data_ptr[m] = complex_data_ptr[mr]; + complex_data_ptr[mr] = temp; + } + } +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft.c new file mode 100644 index 00000000..32808723 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft.c @@ -0,0 +1,299 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_ComplexFFT(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/complex_fft_tables.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/rtc_base/system/arch.h" + +#define CFFTSFT 14 +#define CFFTRND 1 +#define CFFTRND2 16384 + +#define CIFFTSFT 14 +#define CIFFTRND 1 + + +int WebRtcSpl_ComplexFFT(int16_t frfi[], int stages, int mode) +{ + int i, j, l, k, istep, n, m; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = 1 << stages; + if (n > 1024) + return -1; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> 1); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> 1); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> 1); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> 1); + } + } + + --k; + l = istep; + + } + + } else + { + // mode==1: High-complexity and High-accuracy mode + while (l < n) + { + istep = l << 1; + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = -kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : + "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) + { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd]," + " lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cfftrnd]\n\t" + :[frfi_r]"=&r"(frfi_r), + [tr32]"=&r"(tr32), + [ti32]"=r"(ti32) + :[frfi_even]"r"((int32_t)frfi[2*j]), + [frfi_odd]"r"((int32_t)frfi[2*j +1]), + [wri]"r"(wri), + [cfftrnd]"r"(CFFTRND)); +#else + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CFFTRND; +#endif + + tr32 >>= 15 - CFFTSFT; + ti32 >>= 15 - CFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CFFTSFT); + + frfi[2 * j] = (int16_t)( + (qr32 - tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * j + 1] = (int16_t)( + (qi32 - ti32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i] = (int16_t)( + (qr32 + tr32 + CFFTRND2) >> (1 + CFFTSFT)); + frfi[2 * i + 1] = (int16_t)( + (qi32 + ti32 + CFFTRND2) >> (1 + CFFTSFT)); + } + } + + --k; + l = istep; + } + } + return 0; +} + +int WebRtcSpl_ComplexIFFT(int16_t frfi[], int stages, int mode) +{ + size_t i, j, l, istep, n, m; + int k, scale, shift; + int16_t wr, wi; + int32_t tr32, ti32, qr32, qi32; + int32_t tmp32, round2; + + /* The 1024-value is a constant given from the size of kSinTable1024[], + * and should not be changed depending on the input parameter 'stages' + */ + n = ((size_t)1) << stages; + if (n > 1024) + return -1; + + scale = 0; + + l = 1; + k = 10 - 1; /* Constant for given kSinTable1024[]. Do not change + depending on the input parameter 'stages' */ + + while (l < n) + { + // variable scaling, depending upon data + shift = 0; + round2 = 8192; + + tmp32 = WebRtcSpl_MaxAbsValueW16(frfi, 2 * n); + if (tmp32 > 13573) + { + shift++; + scale++; + round2 <<= 1; + } + if (tmp32 > 27146) + { + shift++; + scale++; + round2 <<= 1; + } + + istep = l << 1; + + if (mode == 0) + { + // mode==0: Low-complexity and Low-accuracy mode + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + + for (i = m; i < n; i += istep) + { + j = i + l; + + tr32 = (wr * frfi[2 * j] - wi * frfi[2 * j + 1]) >> 15; + + ti32 = (wr * frfi[2 * j + 1] + wi * frfi[2 * j]) >> 15; + + qr32 = (int32_t)frfi[2 * i]; + qi32 = (int32_t)frfi[2 * i + 1]; + frfi[2 * j] = (int16_t)((qr32 - tr32) >> shift); + frfi[2 * j + 1] = (int16_t)((qi32 - ti32) >> shift); + frfi[2 * i] = (int16_t)((qr32 + tr32) >> shift); + frfi[2 * i + 1] = (int16_t)((qi32 + ti32) >> shift); + } + } + } else + { + // mode==1: High-complexity and High-accuracy mode + + for (m = 0; m < l; ++m) + { + j = m << k; + + /* The 256-value is a constant given as 1/4 of the size of + * kSinTable1024[], and should not be changed depending on the input + * parameter 'stages'. 
It will result in 0 <= j < N_SINE_WAVE/2 + */ + wr = kSinTable1024[j + 256]; + wi = kSinTable1024[j]; + +#ifdef WEBRTC_ARCH_ARM_V7 + int32_t wri = 0; + __asm __volatile("pkhbt %0, %1, %2, lsl #16" : "=r"(wri) : + "r"((int32_t)wr), "r"((int32_t)wi)); +#endif + + for (i = m; i < n; i += istep) + { + j = i + l; + +#ifdef WEBRTC_ARCH_ARM_V7 + register int32_t frfi_r; + __asm __volatile( + "pkhbt %[frfi_r], %[frfi_even], %[frfi_odd], lsl #16\n\t" + "smlsd %[tr32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + "smladx %[ti32], %[wri], %[frfi_r], %[cifftrnd]\n\t" + :[frfi_r]"=&r"(frfi_r), + [tr32]"=&r"(tr32), + [ti32]"=r"(ti32) + :[frfi_even]"r"((int32_t)frfi[2*j]), + [frfi_odd]"r"((int32_t)frfi[2*j +1]), + [wri]"r"(wri), + [cifftrnd]"r"(CIFFTRND) + ); +#else + + tr32 = wr * frfi[2 * j] - wi * frfi[2 * j + 1] + CIFFTRND; + + ti32 = wr * frfi[2 * j + 1] + wi * frfi[2 * j] + CIFFTRND; +#endif + tr32 >>= 15 - CIFFTSFT; + ti32 >>= 15 - CIFFTSFT; + + qr32 = ((int32_t)frfi[2 * i]) * (1 << CIFFTSFT); + qi32 = ((int32_t)frfi[2 * i + 1]) * (1 << CIFFTSFT); + + frfi[2 * j] = (int16_t)( + (qr32 - tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * j + 1] = (int16_t)( + (qi32 - ti32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i] = (int16_t)( + (qr32 + tr32 + round2) >> (shift + CIFFTSFT)); + frfi[2 * i + 1] = (int16_t)( + (qi32 + ti32 + round2) >> (shift + CIFFTSFT)); + } + } + + } + --k; + l = istep; + } + return scale; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft_tables.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft_tables.h new file mode 100644 index 00000000..90fac072 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/complex_fft_tables.h @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ + +#include + +static const int16_t kSinTable1024[] = { + 0, 201, 402, 603, 804, 1005, 1206, 1406, 1607, + 1808, 2009, 2209, 2410, 2610, 2811, 3011, 3211, 3411, + 3611, 3811, 4011, 4210, 4409, 4608, 4807, 5006, 5205, + 5403, 5601, 5799, 5997, 6195, 6392, 6589, 6786, 6982, + 7179, 7375, 7571, 7766, 7961, 8156, 8351, 8545, 8739, + 8932, 9126, 9319, 9511, 9703, 9895, 10087, 10278, 10469, + 10659, 10849, 11038, 11227, 11416, 11604, 11792, 11980, 12166, + 12353, 12539, 12724, 12909, 13094, 13278, 13462, 13645, 13827, + 14009, 14191, 14372, 14552, 14732, 14911, 15090, 15268, 15446, + 15623, 15799, 15975, 16150, 16325, 16499, 16672, 16845, 17017, + 17189, 17360, 17530, 17699, 17868, 18036, 18204, 18371, 18537, + 18702, 18867, 19031, 19194, 19357, 19519, 19680, 19840, 20000, + 20159, 20317, 20474, 20631, 20787, 20942, 21096, 21249, 21402, + 21554, 21705, 21855, 22004, 22153, 22301, 22448, 22594, 22739, + 22883, 23027, 23169, 23311, 23452, 23592, 23731, 23869, 24006, + 24143, 24278, 24413, 24546, 24679, 24811, 24942, 25072, 25201, + 25329, 25456, 25582, 25707, 25831, 25954, 26077, 26198, 26318, + 26437, 26556, 26673, 26789, 26905, 27019, 27132, 27244, 27355, + 27466, 27575, 27683, 27790, 27896, 28001, 28105, 28208, 28309, + 28410, 28510, 28608, 28706, 28802, 28897, 28992, 29085, 29177, + 29268, 29358, 29446, 29534, 29621, 29706, 29790, 29873, 29955, + 30036, 30116, 30195, 30272, 30349, 30424, 30498, 30571, 30643, + 30713, 30783, 30851, 30918, 30984, 31049, 31113, 31175, 31236, + 31297, 31356, 31413, 31470, 31525, 31580, 31633, 31684, 31735, + 31785, 31833, 31880, 31926, 31970, 32014, 32056, 32097, 32137, + 32176, 32213, 32249, 32284, 32318, 32350, 32382, 32412, 32441, + 32468, 32495, 32520, 32544, 32567, 32588, 32609, 32628, 32646, + 32662, 32678, 32692, 32705, 32717, 32727, 32736, 32744, 32751, + 32757, 32761, 32764, 32766, 32767, 32766, 32764, 32761, 32757, + 32751, 32744, 32736, 32727, 32717, 32705, 32692, 32678, 32662, + 32646, 32628, 32609, 32588, 32567, 32544, 32520, 32495, 32468, + 32441, 32412, 32382, 32350, 32318, 32284, 32249, 32213, 32176, + 32137, 32097, 32056, 32014, 31970, 31926, 31880, 31833, 31785, + 31735, 31684, 31633, 31580, 31525, 31470, 31413, 31356, 31297, + 31236, 31175, 31113, 31049, 30984, 30918, 30851, 30783, 30713, + 30643, 30571, 30498, 30424, 30349, 30272, 30195, 30116, 30036, + 29955, 29873, 29790, 29706, 29621, 29534, 29446, 29358, 29268, + 29177, 29085, 28992, 28897, 28802, 28706, 28608, 28510, 28410, + 28309, 28208, 28105, 28001, 27896, 27790, 27683, 27575, 27466, + 27355, 27244, 27132, 27019, 26905, 26789, 26673, 26556, 26437, + 26318, 26198, 26077, 25954, 25831, 25707, 25582, 25456, 25329, + 25201, 25072, 24942, 24811, 24679, 24546, 24413, 24278, 24143, + 24006, 23869, 23731, 23592, 23452, 23311, 23169, 23027, 22883, + 22739, 22594, 22448, 22301, 22153, 22004, 21855, 21705, 21554, + 21402, 21249, 21096, 20942, 20787, 20631, 20474, 20317, 20159, + 20000, 19840, 19680, 19519, 19357, 19194, 19031, 18867, 18702, + 18537, 18371, 18204, 18036, 17868, 17699, 17530, 17360, 17189, + 17017, 16845, 16672, 16499, 16325, 16150, 15975, 15799, 15623, + 15446, 15268, 15090, 14911, 14732, 14552, 14372, 14191, 14009, + 13827, 13645, 13462, 13278, 13094, 12909, 12724, 12539, 12353, + 12166, 11980, 11792, 11604, 11416, 11227, 11038, 10849, 10659, + 10469, 10278, 10087, 9895, 9703, 9511, 9319, 9126, 8932, + 8739, 8545, 8351, 8156, 7961, 7766, 7571, 7375, 
7179, + 6982, 6786, 6589, 6392, 6195, 5997, 5799, 5601, 5403, + 5205, 5006, 4807, 4608, 4409, 4210, 4011, 3811, 3611, + 3411, 3211, 3011, 2811, 2610, 2410, 2209, 2009, 1808, + 1607, 1406, 1206, 1005, 804, 603, 402, 201, 0, + -201, -402, -603, -804, -1005, -1206, -1406, -1607, -1808, + -2009, -2209, -2410, -2610, -2811, -3011, -3211, -3411, -3611, + -3811, -4011, -4210, -4409, -4608, -4807, -5006, -5205, -5403, + -5601, -5799, -5997, -6195, -6392, -6589, -6786, -6982, -7179, + -7375, -7571, -7766, -7961, -8156, -8351, -8545, -8739, -8932, + -9126, -9319, -9511, -9703, -9895, -10087, -10278, -10469, -10659, + -10849, -11038, -11227, -11416, -11604, -11792, -11980, -12166, -12353, + -12539, -12724, -12909, -13094, -13278, -13462, -13645, -13827, -14009, + -14191, -14372, -14552, -14732, -14911, -15090, -15268, -15446, -15623, + -15799, -15975, -16150, -16325, -16499, -16672, -16845, -17017, -17189, + -17360, -17530, -17699, -17868, -18036, -18204, -18371, -18537, -18702, + -18867, -19031, -19194, -19357, -19519, -19680, -19840, -20000, -20159, + -20317, -20474, -20631, -20787, -20942, -21096, -21249, -21402, -21554, + -21705, -21855, -22004, -22153, -22301, -22448, -22594, -22739, -22883, + -23027, -23169, -23311, -23452, -23592, -23731, -23869, -24006, -24143, + -24278, -24413, -24546, -24679, -24811, -24942, -25072, -25201, -25329, + -25456, -25582, -25707, -25831, -25954, -26077, -26198, -26318, -26437, + -26556, -26673, -26789, -26905, -27019, -27132, -27244, -27355, -27466, + -27575, -27683, -27790, -27896, -28001, -28105, -28208, -28309, -28410, + -28510, -28608, -28706, -28802, -28897, -28992, -29085, -29177, -29268, + -29358, -29446, -29534, -29621, -29706, -29790, -29873, -29955, -30036, + -30116, -30195, -30272, -30349, -30424, -30498, -30571, -30643, -30713, + -30783, -30851, -30918, -30984, -31049, -31113, -31175, -31236, -31297, + -31356, -31413, -31470, -31525, -31580, -31633, -31684, -31735, -31785, + -31833, -31880, -31926, -31970, -32014, -32056, -32097, -32137, -32176, + -32213, -32249, -32284, -32318, -32350, -32382, -32412, -32441, -32468, + -32495, -32520, -32544, -32567, -32588, -32609, -32628, -32646, -32662, + -32678, -32692, -32705, -32717, -32727, -32736, -32744, -32751, -32757, + -32761, -32764, -32766, -32767, -32766, -32764, -32761, -32757, -32751, + -32744, -32736, -32727, -32717, -32705, -32692, -32678, -32662, -32646, + -32628, -32609, -32588, -32567, -32544, -32520, -32495, -32468, -32441, + -32412, -32382, -32350, -32318, -32284, -32249, -32213, -32176, -32137, + -32097, -32056, -32014, -31970, -31926, -31880, -31833, -31785, -31735, + -31684, -31633, -31580, -31525, -31470, -31413, -31356, -31297, -31236, + -31175, -31113, -31049, -30984, -30918, -30851, -30783, -30713, -30643, + -30571, -30498, -30424, -30349, -30272, -30195, -30116, -30036, -29955, + -29873, -29790, -29706, -29621, -29534, -29446, -29358, -29268, -29177, + -29085, -28992, -28897, -28802, -28706, -28608, -28510, -28410, -28309, + -28208, -28105, -28001, -27896, -27790, -27683, -27575, -27466, -27355, + -27244, -27132, -27019, -26905, -26789, -26673, -26556, -26437, -26318, + -26198, -26077, -25954, -25831, -25707, -25582, -25456, -25329, -25201, + -25072, -24942, -24811, -24679, -24546, -24413, -24278, -24143, -24006, + -23869, -23731, -23592, -23452, -23311, -23169, -23027, -22883, -22739, + -22594, -22448, -22301, -22153, -22004, -21855, -21705, -21554, -21402, + -21249, -21096, -20942, -20787, -20631, -20474, -20317, -20159, -20000, + -19840, -19680, -19519, -19357, -19194, -19031, 
-18867, -18702, -18537, + -18371, -18204, -18036, -17868, -17699, -17530, -17360, -17189, -17017, + -16845, -16672, -16499, -16325, -16150, -15975, -15799, -15623, -15446, + -15268, -15090, -14911, -14732, -14552, -14372, -14191, -14009, -13827, + -13645, -13462, -13278, -13094, -12909, -12724, -12539, -12353, -12166, + -11980, -11792, -11604, -11416, -11227, -11038, -10849, -10659, -10469, + -10278, -10087, -9895, -9703, -9511, -9319, -9126, -8932, -8739, + -8545, -8351, -8156, -7961, -7766, -7571, -7375, -7179, -6982, + -6786, -6589, -6392, -6195, -5997, -5799, -5601, -5403, -5205, + -5006, -4807, -4608, -4409, -4210, -4011, -3811, -3611, -3411, + -3211, -3011, -2811, -2610, -2410, -2209, -2009, -1808, -1607, + -1406, -1206, -1005, -804, -603, -402, -201}; + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_COMPLEX_FFT_TABLES_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/cross_correlation.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/cross_correlation.c new file mode 100644 index 00000000..d7c9f2b9 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/cross_correlation.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +/* C version of WebRtcSpl_CrossCorrelation() for generic platforms. */ +void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2) { + size_t i = 0, j = 0; + + for (i = 0; i < dim_cross_correlation; i++) { + int32_t corr = 0; + for (j = 0; j < dim_seq; j++) + corr += (seq1[j] * seq2[j]) >> right_shifts; + seq2 += step_seq2; + *cross_correlation++ = corr; + } +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/division_operations.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/division_operations.c new file mode 100644 index 00000000..2d420525 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/division_operations.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +/* + * This file contains implementations of the divisions + * WebRtcSpl_DivU32U16() + * WebRtcSpl_DivW32W16() + * WebRtcSpl_DivW32W16ResW16() + * WebRtcSpl_DivResultInQ31() + * WebRtcSpl_DivW32HiLow() + * + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/rtc_base/sanitizer.h" + +uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den) +{ + // Guard against division with 0 + if (den != 0) + { + return (uint32_t)(num / den); + } else + { + return (uint32_t)0xFFFFFFFF; + } +} + +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) +{ + // Guard against division with 0 + if (den != 0) + { + return (int32_t)(num / den); + } else + { + return (int32_t)0x7FFFFFFF; + } +} + +int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den) +{ + // Guard against division with 0 + if (den != 0) + { + return (int16_t)(num / den); + } else + { + return (int16_t)0x7FFF; + } +} + +int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den) +{ + int32_t L_num = num; + int32_t L_den = den; + int32_t div = 0; + int k = 31; + int change_sign = 0; + + if (num == 0) + return 0; + + if (num < 0) + { + change_sign++; + L_num = -num; + } + if (den < 0) + { + change_sign++; + L_den = -den; + } + while (k--) + { + div <<= 1; + L_num <<= 1; + if (L_num >= L_den) + { + L_num -= L_den; + div++; + } + } + if (change_sign == 1) + { + div = -div; + } + return div; +} + +int32_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low) +{ + int16_t approx, tmp_hi, tmp_low, num_hi, num_low; + int32_t tmpW32; + + approx = (int16_t)WebRtcSpl_DivW32W16((int32_t)0x1FFFFFFF, den_hi); + // result in Q14 (Note: 3FFFFFFF = 0.5 in Q30) + + // tmpW32 = 1/den = approx * (2.0 - den * approx) (in Q30) + tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1); + // tmpW32 = den * approx + + tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx)) + // UBSan: 2147483647 - -2 cannot be represented in type 'int' + + // Store tmpW32 in hi and low format + tmp_hi = (int16_t)(tmpW32 >> 16); + tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); + + // tmpW32 = 1/den in Q29 + tmpW32 = (tmp_hi * approx + (tmp_low * approx >> 15)) << 1; + + // 1/den in hi and low format + tmp_hi = (int16_t)(tmpW32 >> 16); + tmp_low = (int16_t)((tmpW32 - ((int32_t)tmp_hi << 16)) >> 1); + + // Store num in hi and low format + num_hi = (int16_t)(num >> 16); + num_low = (int16_t)((num - ((int32_t)num_hi << 16)) >> 1); + + // num * (1/den) by 32 bit multiplication (result in Q28) + + tmpW32 = num_hi * tmp_hi + (num_hi * tmp_low >> 15) + + (num_low * tmp_hi >> 15); + + // Put result in Q31 (convert from Q28) + tmpW32 = WEBRTC_SPL_LSHIFT_W32(tmpW32, 3); + + return tmpW32; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.cc b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.cc new file mode 100644 index 00000000..d9661af1 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.cc @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/dot_product_with_scale.h" + +#include "webrtc/rtc_base/numerics/safe_conversions.h" + +int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, + const int16_t* vector2, + size_t length, + int scaling) { + int64_t sum = 0; + size_t i = 0; + + /* Unroll the loop to improve performance. */ + for (i = 0; i + 3 < length; i += 4) { + sum += (vector1[i + 0] * vector2[i + 0]) >> scaling; + sum += (vector1[i + 1] * vector2[i + 1]) >> scaling; + sum += (vector1[i + 2] * vector2[i + 2]) >> scaling; + sum += (vector1[i + 3] * vector2[i + 3]) >> scaling; + } + for (; i < length; i++) { + sum += (vector1[i] * vector2[i]) >> scaling; + } + + return rtc::saturated_cast(sum); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.h new file mode 100644 index 00000000..bb892d40 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/dot_product_with_scale.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Calculates the dot product between two (int16_t) vectors. +// +// Input: +// - vector1 : Vector 1 +// - vector2 : Vector 2 +// - vector_length : Number of samples used in the dot product +// - scaling : The number of right bit shifts to apply on each term +// during calculation to avoid overflow, i.e., the +// output will be in Q(-|scaling|) +// +// Return value : The dot product in Q(-scaling) +int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1, + const int16_t* vector2, + size_t length, + int scaling); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_DOT_PRODUCT_WITH_SCALE_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/downsample_fast.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/downsample_fast.c new file mode 100644 index 00000000..e575861e --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/downsample_fast.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +#include "webrtc/rtc_base/checks.h" +#include "webrtc/rtc_base/sanitizer.h" + +// TODO(Bjornv): Change the function parameter order to WebRTC code style. +// C version of WebRtcSpl_DownsampleFast() for generic platforms. +int WebRtcSpl_DownsampleFastC(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay) { + int16_t* const original_data_out = data_out; + size_t i = 0; + size_t j = 0; + int32_t out_s32 = 0; + size_t endpos = delay + factor * (data_out_length - 1) + 1; + + // Return error if any of the running conditions doesn't meet. + if (data_out_length == 0 || coefficients_length == 0 + || data_in_length < endpos) { + return -1; + } + + rtc_MsanCheckInitialized(coefficients, sizeof(coefficients[0]), + coefficients_length); + + for (i = delay; i < endpos; i += factor) { + out_s32 = 2048; // Round value, 0.5 in Q12. + + for (j = 0; j < coefficients_length; j++) { + // Negative overflow is permitted here, because this is + // auto-regressive filters, and the state for each batch run is + // stored in the "negative" positions of the output vector. + rtc_MsanCheckInitialized(&data_in[(ptrdiff_t) i - (ptrdiff_t) j], + sizeof(data_in[0]), 1); + // out_s32 is in Q12 domain. + out_s32 += coefficients[j] * data_in[(ptrdiff_t) i - (ptrdiff_t) j]; + } + + out_s32 >>= 12; // Q0. + + // Saturate and store the output. + *data_out++ = WebRtcSpl_SatW32ToW16(out_s32); + } + + RTC_DCHECK_EQ(original_data_out + data_out_length, data_out); + rtc_MsanCheckInitialized(original_data_out, sizeof(original_data_out[0]), + data_out_length); + + return 0; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/energy.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/energy.c new file mode 100644 index 00000000..e83f1a69 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/energy.c @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_Energy(). 
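+ * It computes en = sum_i (vector[i] * vector[i]) >> scaling and reports the
+ * scaling that was applied through *scale_factor.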
+ * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +int32_t WebRtcSpl_Energy(int16_t* vector, + size_t vector_length, + int* scale_factor) +{ + int32_t en = 0; + size_t i; + int scaling = + WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length); + size_t looptimes = vector_length; + int16_t *vectorptr = vector; + + for (i = 0; i < looptimes; i++) + { + en += (*vectorptr * *vectorptr) >> scaling; + vectorptr++; + } + *scale_factor = scaling; + + return en; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/get_scaling_square.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/get_scaling_square.c new file mode 100644 index 00000000..82e3c8b0 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/get_scaling_square.c @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_GetScalingSquare(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, + size_t in_vector_length, + size_t times) +{ + int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t)times); + size_t i; + int16_t smax = -1; + int16_t sabs; + int16_t *sptr = in_vector; + int16_t t; + size_t looptimes = in_vector_length; + + for (i = looptimes; i > 0; i--) + { + sabs = (*sptr > 0 ? *sptr++ : -*sptr++); + smax = (sabs > smax ? sabs : smax); + } + t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax)); + + if (smax == 0) + { + return 0; // Since norm(0) returns 0 + } else + { + return (t > nbits) ? 0 : nbits - t; + } +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/real_fft.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/real_fft.h new file mode 100644 index 00000000..84450667 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/real_fft.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ + +#include + +// For ComplexFFT(), the maximum fft order is 10; +// WebRTC APM uses orders of only 7 and 8. 
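+// 2^10 == 1024, the size of kSinTable1024[] that WebRtcSpl_ComplexFFT() is
+// limited to (see complex_fft.c).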
+enum { kMaxFFTOrder = 10 }; + +struct RealFFT; + +#ifdef __cplusplus +extern "C" { +#endif + +struct RealFFT* WebRtcSpl_CreateRealFFT(int order); +void WebRtcSpl_FreeRealFFT(struct RealFFT* self); + +// Compute an FFT for a real-valued signal of length of 2^order, +// where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the +// specification structure, which must be initialized prior to calling the FFT +// function with WebRtcSpl_CreateRealFFT(). +// The relationship between the input and output sequences can +// be expressed in terms of the DFT, i.e.: +// x[n] = (2^(-scalefactor)/N) . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N) +// n=0,1,2,...N-1 +// N=2^order. +// The conjugate-symmetric output sequence is represented using a CCS vector, +// which is of length N+2, and is organized as follows: +// Index: 0 1 2 3 4 5 . . . N-2 N-1 N N+1 +// Component: R0 0 R1 I1 R2 I2 . . . R[N/2-1] I[N/2-1] R[N/2] 0 +// where R[n] and I[n], respectively, denote the real and imaginary components +// for FFT bin 'n'. Bins are numbered from 0 to N/2, where N is the FFT length. +// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to +// the foldover frequency. +// +// Input Arguments: +// self - pointer to preallocated and initialized FFT specification structure. +// real_data_in - the input signal. For an ARM Neon platform, it must be +// aligned on a 32-byte boundary. +// +// Output Arguments: +// complex_data_out - the output complex signal with (2^order + 2) 16-bit +// elements. For an ARM Neon platform, it must be different +// from real_data_in, and aligned on a 32-byte boundary. +// +// Return Value: +// 0 - FFT calculation is successful. +// -1 - Error with bad arguments (null pointers). +int WebRtcSpl_RealForwardFFT(struct RealFFT* self, + const int16_t* real_data_in, + int16_t* complex_data_out); + +// Compute the inverse FFT for a conjugate-symmetric input sequence of length of +// 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by +// the specification structure, which must be initialized prior to calling the +// FFT function with WebRtcSpl_CreateRealFFT(). +// For a transform of length M, the input sequence is represented using a packed +// CCS vector of length M+2, which is explained in the comments for +// WebRtcSpl_RealForwardFFTC above. +// +// Input Arguments: +// self - pointer to preallocated and initialized FFT specification structure. +// complex_data_in - the input complex signal with (2^order + 2) 16-bit +// elements. For an ARM Neon platform, it must be aligned on +// a 32-byte boundary. +// +// Output Arguments: +// real_data_out - the output real signal. For an ARM Neon platform, it must +// be different to complex_data_in, and aligned on a 32-byte +// boundary. +// +// Return Value: +// 0 or a positive number - a value that the elements in the |real_data_out| +// should be shifted left with in order to get +// correct physical values. +// -1 - Error with bad arguments (null pointers). 
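+//
+// Worked example of the CCS layout described above, for order = 3 (N = 8):
+// the packed vector holds N + 2 = 10 int16_t values laid out as
+//   R0, 0, R1, I1, R2, I2, R3, I3, R4, 0
+// where R4 is the real component at the foldover frequency (bin N/2 = 4).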
+int WebRtcSpl_RealInverseFFT(struct RealFFT* self, + const int16_t* complex_data_in, + int16_t* real_data_out); + +#ifdef __cplusplus +} +#endif + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_REAL_FFT_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/signal_processing_library.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/signal_processing_library.h new file mode 100644 index 00000000..ccbb306d --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/signal_processing_library.h @@ -0,0 +1,1612 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes all of the fix point signal processing library + * (SPL) function descriptions and declarations. For specific function calls, + * see bottom of file. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ + +#include +#include "webrtc/common_audio/signal_processing/dot_product_with_scale.h" + +// Macros specific for the fixed point implementation +#define WEBRTC_SPL_WORD16_MAX 32767 +#define WEBRTC_SPL_WORD16_MIN -32768 +#define WEBRTC_SPL_WORD32_MAX (int32_t)0x7fffffff +#define WEBRTC_SPL_WORD32_MIN (int32_t)0x80000000 +#define WEBRTC_SPL_MAX_LPC_ORDER 14 +#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value +#define WEBRTC_SPL_MAX(A, B) (A > B ? A : B) // Get max value +// TODO(kma/bjorn): For the next two macros, investigate how to correct the code +// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN. +#define WEBRTC_SPL_ABS_W16(a) (((int16_t)a >= 0) ? ((int16_t)a) : -((int16_t)a)) +#define WEBRTC_SPL_ABS_W32(a) (((int32_t)a >= 0) ? 
((int32_t)a) : -((int32_t)a)) + +#define WEBRTC_SPL_MUL(a, b) ((int32_t)((int32_t)(a) * (int32_t)(b))) +#define WEBRTC_SPL_UMUL(a, b) ((uint32_t)((uint32_t)(a) * (uint32_t)(b))) +#define WEBRTC_SPL_UMUL_32_16(a, b) ((uint32_t)((uint32_t)(a) * (uint16_t)(b))) +#define WEBRTC_SPL_MUL_16_U16(a, b) ((int32_t)(int16_t)(a) * (uint16_t)(b)) + +// clang-format off +// clang-format would choose some identation +// leading to presubmit error (cpplint.py) +#ifndef WEBRTC_ARCH_ARM_V7 +// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h +#ifndef MIPS32_LE +// For MIPS platforms, these are inline functions in spl_inl_mips.h +#define WEBRTC_SPL_MUL_16_16(a, b) ((int32_t)(((int16_t)(a)) * ((int16_t)(b)))) +#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, b >> 16) + \ + ((WEBRTC_SPL_MUL_16_16(a, (b & 0xffff) >> 1) + 0x4000) >> 15)) +#endif +#endif + +#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 5) + \ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10)) +#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \ + (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 2) + \ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13)) +#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \ + ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 1)) + \ + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14)) +// clang-format on + +#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) (WEBRTC_SPL_MUL_16_16(a, b) >> (c)) + +#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \ + ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t)(((int32_t)1) << ((c)-1)))) >> (c)) + +// C + the 32 most significant bits of A * B +#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \ + (C + (B >> 16) * A + (((uint32_t)(B & 0x0000FFFF) * A) >> 16)) + +#define WEBRTC_SPL_SAT(a, b, c) (b > a ? a : b < c ? c : b) + +// Shifting with negative numbers allowed +// Positive means left shift +#define WEBRTC_SPL_SHIFT_W32(x, c) ((c) >= 0 ? (x) * (1 << (c)) : (x) >> -(c)) + +// Shifting with negative numbers not allowed +// We cannot do casting here due to signed/unsigned problem +#define WEBRTC_SPL_LSHIFT_W32(x, c) ((x) << (c)) + +#define WEBRTC_SPL_RSHIFT_U32(x, c) ((uint32_t)(x) >> (c)) + +#define WEBRTC_SPL_RAND(a) ((int16_t)((((int16_t)a * 18816) >> 7) & 0x00007fff)) + +#ifdef __cplusplus +extern "C" { +#endif + +#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \ + memcpy(v1, v2, (length) * sizeof(int16_t)) + +// inline functions: +#include "webrtc/common_audio/signal_processing/include/spl_inl.h" + +// third party math functions +#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h" + +// Initialize SPL. Currently it contains only function pointer initialization. +// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined), +// the pointers will be assigned to code optimized for Neon; otherwise, generic +// C code will be assigned. +// Note that this function MUST be called in any application that uses SPL +// functions. +void WebRtcSpl_Init(void); + +int16_t WebRtcSpl_GetScalingSquare(int16_t* in_vector, + size_t in_vector_length, + size_t times); + +// Copy and set operations. Implementation in copy_set_operations.c. +// Descriptions at bottom of file. 
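+// Note on the shift macros defined earlier in this header:
+// WEBRTC_SPL_SHIFT_W32(x, 2) == x * 4 while WEBRTC_SPL_SHIFT_W32(x, -2) == x >> 2,
+// whereas WEBRTC_SPL_LSHIFT_W32() only accepts non-negative shift counts.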
+void WebRtcSpl_MemSetW16(int16_t* vector, + int16_t set_value, + size_t vector_length); +void WebRtcSpl_MemSetW32(int32_t* vector, + int32_t set_value, + size_t vector_length); +void WebRtcSpl_MemCpyReversedOrder(int16_t* out_vector, + int16_t* in_vector, + size_t vector_length); +void WebRtcSpl_CopyFromEndW16(const int16_t* in_vector, + size_t in_vector_length, + size_t samples, + int16_t* out_vector); +void WebRtcSpl_ZerosArrayW16(int16_t* vector, size_t vector_length); +void WebRtcSpl_ZerosArrayW32(int32_t* vector, size_t vector_length); +// End: Copy and set operations. + +// Minimum and maximum operation functions and their pointers. +// Implementation in min_max_operations.c. + +// Returns the largest absolute value in a signed 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length); +extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the largest absolute value in a signed 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length); +extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS_DSP_R1_LE) +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the maximum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. +typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length); +extern MaxValueW16 WebRtcSpl_MaxValueW16; +int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the maximum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. +typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length); +extern MaxValueW32 WebRtcSpl_MaxValueW32; +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the minimum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. 
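+// Like the other min/max helpers in this section, WebRtcSpl_MinValueW16 is a
+// function pointer; WebRtcSpl_Init() points it at the generic C version or the
+// Neon version (MIPS builds declare their own variant below).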
+typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length); +extern MinValueW16 WebRtcSpl_MinValueW16; +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the minimum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. +typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length); +extern MinValueW32 WebRtcSpl_MinValueW32; +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length); +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the vector index to the largest absolute value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum absolute value in vector. +// If there are multiple equal maxima, return the index of the +// first. -32768 will always have precedence over 32767 (despite +// -32768 presenting an int16 absolute value of 32767). +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the maximum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector (if multiple +// indexes have the maximum, return the first). +size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the maximum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the maximum value in vector (if multiple +// indexes have the maximum, return the first). +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length); + +// Returns the vector index to the minimum sample value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the mimimum value in vector (if multiple +// indexes have the minimum, return the first). +size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length); + +// Returns the vector index to the minimum sample value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Index to the mimimum value in vector (if multiple +// indexes have the minimum, return the first). +size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length); + +// End: Minimum and maximum operations. + +// Vector scaling operations. Implementation in vector_scaling_operations.c. +// Description at bottom of file. 
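+// Illustrative sketch (not from the upstream sources): the max/abs helpers above
+// are commonly paired with the shift operations declared below to normalize a
+// buffer before further fixed-point processing. |buf| and |len| are hypothetical.
+//
+//   WebRtcSpl_Init();  // required; WebRtcSpl_MaxAbsValueW16 is a function pointer
+//   int16_t max_abs = WebRtcSpl_MaxAbsValueW16(buf, len);
+//   int16_t headroom = WebRtcSpl_NormW16(max_abs);   // safe left shifts (spl_inl.h)
+//   // A negative |right_shifts| argument means a left shift:
+//   WebRtcSpl_VectorBitShiftW16(buf, len, buf, (int16_t)-headroom);
+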
+void WebRtcSpl_VectorBitShiftW16(int16_t* out_vector, + size_t vector_length, + const int16_t* in_vector, + int16_t right_shifts); +void WebRtcSpl_VectorBitShiftW32(int32_t* out_vector, + size_t vector_length, + const int32_t* in_vector, + int16_t right_shifts); +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out_vector, + size_t vector_length, + const int32_t* in_vector, + int right_shifts); +void WebRtcSpl_ScaleVector(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ScaleVectorWithSat(const int16_t* in_vector, + int16_t* out_vector, + int16_t gain, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ScaleAndAddVectors(const int16_t* in_vector1, + int16_t gain1, + int right_shifts1, + const int16_t* in_vector2, + int16_t gain2, + int right_shifts2, + int16_t* out_vector, + size_t vector_length); + +// The functions (with related pointer) perform the vector operation: +// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k]) +// + round_value) >> right_shifts, +// where round_value = (1 << right_shifts) >> 1. +// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector1_scale : Gain to be used for vector 1 +// - in_vector2 : Input vector 2 +// - in_vector2_scale : Gain to be used for vector 2 +// - right_shifts : Number of right bit shifts to be applied +// - length : Number of elements in the input vectors +// +// Output: +// - out_vector : Output vector +// Return value : 0 if OK, -1 if (in_vector1 == null +// || in_vector2 == null || out_vector == null +// || length <= 0 || right_shift < 0). +typedef int (*ScaleAndAddVectorsWithRound)(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; +int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +#if defined(MIPS_DSP_R1_LE) +int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length); +#endif +// End: Vector scaling operations. + +// iLBC specific functions. Implementations in ilbc_specific_functions.c. +// Description at bottom of file. +void WebRtcSpl_ReverseOrderMultArrayElements(int16_t* out_vector, + const int16_t* in_vector, + const int16_t* window, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_ElementwiseVectorMult(int16_t* out_vector, + const int16_t* in_vector, + const int16_t* window, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_AddVectorsAndShift(int16_t* out_vector, + const int16_t* in_vector1, + const int16_t* in_vector2, + size_t vector_length, + int16_t right_shifts); +void WebRtcSpl_AddAffineVectorToVector(int16_t* out_vector, + int16_t* in_vector, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length); +void WebRtcSpl_AffineTransformVector(int16_t* out_vector, + int16_t* in_vector, + int16_t gain, + int32_t add_constant, + int16_t right_shifts, + size_t vector_length); +// End: iLBC specific functions. + +// Signal processing operations. 
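+// Illustrative sketch (not from the upstream sources): the usual fixed-point LPC
+// analysis chain built from the routines documented below. |frame|, kFrameLen and
+// kOrder are hypothetical names.
+//
+//   enum { kOrder = 10, kFrameLen = 240 };
+//   int32_t corr[kOrder + 1];
+//   int16_t lpc[kOrder + 1];
+//   int16_t refl[kOrder];
+//   int scale = 0;
+//   WebRtcSpl_AutoCorrelation(frame, kFrameLen, kOrder, corr, &scale);
+//   if (WebRtcSpl_LevinsonDurbin(corr, lpc, refl, kOrder) == 1) {
+//     // Stable filter: lpc[0..kOrder] are Q12 coefficients (lpc[0] == 4096),
+//     // suitable as |ar_coef| for WebRtcSpl_FilterAR further down in this header.
+//   }
+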
+ +// A 32-bit fix-point implementation of auto-correlation computation +// +// Input: +// - in_vector : Vector to calculate autocorrelation upon +// - in_vector_length : Length (in samples) of |vector| +// - order : The order up to which the autocorrelation should be +// calculated +// +// Output: +// - result : auto-correlation values (values should be seen +// relative to each other since the absolute values +// might have been down shifted to avoid overflow) +// +// - scale : The number of left shifts required to obtain the +// auto-correlation in Q0 +// +// Return value : Number of samples in |result|, i.e. (order+1) +size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector, + size_t in_vector_length, + size_t order, + int32_t* result, + int* scale); + +// A 32-bit fix-point implementation of the Levinson-Durbin algorithm that +// does NOT use the 64 bit class +// +// Input: +// - auto_corr : Vector with autocorrelation values of length >= |order|+1 +// - order : The LPC filter order (support up to order 20) +// +// Output: +// - lpc_coef : lpc_coef[0..order] LPC coefficients in Q12 +// - refl_coef : refl_coef[0...order-1]| Reflection coefficients in Q15 +// +// Return value : 1 for stable 0 for unstable +int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr, + int16_t* lpc_coef, + int16_t* refl_coef, + size_t order); + +// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|. +// This version is a 16 bit operation. +// +// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a +// "slightly unstable" filter (i.e., a pole just outside the unit circle) in +// "rare" cases even if the reflection coefficients are stable. +// +// Input: +// - refl_coef : Reflection coefficients in Q15 that should be converted +// to LPC coefficients +// - use_order : Number of coefficients in |refl_coef| +// +// Output: +// - lpc_coef : LPC coefficients in Q12 +void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef, + int use_order, + int16_t* lpc_coef); + +// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|. +// This version is a 16 bit operation. +// The conversion is implemented by the step-down algorithm. +// +// Input: +// - lpc_coef : LPC coefficients in Q12, that should be converted to +// reflection coefficients +// - use_order : Number of coefficients in |lpc_coef| +// +// Output: +// - refl_coef : Reflection coefficients in Q15. +void WebRtcSpl_LpcToReflCoef(int16_t* lpc_coef, + int use_order, + int16_t* refl_coef); + +// Calculates reflection coefficients (16 bit) from auto-correlation values +// +// Input: +// - auto_corr : Auto-correlation values +// - use_order : Number of coefficients wanted be calculated +// +// Output: +// - refl_coef : Reflection coefficients in Q15. +void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr, + int use_order, + int16_t* refl_coef); + +// The functions (with related pointer) calculate the cross-correlation between +// two sequences |seq1| and |seq2|. +// |seq1| is fixed and |seq2| slides as the pointer is increased with the +// amount |step_seq2|. 
Note the arguments should obey the relationship: +// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) < +// buffer size of |seq2| +// +// Input: +// - seq1 : First sequence (fixed throughout the correlation) +// - seq2 : Second sequence (slides |step_vector2| for each +// new correlation) +// - dim_seq : Number of samples to use in the cross-correlation +// - dim_cross_correlation : Number of cross-correlations to calculate (the +// start position for |vector2| is updated for each +// new one) +// - right_shifts : Number of right bit shifts to use. This will +// become the output Q-domain. +// - step_seq2 : How many (positive or negative) steps the +// |vector2| pointer should be updated for each new +// cross-correlation value. +// +// Output: +// - cross_correlation : The cross-correlation in Q(-right_shifts) +typedef void (*CrossCorrelation)(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +extern CrossCorrelation WebRtcSpl_CrossCorrelation; +void WebRtcSpl_CrossCorrelationC(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#if defined(WEBRTC_HAS_NEON) +void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#endif +#if defined(MIPS32_LE) +void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation, + const int16_t* seq1, + const int16_t* seq2, + size_t dim_seq, + size_t dim_cross_correlation, + int right_shifts, + int step_seq2); +#endif + +// Creates (the first half of) a Hanning window. Size must be at least 1 and +// at most 512. +// +// Input: +// - size : Length of the requested Hanning window (1 to 512) +// +// Output: +// - window : Hanning vector in Q14. +void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size); + +// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector +// |in_vector|. Input and output values are in Q15. +// +// Inputs: +// - in_vector : Values to calculate sqrt(1 - x^2) of +// - vector_length : Length of vector |in_vector| +// +// Output: +// - out_vector : Output values in Q15 +void WebRtcSpl_SqrtOfOneMinusXSquared(int16_t* in_vector, + size_t vector_length, + int16_t* out_vector); +// End: Signal processing operations. + +// Randomization functions. Implementations collected in +// randomization_functions.c and descriptions at bottom of this file. +int16_t WebRtcSpl_RandU(uint32_t* seed); +int16_t WebRtcSpl_RandN(uint32_t* seed); +int16_t WebRtcSpl_RandUArray(int16_t* vector, + int16_t vector_length, + uint32_t* seed); +// End: Randomization functions. + +// Math functions +int32_t WebRtcSpl_Sqrt(int32_t value); + +// Divisions. Implementations collected in division_operations.c and +// descriptions at bottom of this file. +uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den); +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den); +int16_t WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den); +int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den); +int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low); +// End: Divisions. + +int32_t WebRtcSpl_Energy(int16_t* vector, + size_t vector_length, + int* scale_factor); + +// Filter operations. 
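+// Illustrative sketch (not from the upstream sources) for the MA filter declared
+// below: the caller keeps |order| history samples immediately before the samples
+// being filtered. kMaCoef, in_buf and out are hypothetical; the Q12 coefficients
+// {1024, 2048, 1024} sum to 4096 (1.0), i.e. a simple unity-gain smoother.
+//
+//   enum { kOrder = 2, kLen = 160 };
+//   static const int16_t kMaCoef[kOrder + 1] = {1024, 2048, 1024};  // Q12
+//   int16_t in_buf[kOrder + kLen] = {0};  // first kOrder samples act as state
+//   int16_t out[kLen];
+//   // New samples are written at in_buf + kOrder; the preceding samples are the
+//   // filter state referenced as in_vector[-order] .. in_vector[-1].
+//   WebRtcSpl_FilterMAFastQ12(in_buf + kOrder, out, kMaCoef, kOrder + 1, kLen);
+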
+size_t WebRtcSpl_FilterAR(const int16_t* ar_coef, + size_t ar_coef_length, + const int16_t* in_vector, + size_t in_vector_length, + int16_t* filter_state, + size_t filter_state_length, + int16_t* filter_state_low, + size_t filter_state_low_length, + int16_t* out_vector, + int16_t* out_vector_low, + size_t out_vector_low_length); + +// WebRtcSpl_FilterMAFastQ12(...) +// +// Performs a MA filtering on a vector in Q12 +// +// Input: +// - in_vector : Input samples (state in positions +// in_vector[-order] .. in_vector[-1]) +// - ma_coef : Filter coefficients (in Q12) +// - ma_coef_length : Number of B coefficients (order+1) +// - vector_length : Number of samples to be filtered +// +// Output: +// - out_vector : Filtered samples +// +void WebRtcSpl_FilterMAFastQ12(const int16_t* in_vector, + int16_t* out_vector, + const int16_t* ma_coef, + size_t ma_coef_length, + size_t vector_length); + +// Performs a AR filtering on a vector in Q12 +// Input: +// - data_in : Input samples +// - data_out : State information in positions +// data_out[-order] .. data_out[-1] +// - coefficients : Filter coefficients (in Q12) +// - coefficients_length: Number of coefficients (order+1) +// - data_length : Number of samples to be filtered +// Output: +// - data_out : Filtered samples +void WebRtcSpl_FilterARFastQ12(const int16_t* data_in, + int16_t* data_out, + const int16_t* __restrict coefficients, + size_t coefficients_length, + size_t data_length); + +// The functions (with related pointer) perform a MA down sampling filter +// on a vector. +// Input: +// - data_in : Input samples (state in positions +// data_in[-order] .. data_in[-1]) +// - data_in_length : Number of samples in |data_in| to be filtered. +// This must be at least +// |delay| + |factor|*(|out_vector_length|-1) + 1) +// - data_out_length : Number of down sampled samples desired +// - coefficients : Filter coefficients (in Q12) +// - coefficients_length: Number of coefficients (order+1) +// - factor : Decimation factor +// - delay : Delay of filter (compensated for in out_vector) +// Output: +// - data_out : Filtered samples +// Return value : 0 if OK, -1 if |in_vector| is too short +typedef int (*DownsampleFast)(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +extern DownsampleFast WebRtcSpl_DownsampleFast; +int WebRtcSpl_DownsampleFastC(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#if defined(WEBRTC_HAS_NEON) +int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#endif +#if defined(MIPS32_LE) +int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in, + size_t data_in_length, + int16_t* data_out, + size_t data_out_length, + const int16_t* __restrict coefficients, + size_t coefficients_length, + int factor, + size_t delay); +#endif + +// End: Filter operations. + +// FFT operations + +int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode); +int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode); + +// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit +// values, and swap elements whose indexes are bit-reverses of each other. 
+// +// Input: +// - complex_data : Complex data buffer containing 2^|stages| real +// elements interleaved with 2^|stages| imaginary +// elements: [Re Im Re Im Re Im....] +// - stages : Number of FFT stages. Must be at least 3 and at most +// 10, since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// Output: +// - complex_data : The complex data buffer. + +void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages); + +// End: FFT operations + +/************************************************************ + * + * RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW + * + ************************************************************/ + +/******************************************************************* + * resample.c + * + * Includes the following resampling combinations + * 22 kHz -> 16 kHz + * 16 kHz -> 22 kHz + * 22 kHz -> 8 kHz + * 8 kHz -> 22 kHz + * + ******************************************************************/ + +// state structure for 22 -> 16 resampler +typedef struct { + int32_t S_22_44[8]; + int32_t S_44_32[8]; + int32_t S_32_16[8]; +} WebRtcSpl_State22khzTo16khz; + +void WebRtcSpl_Resample22khzTo16khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo16khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state); + +// state structure for 16 -> 22 resampler +typedef struct { + int32_t S_16_32[8]; + int32_t S_32_22[8]; +} WebRtcSpl_State16khzTo22khz; + +void WebRtcSpl_Resample16khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo22khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state); + +// state structure for 22 -> 8 resampler +typedef struct { + int32_t S_22_22[16]; + int32_t S_22_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State22khzTo8khz; + +void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State22khzTo8khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state); + +// state structure for 8 -> 22 resampler +typedef struct { + int32_t S_8_16[8]; + int32_t S_16_11[8]; + int32_t S_11_22[8]; +} WebRtcSpl_State8khzTo22khz; + +void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State8khzTo22khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state); + +/******************************************************************* + * resample_fractional.c + * Functions for internal use in the other resample functions + * + * Includes the following resampling combinations + * 48 kHz -> 32 kHz + * 32 kHz -> 24 kHz + * 44 kHz -> 32 kHz + * + ******************************************************************/ + +void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K); + +void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K); + +void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K); + +/******************************************************************* + * resample_48khz.c + * + * Includes the following resampling combinations + * 48 kHz -> 16 kHz + * 16 kHz -> 48 kHz + * 48 kHz -> 8 kHz + * 8 kHz -> 48 kHz + * + ******************************************************************/ + +typedef struct { + int32_t S_48_48[16]; + int32_t S_48_32[8]; + int32_t S_32_16[8]; +} WebRtcSpl_State48khzTo16khz; + +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, + int16_t* out, + 
WebRtcSpl_State48khzTo16khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state); + +typedef struct { + int32_t S_16_32[8]; + int32_t S_32_24[8]; + int32_t S_24_48[8]; +} WebRtcSpl_State16khzTo48khz; + +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State16khzTo48khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state); + +typedef struct { + int32_t S_48_24[8]; + int32_t S_24_24[16]; + int32_t S_24_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State48khzTo8khz; + +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State48khzTo8khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state); + +typedef struct { + int32_t S_8_16[8]; + int32_t S_16_12[8]; + int32_t S_12_24[8]; + int32_t S_24_48[8]; +} WebRtcSpl_State8khzTo48khz; + +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, + int16_t* out, + WebRtcSpl_State8khzTo48khz* state, + int32_t* tmpmem); + +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state); + +/******************************************************************* + * resample_by_2.c + * + * Includes down and up sampling by a factor of two. + * + ******************************************************************/ + +void WebRtcSpl_DownsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState); + +void WebRtcSpl_UpsampleBy2(const int16_t* in, + size_t len, + int16_t* out, + int32_t* filtState); + +/************************************************************ + * END OF RESAMPLING FUNCTIONS + ************************************************************/ +void WebRtcSpl_AnalysisQMF(const int16_t* in_data, + size_t in_data_length, + int16_t* low_band, + int16_t* high_band, + int32_t* filter_state1, + int32_t* filter_state2); +void WebRtcSpl_SynthesisQMF(const int16_t* low_band, + const int16_t* high_band, + size_t band_length, + int16_t* out_data, + int32_t* filter_state1, + int32_t* filter_state2); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ + +// +// WebRtcSpl_AddSatW16(...) +// WebRtcSpl_AddSatW32(...) +// +// Returns the result of a saturated 16-bit, respectively 32-bit, addition of +// the numbers specified by the |var1| and |var2| parameters. +// +// Input: +// - var1 : Input variable 1 +// - var2 : Input variable 2 +// +// Return value : Added and saturated value +// + +// +// WebRtcSpl_SubSatW16(...) +// WebRtcSpl_SubSatW32(...) +// +// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction +// of the numbers specified by the |var1| and |var2| parameters. +// +// Input: +// - var1 : Input variable 1 +// - var2 : Input variable 2 +// +// Returned value : Subtracted and saturated value +// + +// +// WebRtcSpl_GetSizeInBits(...) +// +// Returns the # of bits that are needed at the most to represent the number +// specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bits needed to represent |value| +// + +// +// WebRtcSpl_NormW32(...) +// +// Norm returns the # of left shifts required to 32-bit normalize the 32-bit +// signed number specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize |value| +// + +// +// WebRtcSpl_NormW16(...) 
+// +// Norm returns the # of left shifts required to 16-bit normalize the 16-bit +// signed number specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize |value| +// + +// +// WebRtcSpl_NormU32(...) +// +// Norm returns the # of left shifts required to 32-bit normalize the unsigned +// 32-bit number specified by the |value| parameter. +// +// Input: +// - value : Input value +// +// Return value : Number of bit shifts needed to 32-bit normalize |value| +// + +// +// WebRtcSpl_GetScalingSquare(...) +// +// Returns the # of bits required to scale the samples specified in the +// |in_vector| parameter so that, if the squares of the samples are added the +// # of times specified by the |times| parameter, the 32-bit addition will not +// overflow (result in int32_t). +// +// Input: +// - in_vector : Input vector to check scaling on +// - in_vector_length : Samples in |in_vector| +// - times : Number of additions to be performed +// +// Return value : Number of right bit shifts needed to avoid +// overflow in the addition calculation +// + +// +// WebRtcSpl_MemSetW16(...) +// +// Sets all the values in the int16_t vector |vector| of length +// |vector_length| to the specified value |set_value| +// +// Input: +// - vector : Pointer to the int16_t vector +// - set_value : Value specified +// - vector_length : Length of vector +// + +// +// WebRtcSpl_MemSetW32(...) +// +// Sets all the values in the int32_t vector |vector| of length +// |vector_length| to the specified value |set_value| +// +// Input: +// - vector : Pointer to the int16_t vector +// - set_value : Value specified +// - vector_length : Length of vector +// + +// +// WebRtcSpl_MemCpyReversedOrder(...) +// +// Copies all the values from the source int16_t vector |in_vector| to a +// destination int16_t vector |out_vector|. It is done in reversed order, +// meaning that the first sample of |in_vector| is copied to the last sample of +// the |out_vector|. The procedure continues until the last sample of +// |in_vector| has been copied to the first sample of |out_vector|. This +// creates a reversed vector. Used in e.g. prediction in iLBC. +// +// Input: +// - in_vector : Pointer to the first sample in a int16_t vector +// of length |length| +// - vector_length : Number of elements to copy +// +// Output: +// - out_vector : Pointer to the last sample in a int16_t vector +// of length |length| +// + +// +// WebRtcSpl_CopyFromEndW16(...) +// +// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|) +// to the vector |out_vector|. +// +// Input: +// - in_vector : Input vector +// - in_vector_length : Number of samples in |in_vector| +// - samples : Number of samples to extract (from right side) +// from |in_vector| +// +// Output: +// - out_vector : Vector with the requested samples +// + +// +// WebRtcSpl_ZerosArrayW16(...) +// WebRtcSpl_ZerosArrayW32(...) +// +// Inserts the value "zero" in all positions of a w16 and a w32 vector +// respectively. +// +// Input: +// - vector_length : Number of samples in vector +// +// Output: +// - vector : Vector containing all zeros +// + +// +// WebRtcSpl_VectorBitShiftW16(...) +// WebRtcSpl_VectorBitShiftW32(...) +// +// Bit shifts all the values in a vector up or downwards. Different calls for +// int16_t and int32_t vectors respectively. 
+// +// Input: +// - vector_length : Length of vector +// - in_vector : Pointer to the vector that should be bit shifted +// - right_shifts : Number of right bit shifts (negative value gives left +// shifts) +// +// Output: +// - out_vector : Pointer to the result vector (can be the same as +// |in_vector|) +// + +// +// WebRtcSpl_VectorBitShiftW32ToW16(...) +// +// Bit shifts all the values in a int32_t vector up or downwards and +// stores the result as an int16_t vector. The function will saturate the +// signal if needed, before storing in the output vector. +// +// Input: +// - vector_length : Length of vector +// - in_vector : Pointer to the vector that should be bit shifted +// - right_shifts : Number of right bit shifts (negative value gives left +// shifts) +// +// Output: +// - out_vector : Pointer to the result vector (can be the same as +// |in_vector|) +// + +// +// WebRtcSpl_ScaleVector(...) +// +// Performs the vector operation: +// out_vector[k] = (gain*in_vector[k])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Scaling gain +// - vector_length : Elements in the |in_vector| +// - right_shifts : Number of right bit shifts applied +// +// Output: +// - out_vector : Output vector (can be the same as |in_vector|) +// + +// +// WebRtcSpl_ScaleVectorWithSat(...) +// +// Performs the vector operation: +// out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts ) +// +// Input: +// - in_vector : Input vector +// - gain : Scaling gain +// - vector_length : Elements in the |in_vector| +// - right_shifts : Number of right bit shifts applied +// +// Output: +// - out_vector : Output vector (can be the same as |in_vector|) +// + +// +// WebRtcSpl_ScaleAndAddVectors(...) +// +// Performs the vector operation: +// out_vector[k] = (gain1*in_vector1[k])>>right_shifts1 +// + (gain2*in_vector2[k])>>right_shifts2 +// +// Input: +// - in_vector1 : Input vector 1 +// - gain1 : Gain to be used for vector 1 +// - right_shifts1 : Right bit shift to be used for vector 1 +// - in_vector2 : Input vector 2 +// - gain2 : Gain to be used for vector 2 +// - right_shifts2 : Right bit shift to be used for vector 2 +// - vector_length : Elements in the input vectors +// +// Output: +// - out_vector : Output vector +// + +// +// WebRtcSpl_ReverseOrderMultArrayElements(...) +// +// Performs the vector operation: +// out_vector[n] = (in_vector[n]*window[-n])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - window : Window vector (should be reversed). The pointer +// should be set to the last value in the vector +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in |in_vector| +// +// Output: +// - out_vector : Output vector (can be same as |in_vector|) +// + +// +// WebRtcSpl_ElementwiseVectorMult(...) +// +// Performs the vector operation: +// out_vector[n] = (in_vector[n]*window[n])>>right_shifts +// +// Input: +// - in_vector : Input vector +// - window : Window vector. +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in |in_vector| +// +// Output: +// - out_vector : Output vector (can be same as |in_vector|) +// + +// +// WebRtcSpl_AddVectorsAndShift(...) 
+// +// Performs the vector operation: +// out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts +// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector2 : Input vector 2 +// - right_shifts : Number of right bit shift to be applied after the +// multiplication +// - vector_length : Number of elements in |in_vector1| and |in_vector2| +// +// Output: +// - out_vector : Output vector (can be same as |in_vector1|) +// + +// +// WebRtcSpl_AddAffineVectorToVector(...) +// +// Adds an affine transformed vector to another vector |out_vector|, i.e, +// performs +// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Gain value, used to multiply the in vector with +// - add_constant : Constant value to add (usually 1<<(right_shifts-1), +// but others can be used as well +// - right_shifts : Number of right bit shifts (0-16) +// - vector_length : Number of samples in |in_vector| and |out_vector| +// +// Output: +// - out_vector : Vector with the output +// + +// +// WebRtcSpl_AffineTransformVector(...) +// +// Affine transforms a vector, i.e, performs +// out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts +// +// Input: +// - in_vector : Input vector +// - gain : Gain value, used to multiply the in vector with +// - add_constant : Constant value to add (usually 1<<(right_shifts-1), +// but others can be used as well +// - right_shifts : Number of right bit shifts (0-16) +// - vector_length : Number of samples in |in_vector| and |out_vector| +// +// Output: +// - out_vector : Vector with the output +// + +// +// WebRtcSpl_IncreaseSeed(...) +// +// Increases the seed (and returns the new value) +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : The new seed value +// + +// +// WebRtcSpl_RandU(...) +// +// Produces a uniformly distributed value in the int16_t range +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : Uniformly distributed value in the range +// [Word16_MIN...Word16_MAX] +// + +// +// WebRtcSpl_RandN(...) +// +// Produces a normal distributed value in the int16_t range +// +// Input: +// - seed : Seed for random calculation +// +// Output: +// - seed : Updated seed value +// +// Return value : N(0,1) value in the Q13 domain +// + +// +// WebRtcSpl_RandUArray(...) +// +// Produces a uniformly distributed vector with elements in the int16_t +// range +// +// Input: +// - vector_length : Samples wanted in the vector +// - seed : Seed for random calculation +// +// Output: +// - vector : Vector with the uniform values +// - seed : Updated seed value +// +// Return value : Number of samples in vector, i.e., |vector_length| +// + +// +// WebRtcSpl_Sqrt(...) +// +// Returns the square root of the input value |value|. The precision of this +// function is integer precision, i.e., sqrt(8) gives 2 as answer. +// If |value| is a negative number then 0 is returned. +// +// Algorithm: +// +// A sixth order Taylor Series expansion is used here to compute the square +// root of a number y^0.5 = (1+x)^0.5 +// where +// x = y-1 +// = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) +// 0.5 <= x < 1 +// +// Input: +// - value : Value to calculate sqrt of +// +// Return value : Result of the sqrt calculation +// + +// +// WebRtcSpl_DivU32U16(...) +// +// Divides a uint32_t |num| by a uint16_t |den|. 
+// +// If |den|==0, (uint32_t)0xFFFFFFFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a uint32_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivW32W16(...) +// +// Divides a int32_t |num| by a int16_t |den|. +// +// If |den|==0, (int32_t)0x7FFFFFFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a int32_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivW32W16ResW16(...) +// +// Divides a int32_t |num| by a int16_t |den|, assuming that the +// result is less than 32768, otherwise an unpredictable result will occur. +// +// If |den|==0, (int16_t)0x7FFF is returned. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division (as a int16_t), i.e., the +// integer part of num/den. +// + +// +// WebRtcSpl_DivResultInQ31(...) +// +// Divides a int32_t |num| by a int16_t |den|, assuming that the +// absolute value of the denominator is larger than the numerator, otherwise +// an unpredictable result will occur. +// +// Input: +// - num : Numerator +// - den : Denominator +// +// Return value : Result of the division in Q31. +// + +// +// WebRtcSpl_DivW32HiLow(...) +// +// Divides a int32_t |num| by a denominator in hi, low format. The +// absolute value of the denominator has to be larger (or equal to) the +// numerator. +// +// Input: +// - num : Numerator +// - den_hi : High part of denominator +// - den_low : Low part of denominator +// +// Return value : Divided value in Q31 +// + +// +// WebRtcSpl_Energy(...) +// +// Calculates the energy of a vector +// +// Input: +// - vector : Vector which the energy should be calculated on +// - vector_length : Number of samples in vector +// +// Output: +// - scale_factor : Number of left bit shifts needed to get the physical +// energy value, i.e, to get the Q0 value +// +// Return value : Energy value in Q(-|scale_factor|) +// + +// +// WebRtcSpl_FilterAR(...) +// +// Performs a 32-bit AR filtering on a vector in Q12 +// +// Input: +// - ar_coef : AR-coefficient vector (values in Q12), +// ar_coef[0] must be 4096. +// - ar_coef_length : Number of coefficients in |ar_coef|. +// - in_vector : Vector to be filtered. +// - in_vector_length : Number of samples in |in_vector|. +// - filter_state : Current state (higher part) of the filter. +// - filter_state_length : Length (in samples) of |filter_state|. +// - filter_state_low : Current state (lower part) of the filter. +// - filter_state_low_length : Length (in samples) of |filter_state_low|. +// - out_vector_low_length : Maximum length (in samples) of +// |out_vector_low|. +// +// Output: +// - filter_state : Updated state (upper part) vector. +// - filter_state_low : Updated state (lower part) vector. +// - out_vector : Vector containing the upper part of the +// filtered values. +// - out_vector_low : Vector containing the lower part of the +// filtered values. +// +// Return value : Number of samples in the |out_vector|. +// + +// +// WebRtcSpl_ComplexIFFT(...) +// +// Complex Inverse FFT +// +// Computes an inverse complex 2^|stages|-point FFT on the input vector, which +// is in bit-reversed order. The original content of the vector is destroyed in +// the process, since the input is overwritten by the output, normal-ordered, +// FFT vector. 
With X as the input complex vector, y as the output complex +// vector and with M = 2^|stages|, the following is computed: +// +// M-1 +// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]] +// i=0 +// +// The implementations are optimized for speed, not for code size. It uses the +// decimation-in-time algorithm with radix-2 butterfly technique. +// +// Input: +// - vector : In pointer to complex vector containing 2^|stages| +// real elements interleaved with 2^|stages| imaginary +// elements. +// [ReImReImReIm....] +// The elements are in Q(-scale) domain, see more on Return +// Value below. +// +// - stages : Number of FFT stages. Must be at least 3 and at most 10, +// since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// - mode : This parameter gives the user to choose how the FFT +// should work. +// mode==0: Low-complexity and Low-accuracy mode +// mode==1: High-complexity and High-accuracy mode +// +// Output: +// - vector : Out pointer to the FFT vector (the same as input). +// +// Return Value : The scale value that tells the number of left bit shifts +// that the elements in the |vector| should be shifted with +// in order to get Q0 values, i.e. the physically correct +// values. The scale parameter is always 0 or positive, +// except if N>1024 (|stages|>10), which returns a scale +// value of -1, indicating error. +// + +// +// WebRtcSpl_ComplexFFT(...) +// +// Complex FFT +// +// Computes a complex 2^|stages|-point FFT on the input vector, which is in +// bit-reversed order. The original content of the vector is destroyed in +// the process, since the input is overwritten by the output, normal-ordered, +// FFT vector. With x as the input complex vector, Y as the output complex +// vector and with M = 2^|stages|, the following is computed: +// +// M-1 +// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]] +// i=0 +// +// The implementations are optimized for speed, not for code size. It uses the +// decimation-in-time algorithm with radix-2 butterfly technique. +// +// This routine prevents overflow by scaling by 2 before each FFT stage. This is +// a fixed scaling, for proper normalization - there will be log2(n) passes, so +// this results in an overall factor of 1/n, distributed to maximize arithmetic +// accuracy. +// +// Input: +// - vector : In pointer to complex vector containing 2^|stages| real +// elements interleaved with 2^|stages| imaginary elements. +// [ReImReImReIm....] +// The output is in the Q0 domain. +// +// - stages : Number of FFT stages. Must be at least 3 and at most 10, +// since the table WebRtcSpl_kSinTable1024[] is 1024 +// elements long. +// +// - mode : This parameter gives the user to choose how the FFT +// should work. +// mode==0: Low-complexity and Low-accuracy mode +// mode==1: High-complexity and High-accuracy mode +// +// Output: +// - vector : The output FFT vector is in the Q0 domain. +// +// Return value : The scale parameter is always 0, except if N>1024, +// which returns a scale value of -1, indicating error. +// + +// +// WebRtcSpl_AnalysisQMF(...) +// +// Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The +// current version has F = 8000, therefore, a super-wideband audio signal is +// split to lower-band 0-8 kHz and upper-band 8-16 kHz. 
+// +// Input: +// - in_data : Wide band speech signal, 320 samples (10 ms) +// +// Input & Output: +// - filter_state1 : Filter state for first All-pass filter +// - filter_state2 : Filter state for second All-pass filter +// +// Output: +// - low_band : Lower-band signal 0-8 kHz band, 160 samples (10 ms) +// - high_band : Upper-band signal 8-16 kHz band (flipped in frequency +// domain), 160 samples (10 ms) +// + +// +// WebRtcSpl_SynthesisQMF(...) +// +// Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F +// Hz, (current version has F = 8000 Hz). So the filter combines lower-band +// (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16 +// kHz audio. +// +// Input: +// - low_band : The signal with the 0-8 kHz band, 160 samples (10 ms) +// - high_band : The signal with the 8-16 kHz band, 160 samples (10 ms) +// +// Input & Output: +// - filter_state1 : Filter state for first All-pass filter +// - filter_state2 : Filter state for second All-pass filter +// +// Output: +// - out_data : Super-wideband speech signal, 0-16 kHz +// + +// int16_t WebRtcSpl_SatW32ToW16(...) +// +// This function saturates a 32-bit word into a 16-bit word. +// +// Input: +// - value32 : The value of a 32-bit word. +// +// Output: +// - out16 : the saturated 16-bit word. +// + +// int32_t WebRtc_MulAccumW16(...) +// +// This function multiply a 16-bit word by a 16-bit word, and accumulate this +// value to a 32-bit integer. +// +// Input: +// - a : The value of the first 16-bit word. +// - b : The value of the second 16-bit word. +// - c : The value of an 32-bit integer. +// +// Return Value: The value of a * b + c. +// diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/spl_inl.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/spl_inl.h new file mode 100644 index 00000000..d24b3a5f --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/include/spl_inl.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file includes the inline functions in +// the fix point signal processing library. + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ + +#include "webrtc/rtc_base/compile_assert_c.h" + +extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64]; + +// Don't call this directly except in tests! +static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) { + // Normalize n by rounding up to the nearest number that is a sequence of 0 + // bits followed by a sequence of 1 bits. This number has the same number of + // leading zeros as the original n. There are exactly 33 such values. + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + + // Multiply the modified n with a constant selected (by exhaustive search) + // such that each of the 33 possible values of n give a product whose 6 most + // significant bits are unique. Then look up the answer in the table. 
+ return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; +} + +// Don't call this directly except in tests! +static __inline int WebRtcSpl_CountLeadingZeros64_NotBuiltin(uint64_t n) { + const int leading_zeros = n >> 32 == 0 ? 32 : 0; + return leading_zeros + WebRtcSpl_CountLeadingZeros32_NotBuiltin( + (uint32_t)(n >> (32 - leading_zeros))); +} + +// Returns the number of leading zero bits in the argument. +static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) { +#ifdef __GNUC__ + RTC_COMPILE_ASSERT(sizeof(unsigned int) == sizeof(uint32_t)); + return n == 0 ? 32 : __builtin_clz(n); +#else + return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n); +#endif +} + +// Returns the number of leading zero bits in the argument. +static __inline int WebRtcSpl_CountLeadingZeros64(uint64_t n) { +#ifdef __GNUC__ + RTC_COMPILE_ASSERT(sizeof(unsigned long long) == sizeof(uint64_t)); // NOLINT + return n == 0 ? 64 : __builtin_clzll(n); +#else + return WebRtcSpl_CountLeadingZeros64_NotBuiltin(n); +#endif +} + +#ifdef WEBRTC_ARCH_ARM_V7 +#include "webrtc/common_audio/signal_processing/include/spl_inl_armv7.h" +#else + +#if defined(MIPS32_LE) +#include "webrtc/common_audio/signal_processing/include/spl_inl_mips.h" +#endif + +#if !defined(MIPS_DSP_R1_LE) +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + int16_t out16 = (int16_t)value32; + + if (value32 > 32767) + out16 = 32767; + else if (value32 < -32768) + out16 = -32768; + + return out16; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t a, int32_t b) { + // Do the addition in unsigned numbers, since signed overflow is undefined + // behavior. + const int32_t sum = (int32_t)((uint32_t)a + (uint32_t)b); + + // a + b can't overflow if a and b have different signs. If they have the + // same sign, a + b also has the same sign iff it didn't overflow. + if ((a < 0) == (b < 0) && (a < 0) != (sum < 0)) { + // The direction of the overflow is obvious from the sign of a + b. + return sum < 0 ? INT32_MAX : INT32_MIN; + } + return sum; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t a, int32_t b) { + // Do the subtraction in unsigned numbers, since signed overflow is undefined + // behavior. + const int32_t diff = (int32_t)((uint32_t)a - (uint32_t)b); + + // a - b can't overflow if a and b have the same sign. If they have different + // signs, a - b has the same sign as a iff it didn't overflow. + if ((a < 0) != (b < 0) && (a < 0) != (diff < 0)) { + // The direction of the overflow is obvious from the sign of a - b. + return diff < 0 ? INT32_MAX : INT32_MIN; + } + return diff; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + return WebRtcSpl_SatW32ToW16((int32_t)a + (int32_t)b); +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + return WebRtcSpl_SatW32ToW16((int32_t)var1 - (int32_t)var2); +} +#endif // #if !defined(MIPS_DSP_R1_LE) + +#if !defined(MIPS32_LE) +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + return 32 - WebRtcSpl_CountLeadingZeros32(n); +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1; +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + return a == 0 ? 
0 : WebRtcSpl_CountLeadingZeros32(a); +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + const int32_t a32 = a; + return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a32 : a32) - 17; +} + +static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + return (a * b + c); +} +#endif // #if !defined(MIPS32_LE) + +#endif // WEBRTC_ARCH_ARM_V7 + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/min_max_operations.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/min_max_operations.c new file mode 100644 index 00000000..75975bb0 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/min_max_operations.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file contains the implementation of functions + * WebRtcSpl_MaxAbsValueW16C() + * WebRtcSpl_MaxAbsValueW32C() + * WebRtcSpl_MaxValueW16C() + * WebRtcSpl_MaxValueW32C() + * WebRtcSpl_MinValueW16C() + * WebRtcSpl_MinValueW32C() + * WebRtcSpl_MaxAbsIndexW16() + * WebRtcSpl_MaxIndexW16() + * WebRtcSpl_MaxIndexW32() + * WebRtcSpl_MinIndexW16() + * WebRtcSpl_MinIndexW32() + * + */ + +#include + +#include "webrtc/rtc_base/checks.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// TODO(bjorn/kma): Consolidate function pairs (e.g. combine +// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) +// TODO(kma): Move the next six functions into min_max_operations_c.c. + +// Maximum absolute value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length) { + size_t i = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + } + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t)maximum; +} + +// Maximum absolute value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + if (absolute > maximum) { + maximum = absolute; + } + } + + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t)maximum; +} + +// Maximum value of word16 vector. C version for generic platforms. 
+int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Maximum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Minimum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Minimum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Index of maximum absolute value in a word16 vector. +size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) { + // Use type int for local variables, to accomodate the value of abs(-32768). + + size_t i = 0, index = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int)vector[i]); + + if (absolute > maximum) { + maximum = absolute; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word16 vector. +size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of maximum value in a word32 vector. +size_t WebRtcSpl_MaxIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) { + maximum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word16 vector. +size_t WebRtcSpl_MinIndexW16(const int16_t* vector, size_t length) { + size_t i = 0, index = 0; + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} + +// Index of minimum value in a word32 vector. 
+size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) { + size_t i = 0, index = 0; + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) { + minimum = vector[i]; + index = i; + } + } + + return index; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_48khz.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_48khz.c new file mode 100644 index 00000000..2220cc33 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_48khz.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains resampling functions between 48 kHz and nb/wb. + * The description header can be found in signal_processing_library.h + * + */ + +#include +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" + +//////////////////////////// +///// 48 kHz -> 16 kHz ///// +//////////////////////////// + +// 48 -> 16 resampler +void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo16khz* state, int32_t* tmpmem) +{ + ///// 48 --> 48(LP) ///// + // int16_t in[480] + // int32_t out[480] + ///// + WebRtcSpl_LPBy2ShortToInt(in, 480, tmpmem + 16, state->S_48_48); + + ///// 48 --> 32 ///// + // int32_t in[480] + // int32_t out[320] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_48_32, 8 * sizeof(int32_t)); + memcpy(state->S_48_32, tmpmem + 488, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 160); + + ///// 32 --> 16 ///// + // int32_t in[320] + // int16_t out[160] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 320, out, state->S_32_16); +} + +// initialize state of 48 -> 16 resampler +void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state) +{ + memset(state->S_48_48, 0, 16 * sizeof(int32_t)); + memset(state->S_48_32, 0, 8 * sizeof(int32_t)); + memset(state->S_32_16, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 16 kHz -> 48 kHz ///// +//////////////////////////// + +// 16 -> 48 resampler +void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State16khzTo48khz* state, int32_t* tmpmem) +{ + ///// 16 --> 32 ///// + // int16_t in[160] + // int32_t out[320] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 160, tmpmem + 16, state->S_16_32); + + ///// 32 --> 24 ///// + // int32_t in[320] + // int32_t out[240] + // copy state to and from input array + ///// + memcpy(tmpmem + 8, state->S_32_24, 8 * sizeof(int32_t)); + memcpy(state->S_32_24, tmpmem + 328, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 8, tmpmem, 80); + + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); +} + +// initialize state of 16 -> 48 resampler +void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state) +{ + memset(state->S_16_32, 0, 8 * 
sizeof(int32_t)); + memset(state->S_32_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 48 kHz -> 8 kHz ///// +//////////////////////////// + +// 48 -> 8 resampler +void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out, + WebRtcSpl_State48khzTo8khz* state, int32_t* tmpmem) +{ + ///// 48 --> 24 ///// + // int16_t in[480] + // int32_t out[240] + ///// + WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24); + + ///// 24 --> 24(LP) ///// + // int32_t in[240] + // int32_t out[240] + ///// + WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24); + + ///// 24 --> 16 ///// + // int32_t in[240] + // int32_t out[160] + ///// + // copy state to and from input array + memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t)); + memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t)); + WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80); + + ///// 16 --> 8 ///// + // int32_t in[160] + // int16_t out[80] + ///// + WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8); +} + +// initialize state of 48 -> 8 resampler +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state) +{ + memset(state->S_48_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_24, 0, 16 * sizeof(int32_t)); + memset(state->S_24_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_8, 0, 8 * sizeof(int32_t)); +} + +//////////////////////////// +///// 8 kHz -> 48 kHz ///// +//////////////////////////// + +// 8 -> 48 resampler +void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out, + WebRtcSpl_State8khzTo48khz* state, int32_t* tmpmem) +{ + ///// 8 --> 16 ///// + // int16_t in[80] + // int32_t out[160] + ///// + WebRtcSpl_UpBy2ShortToInt(in, 80, tmpmem + 264, state->S_8_16); + + ///// 16 --> 12 ///// + // int32_t in[160] + // int32_t out[120] + ///// + // copy state to and from input array + memcpy(tmpmem + 256, state->S_16_12, 8 * sizeof(int32_t)); + memcpy(state->S_16_12, tmpmem + 416, 8 * sizeof(int32_t)); + WebRtcSpl_Resample32khzTo24khz(tmpmem + 256, tmpmem + 240, 40); + + ///// 12 --> 24 ///// + // int32_t in[120] + // int16_t out[240] + ///// + WebRtcSpl_UpBy2IntToInt(tmpmem + 240, 120, tmpmem, state->S_12_24); + + ///// 24 --> 48 ///// + // int32_t in[240] + // int16_t out[480] + ///// + WebRtcSpl_UpBy2IntToShort(tmpmem, 240, out, state->S_24_48); +} + +// initialize state of 8 -> 48 resampler +void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state) +{ + memset(state->S_8_16, 0, 8 * sizeof(int32_t)); + memset(state->S_16_12, 0, 8 * sizeof(int32_t)); + memset(state->S_12_24, 0, 8 * sizeof(int32_t)); + memset(state->S_24_48, 0, 8 * sizeof(int32_t)); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.c new file mode 100644 index 00000000..72bc0f92 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.c @@ -0,0 +1,689 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This header file contains some internal resampling functions. + * + */ + +#include "webrtc/common_audio/signal_processing/resample_by_2_internal.h" +#include "webrtc/rtc_base/sanitizer.h" + +// allpass filter coefficients. +static const int16_t kResampleAllpass[2][3] = { + {821, 6110, 12382}, + {3050, 9368, 15063} +}; + +// +// decimator +// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! +// output: int16_t (saturated) (of length len/2) +// state: filter state array; length = 8 + +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[1]; + // UBSan: -1771017321 - 999586185 cannot be represented in type 'int' + + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + in[i << 1] = (state[7] >> 1); + } + + in--; + + // combine allpass outputs + for (i = 0; i < len; i += 2) + { + // divide by two, add both allpass outputs and round + tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15; + tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15; + if (tmp0 > (int32_t)0x00007FFF) + tmp0 = 0x00007FFF; + if (tmp0 < (int32_t)0xFFFF8000) + tmp0 = 0xFFFF8000; + out[i] = (int16_t)tmp0; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i + 1] = (int16_t)tmp1; + } +} + +// +// decimator +// input: int16_t +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2) +// state: filter state array; length = 8 + +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_DownBy2ShortToInt(const int16_t *in, + int32_t len, + int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter (operates on even input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = 
tmp0; + diff = tmp1 - state[2]; + // UBSan: -1379909682 - 834099714 cannot be represented in type 'int' + + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // divide by two and store temporarily + out[i] = (state[3] >> 1); + } + + in++; + + // upper allpass filter (operates on odd input samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // divide by two and store temporarily + out[i] += (state[7] >> 1); + } + + in--; +} + +// +// interpolator +// input: int16_t +// output: int32_t (normalized, not saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2ShortToInt(const int16_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7] >> 15; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i] << 15) + (1 << 14); + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 15; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToInt(const int32_t *in, int32_t len, int32_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; 
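+        // This loop cascades three first-order allpass sections of the form
+        // y[n] = x[n-1] + c * (x[n] - y[n-1]); the Q14 coefficients come from
+        // kResampleAllpass[0] and state[4..7] carry each section's one-sample
+        // delays.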
+ tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, round and store + out[i << 1] = state[7]; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3]; + } +} + +// +// interpolator +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int16_t (saturated) (of length len*2) +// state: filter state array; length = 8 +void WebRtcSpl_UpBy2IntToShort(const int32_t *in, int32_t len, int16_t *out, + int32_t *state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + // upper allpass filter (generates odd output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // scale down, saturate and store + tmp1 = state[7] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } + + out++; + + // lower allpass filter (generates even output samples) + for (i = 0; i < len; i++) + { + tmp0 = in[i]; + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, saturate and store + tmp1 = state[3] >> 15; + if (tmp1 > (int32_t)0x00007FFF) + tmp1 = 0x00007FFF; + if (tmp1 < (int32_t)0xFFFF8000) + tmp1 = 0xFFFF8000; + out[i << 1] = (int16_t)tmp1; + } +} + +// lowpass filter +// input: int16_t +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of 
polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[5]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = ((int32_t)in[i << 1] << 15) + (1 << 14); + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} + +// lowpass filter +// input: int32_t (shifted 15 positions to the left, + offset 16384) +// output: int32_t (normalized, not saturated) +// state: filter state array; length = 8 +void RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486 +WebRtcSpl_LPBy2IntToInt(const int32_t* in, int32_t len, int32_t* out, + int32_t* state) +{ + int32_t tmp0, tmp1, diff; + int32_t i; + + len >>= 1; + + // lower allpass filter: odd input -> even output samples + in++; + // initial state of 
polyphase delay element + tmp0 = state[12]; + for (i = 0; i < len; i++) + { + diff = tmp0 - state[1]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[0] + diff * kResampleAllpass[1][0]; + state[0] = tmp0; + diff = tmp1 - state[2]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[1] + diff * kResampleAllpass[1][1]; + state[1] = tmp1; + diff = tmp0 - state[3]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[3] = state[2] + diff * kResampleAllpass[1][2]; + state[2] = tmp0; + + // scale down, round and store + out[i << 1] = state[3] >> 1; + tmp0 = in[i << 1]; + } + in--; + + // upper allpass filter: even input -> even output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[5]; + // UBSan: -794814117 - 1566149201 cannot be represented in type 'int' + + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[4] + diff * kResampleAllpass[0][0]; + state[4] = tmp0; + diff = tmp1 - state[6]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[5] + diff * kResampleAllpass[0][1]; + state[5] = tmp1; + diff = tmp0 - state[7]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[7] = state[6] + diff * kResampleAllpass[0][2]; + state[6] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15; + } + + // switch to odd output samples + out++; + + // lower allpass filter: even input -> odd output samples + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[9]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[8] + diff * kResampleAllpass[1][0]; + state[8] = tmp0; + diff = tmp1 - state[10]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[9] + diff * kResampleAllpass[1][1]; + state[9] = tmp1; + diff = tmp0 - state[11]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[11] = state[10] + diff * kResampleAllpass[1][2]; + state[10] = tmp0; + + // scale down, round and store + out[i << 1] = state[11] >> 1; + } + + // upper allpass filter: odd input -> odd output samples + in++; + for (i = 0; i < len; i++) + { + tmp0 = in[i << 1]; + diff = tmp0 - state[13]; + // scale down and round + diff = (diff + (1 << 13)) >> 14; + tmp1 = state[12] + diff * kResampleAllpass[0][0]; + state[12] = tmp0; + diff = tmp1 - state[14]; + // scale down and round + diff = diff >> 14; + if (diff < 0) + diff += 1; + tmp0 = state[13] + diff * kResampleAllpass[0][1]; + state[13] = tmp1; + diff = tmp0 - state[15]; + // scale down and truncate + diff = diff >> 14; + if (diff < 0) + diff += 1; + state[15] = state[14] + diff * kResampleAllpass[0][2]; + state[14] = tmp0; + + // average the two allpass outputs, scale down and store + out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15; + } +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.h new file mode 100644 index 00000000..145395a4 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_by_2_internal.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file contains some internal resampling functions. + * + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ + +#include + +/******************************************************************* + * resample_by_2_fast.c + * Functions for internal use in the other resample functions + ******************************************************************/ +void WebRtcSpl_DownBy2IntToShort(int32_t* in, + int32_t len, + int16_t* out, + int32_t* state); + +void WebRtcSpl_DownBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_UpBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_UpBy2IntToInt(const int32_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_UpBy2IntToShort(const int32_t* in, + int32_t len, + int16_t* out, + int32_t* state); + +void WebRtcSpl_LPBy2ShortToInt(const int16_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +void WebRtcSpl_LPBy2IntToInt(const int32_t* in, + int32_t len, + int32_t* out, + int32_t* state); + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_RESAMPLE_BY_2_INTERNAL_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_fractional.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_fractional.c new file mode 100644 index 00000000..6409fbac --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/resample_fractional.c @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the resampling functions between 48, 44, 32 and 24 kHz. 
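+ * (The ratios implemented here are 2/3 for 48 kHz -> 32 kHz, 3/4 for
+ * 32 kHz -> 24 kHz and 8/11 for 44 kHz -> 32 kHz.)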
+ * The description headers can be found in signal_processing_library.h + * + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// interpolation coefficients +static const int16_t kCoefficients48To32[2][8] = { + {778, -2050, 1087, 23285, 12903, -3783, 441, 222}, + {222, 441, -3783, 12903, 23285, 1087, -2050, 778} +}; + +static const int16_t kCoefficients32To24[3][8] = { + {767, -2362, 2434, 24406, 10620, -3838, 721, 90}, + {386, -381, -2646, 19062, 19062, -2646, -381, 386}, + {90, 721, -3838, 10620, 24406, 2434, -2362, 767} +}; + +static const int16_t kCoefficients44To32[4][9] = { + {117, -669, 2245, -6183, 26267, 13529, -3245, 845, -138}, + {-101, 612, -2283, 8532, 29790, -5138, 1789, -524, 91}, + {50, -292, 1016, -3064, 32010, 3933, -1147, 315, -53}, + {-156, 974, -3863, 18603, 21691, -6246, 2353, -712, 126} +}; + +// Resampling ratio: 2/3 +// input: int32_t (normalized, not saturated) :: size 3 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K +// K: number of blocks + +void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (3 input samples -> 2 output samples); + // process in sub blocks of size 3 samples. + int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients48To32[0][0] * In[0]; + tmp += kCoefficients48To32[0][1] * In[1]; + tmp += kCoefficients48To32[0][2] * In[2]; + tmp += kCoefficients48To32[0][3] * In[3]; + tmp += kCoefficients48To32[0][4] * In[4]; + tmp += kCoefficients48To32[0][5] * In[5]; + tmp += kCoefficients48To32[0][6] * In[6]; + tmp += kCoefficients48To32[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients48To32[1][0] * In[1]; + tmp += kCoefficients48To32[1][1] * In[2]; + tmp += kCoefficients48To32[1][2] * In[3]; + tmp += kCoefficients48To32[1][3] * In[4]; + tmp += kCoefficients48To32[1][4] * In[5]; + tmp += kCoefficients48To32[1][5] * In[6]; + tmp += kCoefficients48To32[1][6] * In[7]; + tmp += kCoefficients48To32[1][7] * In[8]; + Out[1] = tmp; + + // update pointers + In += 3; + Out += 2; + } +} + +// Resampling ratio: 3/4 +// input: int32_t (normalized, not saturated) :: size 4 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 3 * K +// K: number of blocks + +void WebRtcSpl_Resample32khzTo24khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (4 input samples -> 3 output samples); + // process in sub blocks of size 4 samples. 
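+    // Each of the three output samples per block below is an 8-tap FIR inner
+    // product accumulated on a Q14 rounding offset (1 << 14); the rows of
+    // kCoefficients32To24 are the three polyphase branches of the 4-to-3
+    // filter.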
+ size_t m; + int32_t tmp; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + tmp += kCoefficients32To24[0][0] * In[0]; + tmp += kCoefficients32To24[0][1] * In[1]; + tmp += kCoefficients32To24[0][2] * In[2]; + tmp += kCoefficients32To24[0][3] * In[3]; + tmp += kCoefficients32To24[0][4] * In[4]; + tmp += kCoefficients32To24[0][5] * In[5]; + tmp += kCoefficients32To24[0][6] * In[6]; + tmp += kCoefficients32To24[0][7] * In[7]; + Out[0] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[1][0] * In[1]; + tmp += kCoefficients32To24[1][1] * In[2]; + tmp += kCoefficients32To24[1][2] * In[3]; + tmp += kCoefficients32To24[1][3] * In[4]; + tmp += kCoefficients32To24[1][4] * In[5]; + tmp += kCoefficients32To24[1][5] * In[6]; + tmp += kCoefficients32To24[1][6] * In[7]; + tmp += kCoefficients32To24[1][7] * In[8]; + Out[1] = tmp; + + tmp = 1 << 14; + tmp += kCoefficients32To24[2][0] * In[2]; + tmp += kCoefficients32To24[2][1] * In[3]; + tmp += kCoefficients32To24[2][2] * In[4]; + tmp += kCoefficients32To24[2][3] * In[5]; + tmp += kCoefficients32To24[2][4] * In[6]; + tmp += kCoefficients32To24[2][5] * In[7]; + tmp += kCoefficients32To24[2][6] * In[8]; + tmp += kCoefficients32To24[2][7] * In[9]; + Out[2] = tmp; + + // update pointers + In += 4; + Out += 3; + } +} + +// +// fractional resampling filters +// Fout = 11/16 * Fin +// Fout = 8/11 * Fin +// + +// compute two inner-products and store them to output array +static void WebRtcSpl_ResampDotProduct(const int32_t *in1, const int32_t *in2, + const int16_t *coef_ptr, int32_t *out1, + int32_t *out2) +{ + int32_t tmp1 = 16384; + int32_t tmp2 = 16384; + int16_t coef; + + coef = coef_ptr[0]; + tmp1 += coef * in1[0]; + tmp2 += coef * in2[-0]; + + coef = coef_ptr[1]; + tmp1 += coef * in1[1]; + tmp2 += coef * in2[-1]; + + coef = coef_ptr[2]; + tmp1 += coef * in1[2]; + tmp2 += coef * in2[-2]; + + coef = coef_ptr[3]; + tmp1 += coef * in1[3]; + tmp2 += coef * in2[-3]; + + coef = coef_ptr[4]; + tmp1 += coef * in1[4]; + tmp2 += coef * in2[-4]; + + coef = coef_ptr[5]; + tmp1 += coef * in1[5]; + tmp2 += coef * in2[-5]; + + coef = coef_ptr[6]; + tmp1 += coef * in1[6]; + tmp2 += coef * in2[-6]; + + coef = coef_ptr[7]; + tmp1 += coef * in1[7]; + tmp2 += coef * in2[-7]; + + coef = coef_ptr[8]; + *out1 = tmp1 + coef * in1[8]; + *out2 = tmp2 + coef * in2[-8]; +} + +// Resampling ratio: 8/11 +// input: int32_t (normalized, not saturated) :: size 11 * K +// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 8 * K +// K: number of blocks + +void WebRtcSpl_Resample44khzTo32khz(const int32_t *In, int32_t *Out, size_t K) +{ + ///////////////////////////////////////////////////////////// + // Filter operation: + // + // Perform resampling (11 input samples -> 8 output samples); + // process in sub blocks of size 11 samples. 
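+    // Eleven input samples yield eight output samples per block: Out[0] is
+    // In[3] shifted into the output format (<< 15 plus the 1 << 14 offset),
+    // Out[4] is a direct 9-tap inner product, and the remaining six outputs
+    // are computed in symmetric pairs by WebRtcSpl_ResampDotProduct().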
+ int32_t tmp; + size_t m; + + for (m = 0; m < K; m++) + { + tmp = 1 << 14; + + // first output sample + Out[0] = ((int32_t)In[3] << 15) + tmp; + + // sum and accumulate filter coefficients and input samples + tmp += kCoefficients44To32[3][0] * In[5]; + tmp += kCoefficients44To32[3][1] * In[6]; + tmp += kCoefficients44To32[3][2] * In[7]; + tmp += kCoefficients44To32[3][3] * In[8]; + tmp += kCoefficients44To32[3][4] * In[9]; + tmp += kCoefficients44To32[3][5] * In[10]; + tmp += kCoefficients44To32[3][6] * In[11]; + tmp += kCoefficients44To32[3][7] * In[12]; + tmp += kCoefficients44To32[3][8] * In[13]; + Out[4] = tmp; + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[0], &In[17], kCoefficients44To32[0], &Out[1], &Out[7]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[2], &In[15], kCoefficients44To32[1], &Out[2], &Out[6]); + + // sum and accumulate filter coefficients and input samples + WebRtcSpl_ResampDotProduct(&In[3], &In[14], kCoefficients44To32[2], &Out[3], &Out[5]); + + // update pointers + In += 11; + Out += 8; + } +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_init.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_init.c new file mode 100644 index 00000000..950448cc --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_init.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* The global function contained in this file initializes SPL function + * pointers, currently only for ARM platforms. + * + * Some code came from common/rtcd.c in the WebM project. + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/system_wrappers/include/cpu_features_wrapper.h" + +/* Declare function pointers. */ +MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; +MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; +MaxValueW16 WebRtcSpl_MaxValueW16; +MaxValueW32 WebRtcSpl_MaxValueW32; +MinValueW16 WebRtcSpl_MinValueW16; +MinValueW32 WebRtcSpl_MinValueW32; +CrossCorrelation WebRtcSpl_CrossCorrelation; +DownsampleFast WebRtcSpl_DownsampleFast; +ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound; + +#if (!defined(WEBRTC_HAS_NEON)) && !defined(MIPS32_LE) +/* Initialize function pointers to the generic C version. */ +static void InitPointersToC(void) { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationC; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +} +#endif + +#if defined(WEBRTC_HAS_NEON) +/* Initialize function pointers to the Neon version. 
*/ +static void InitPointersToNeon(void) { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelationNeon; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +} +#endif + +#if defined(MIPS32_LE) +/* Initialize function pointers to the MIPS version. */ +static void InitPointersToMIPS(void) { + WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; + WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; + WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; + WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; + WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; + WebRtcSpl_CrossCorrelation = WebRtcSpl_CrossCorrelation_mips; + WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips; +#if defined(MIPS_DSP_R1_LE) + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32_mips; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRound_mips; +#else + WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; + WebRtcSpl_ScaleAndAddVectorsWithRound = + WebRtcSpl_ScaleAndAddVectorsWithRoundC; +#endif +} +#endif + +static void InitFunctionPointers(void) { +#if defined(WEBRTC_HAS_NEON) + InitPointersToNeon(); +#elif defined(MIPS32_LE) + InitPointersToMIPS(); +#else + InitPointersToC(); +#endif /* WEBRTC_HAS_NEON */ +} + +#include + +static void once(void (*func)(void)) { + static pthread_once_t lock = PTHREAD_ONCE_INIT; + pthread_once(&lock, func); +} +// #if defined(WEBRTC_POSIX) +// #include + +// static void once(void (*func)(void)) { +// static pthread_once_t lock = PTHREAD_ONCE_INIT; +// pthread_once(&lock, func); +// } + +// #elif defined(_WIN32) +// #include + +// static void once(void (*func)(void)) { +// /* Didn't use InitializeCriticalSection() since there's no race-free context +// * in which to execute it. +// * +// * TODO(kma): Change to different implementation (e.g. +// * InterlockedCompareExchangePointer) to avoid issues similar to +// * http://code.google.com/p/webm/issues/detail?id=467. +// */ +// static CRITICAL_SECTION lock = {(void *)((size_t)-1), -1, 0, 0, 0, 0}; +// static int done = 0; + +// EnterCriticalSection(&lock); +// if (!done) { +// func(); +// done = 1; +// } +// LeaveCriticalSection(&lock); +// } + +// /* There's no fallback version as an #else block here to ensure thread safety. +// * In case of neither pthread for WEBRTC_POSIX nor _WIN32 is present, build +// * system should pick it up. +// */ +// #endif /* WEBRTC_POSIX */ + +void WebRtcSpl_Init(void) { + once(InitFunctionPointers); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_inl.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_inl.c new file mode 100644 index 00000000..efa6a65f --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_inl.c @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +#include "webrtc/common_audio/signal_processing/include/spl_inl.h" + +// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n +// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at +// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in +// n. +const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12, +}; diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_sqrt.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_sqrt.c new file mode 100644 index 00000000..f79ac9fa --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/spl_sqrt.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +/* + * This file contains the function WebRtcSpl_Sqrt(). + * The description header can be found in signal_processing_library.h + * + */ + +#include "webrtc/rtc_base/checks.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +int32_t WebRtcSpl_SqrtLocal(int32_t in); + +int32_t WebRtcSpl_SqrtLocal(int32_t in) +{ + + int16_t x_half, t16; + int32_t A, B, x2; + + /* The following block performs: + y=in/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + */ + + B = in / 2; + + B = B - ((int32_t)0x40000000); // B = in/2 - 1/2 + x_half = (int16_t)(B >> 16); // x_half = x/2 = (in-1)/2 + B = B + ((int32_t)0x40000000); // B = 1 + x/2 + B = B + ((int32_t)0x40000000); // Add 0.5 twice (since 1.0 does not exist in Q31) + + x2 = ((int32_t)x_half) * ((int32_t)x_half) * 2; // A = (x/2)^2 + A = -x2; // A = -(x/2)^2 + B = B + (A >> 1); // B = 1 + x/2 - 0.5*(x/2)^2 + + A >>= 16; + A = A * A * 2; // A = (x/2)^4 + t16 = (int16_t)(A >> 16); + B += -20480 * t16 * 2; // B = B - 0.625*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + + A = x_half * t16 * 2; // A = (x/2)^5 + t16 = (int16_t)(A >> 16); + B += 28672 * t16 * 2; // B = B + 0.875*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + t16 = (int16_t)(x2 >> 16); + A = x_half * t16 * 2; // A = x/2^3 + + B = B + (A >> 1); // B = B + 0.5*A + // After this, B = 1 + x/2 - 0.5*(x/2)^2 + 0.5*(x/2)^3 - 0.625*(x/2)^4 + 0.875*(x/2)^5 + + B = B + ((int32_t)32768); // Round off bit + + return B; +} + +int32_t WebRtcSpl_Sqrt(int32_t value) +{ + /* + Algorithm: + + Six term Taylor Series is used here to compute the square root of a number + y^0.5 = (1+x)^0.5 where x = y-1 + = 1+(x/2)-0.5*((x/2)^2+0.5*((x/2)^3-0.625*((x/2)^4+0.875*((x/2)^5) + 0.5 <= x < 1 + + Example of how the algorithm works, with 
ut=sqrt(in), and + with in=73632 and ut=271 (even shift value case): + + in=73632 + y= in/131072 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2))*512 + + or: + + in=73632 + in2=73632*2^14 + y= in2/2^31 + x=y-1 + t = 1 + (x/2) - 0.5*((x/2)^2) + 0.5*((x/2)^3) - 0.625*((x/2)^4) + 0.875*((x/2)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 0.56176757812500 + x = -0.43823242187500 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + or: + + in=73632 + in2=73632*2^14 + y=in2/2 + x=y-2^30 + x_half=x/2^31 + t = 1 + (x_half) - 0.5*((x_half)^2) + 0.5*((x_half)^3) - 0.625*((x_half)^4) + + 0.875*((x_half)^5) + ut=t*(1/sqrt(2)) + ut2=ut*2^9 + + which gives: + + in = 73632 + in2 = 1206386688 + y = 603193344 + x = -470548480 + x_half = -0.21911621093750 + t = 0.74973506527313 + ut = 0.53014274874797 + ut2 = 2.714330873589594e+002 + + */ + + int16_t x_norm, nshift, t16, sh; + int32_t A; + + int16_t k_sqrt_2 = 23170; // 1/sqrt2 (==5a82) + + A = value; + + // The convention in this function is to calculate sqrt(abs(A)). Negate the + // input if it is negative. + if (A < 0) { + if (A == WEBRTC_SPL_WORD32_MIN) { + // This number cannot be held in an int32_t after negating. + // Map it to the maximum positive value. + A = WEBRTC_SPL_WORD32_MAX; + } else { + A = -A; + } + } else if (A == 0) { + return 0; // sqrt(0) = 0 + } + + sh = WebRtcSpl_NormW32(A); // # shifts to normalize A + A = WEBRTC_SPL_LSHIFT_W32(A, sh); // Normalize A + if (A < (WEBRTC_SPL_WORD32_MAX - 32767)) + { + A = A + ((int32_t)32768); // Round off bit + } else + { + A = WEBRTC_SPL_WORD32_MAX; + } + + x_norm = (int16_t)(A >> 16); // x_norm = AH + + nshift = (sh / 2); + RTC_DCHECK_GE(nshift, 0); + + A = (int32_t)WEBRTC_SPL_LSHIFT_W32((int32_t)x_norm, 16); + A = WEBRTC_SPL_ABS_W32(A); // A = abs(x_norm<<16) + A = WebRtcSpl_SqrtLocal(A); // A = sqrt(A) + + if (2 * nshift == sh) { + // Even shift value case + + t16 = (int16_t)(A >> 16); // t16 = AH + + A = k_sqrt_2 * t16 * 2; // A = 1/sqrt(2)*t16 + A = A + ((int32_t)32768); // Round off + A = A & ((int32_t)0x7fff0000); // Round off + + A >>= 15; // A = A>>16 + + } else + { + A >>= 16; // A = A>>16 + } + + A = A & ((int32_t)0x0000ffff); + A >>= nshift; // De-normalize the result. + + return A; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/vector_scaling_operations.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/vector_scaling_operations.c new file mode 100644 index 00000000..e1f391d1 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/signal_processing/vector_scaling_operations.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +/* + * This file contains implementations of the functions + * WebRtcSpl_VectorBitShiftW16() + * WebRtcSpl_VectorBitShiftW32() + * WebRtcSpl_VectorBitShiftW32ToW16() + * WebRtcSpl_ScaleVector() + * WebRtcSpl_ScaleVectorWithSat() + * WebRtcSpl_ScaleAndAddVectors() + * WebRtcSpl_ScaleAndAddVectorsWithRoundC() + */ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +void WebRtcSpl_VectorBitShiftW16(int16_t *res, size_t length, + const int16_t *in, int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) >> right_shifts); + } + } else + { + for (i = length; i > 0; i--) + { + (*res++) = ((*in++) * (1 << (-right_shifts))); + } + } +} + +void WebRtcSpl_VectorBitShiftW32(int32_t *out_vector, + size_t vector_length, + const int32_t *in_vector, + int16_t right_shifts) +{ + size_t i; + + if (right_shifts > 0) + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) >> right_shifts); + } + } else + { + for (i = vector_length; i > 0; i--) + { + (*out_vector++) = ((*in_vector++) << (-right_shifts)); + } + } +} + +void WebRtcSpl_VectorBitShiftW32ToW16(int16_t* out, size_t length, + const int32_t* in, int right_shifts) { + size_t i; + int32_t tmp_w32; + + if (right_shifts >= 0) { + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) >> right_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } else { + int left_shifts = -right_shifts; + for (i = length; i > 0; i--) { + tmp_w32 = (*in++) << left_shifts; + (*out++) = WebRtcSpl_SatW32ToW16(tmp_w32); + } + } +} + +void WebRtcSpl_ScaleVector(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) + { + *outptr++ = (int16_t)((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleVectorWithSat(const int16_t *in_vector, int16_t *out_vector, + int16_t gain, size_t in_vector_length, + int16_t right_shifts) +{ + // Performs vector operation: out_vector = (gain*in_vector)>>right_shifts + size_t i; + const int16_t *inptr; + int16_t *outptr; + + inptr = in_vector; + outptr = out_vector; + + for (i = 0; i < in_vector_length; i++) { + *outptr++ = WebRtcSpl_SatW32ToW16((*inptr++ * gain) >> right_shifts); + } +} + +void WebRtcSpl_ScaleAndAddVectors(const int16_t *in1, int16_t gain1, int shift1, + const int16_t *in2, int16_t gain2, int shift2, + int16_t *out, size_t vector_length) +{ + // Performs vector operation: out = (gain1*in1)>>shift1 + (gain2*in2)>>shift2 + size_t i; + const int16_t *in1ptr; + const int16_t *in2ptr; + int16_t *outptr; + + in1ptr = in1; + in2ptr = in2; + outptr = out; + + for (i = 0; i < vector_length; i++) + { + *outptr++ = (int16_t)((gain1 * *in1ptr++) >> shift1) + + (int16_t)((gain2 * *in2ptr++) >> shift2); + } +} + +// C version of WebRtcSpl_ScaleAndAddVectorsWithRound() for generic platforms. 
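+// Computes out_vector[i] =
+//   (in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale
+//    + round_value) >> right_shifts
+// with round_value = (1 << right_shifts) >> 1. Returns 0 on success and -1 if
+// any pointer is NULL, the length is zero, or right_shifts is negative.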
+int WebRtcSpl_ScaleAndAddVectorsWithRoundC(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + size_t length) { + size_t i = 0; + int round_value = (1 << right_shifts) >> 1; + + if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL || + length == 0 || right_shifts < 0) { + return -1; + } + + for (i = 0; i < length; i++) { + out_vector[i] = (int16_t)(( + in_vector1[i] * in_vector1_scale + in_vector2[i] * in_vector2_scale + + round_value) >> right_shifts); + } + + return 0; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c new file mode 100644 index 00000000..25bb0a11 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.c @@ -0,0 +1,77 @@ +/* + * Written by Wilco Dijkstra, 1996. The following email exchange establishes the + * license. + * + * From: Wilco Dijkstra + * Date: Fri, Jun 24, 2011 at 3:20 AM + * Subject: Re: sqrt routine + * To: Kevin Ma + * Hi Kevin, + * Thanks for asking. Those routines are public domain (originally posted to + * comp.sys.arm a long time ago), so you can use them freely for any purpose. + * Cheers, + * Wilco + * + * ----- Original Message ----- + * From: "Kevin Ma" + * To: + * Sent: Thursday, June 23, 2011 11:44 PM + * Subject: Fwd: sqrt routine + * Hi Wilco, + * I saw your sqrt routine from several web sites, including + * http://www.finesse.demon.co.uk/steven/sqrt.html. + * Just wonder if there's any copyright information with your Successive + * approximation routines, or if I can freely use it for any purpose. + * Thanks. + * Kevin + */ + +// Minor modifications in code style for WebRTC, 2012. + +#include "webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h" + +/* + * Algorithm: + * Successive approximation of the equation (root + delta) ^ 2 = N + * until delta < 1. If delta < 1 we have the integer part of SQRT (N). + * Use delta = 2^i for i = 15 .. 0. + * + * Output precision is 16 bits. Note for large input values (close to + * 0x7FFFFFFF), bit 15 (the highest bit of the low 16-bit half word) + * contains the MSB information (a non-sign value). Do with caution + * if you need to cast the output to int16_t type. + * + * If the input value is negative, it returns 0. 
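+ *
+ * Example: WebRtcSpl_SqrtFloor(8) returns 2 and WebRtcSpl_SqrtFloor(1000000)
+ * returns 1000.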
+ */ + +#define WEBRTC_SPL_SQRT_ITER(N) \ + try1 = root + (1 << (N)); \ + if (value >= try1 << (N)) \ + { \ + value -= try1 << (N); \ + root |= 2 << (N); \ + } + +int32_t WebRtcSpl_SqrtFloor(int32_t value) +{ + int32_t root = 0, try1; + + WEBRTC_SPL_SQRT_ITER (15); + WEBRTC_SPL_SQRT_ITER (14); + WEBRTC_SPL_SQRT_ITER (13); + WEBRTC_SPL_SQRT_ITER (12); + WEBRTC_SPL_SQRT_ITER (11); + WEBRTC_SPL_SQRT_ITER (10); + WEBRTC_SPL_SQRT_ITER ( 9); + WEBRTC_SPL_SQRT_ITER ( 8); + WEBRTC_SPL_SQRT_ITER ( 7); + WEBRTC_SPL_SQRT_ITER ( 6); + WEBRTC_SPL_SQRT_ITER ( 5); + WEBRTC_SPL_SQRT_ITER ( 4); + WEBRTC_SPL_SQRT_ITER ( 3); + WEBRTC_SPL_SQRT_ITER ( 2); + WEBRTC_SPL_SQRT_ITER ( 1); + WEBRTC_SPL_SQRT_ITER ( 0); + + return root >> 1; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h new file mode 100644 index 00000000..eaa58e30 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/third_party/spl_sqrt_floor/spl_sqrt_floor.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include + +// +// WebRtcSpl_SqrtFloor(...) +// +// Returns the square root of the input value |value|. The precision of this +// function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer. +// If |value| is a negative number then 0 is returned. +// +// Algorithm: +// +// An iterative 4 cylce/bit routine +// +// Input: +// - value : Value to calculate sqrt of +// +// Return value : Result of the sqrt calculation +// +int32_t WebRtcSpl_SqrtFloor(int32_t value); diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/include/webrtc_vad.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/include/webrtc_vad.h new file mode 100644 index 00000000..f5bbadf5 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/include/webrtc_vad.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes the VAD API calls. Specific function calls are + * given below. + */ + +#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT +#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ + +#include +#include + +typedef struct WebRtcVadInst VadInst; + +#ifdef __cplusplus +extern "C" { +#endif + +// Creates an instance to the VAD structure. +VadInst* WebRtcVad_Create(void); + +// Frees the dynamic memory of a specified VAD instance. +// +// - handle [i] : Pointer to VAD instance that should be freed. +void WebRtcVad_Free(VadInst* handle); + +// Initializes a VAD instance. +// +// - handle [i/o] : Instance that should be initialized. 
+// +// returns : 0 - (OK), +// -1 - (null pointer or Default mode could not be set). +int WebRtcVad_Init(VadInst* handle); + +// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more +// restrictive in reporting speech. Put in other words the probability of being +// speech when the VAD returns 1 is increased with increasing mode. As a +// consequence also the missed detection rate goes up. +// +// - handle [i/o] : VAD instance. +// - mode [i] : Aggressiveness mode (0, 1, 2, or 3). +// +// returns : 0 - (OK), +// -1 - (null pointer, mode could not be set or the VAD instance +// has not been initialized). +int WebRtcVad_set_mode(VadInst* handle, int mode); + +// Calculates a VAD decision for the |audio_frame|. For valid sampling rates +// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). +// +// - handle [i/o] : VAD Instance. Needs to be initialized by +// WebRtcVad_Init() before call. +// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 +// - audio_frame [i] : Audio frame buffer. +// - frame_length [i] : Length of audio frame buffer in number of samples. +// +// returns : 1 - (Active Voice), +// 0 - (Non-active Voice), +// -1 - (Error) +int WebRtcVad_Process(VadInst* handle, + int fs, + const int16_t* audio_frame, + size_t frame_length); + +// Checks for valid combinations of |rate| and |frame_length|. We support 10, +// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. +// +// - rate [i] : Sampling frequency (Hz). +// - frame_length [i] : Speech frame buffer length in number of samples. +// +// returns : 0 - (valid combination), -1 - (invalid combination) +int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); + +#ifdef __cplusplus +} +#endif + +#endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.c new file mode 100644 index 00000000..eb336f9b --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.c @@ -0,0 +1,685 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/common_audio/vad/vad_core.h" + +#include "webrtc/rtc_base/sanitizer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/vad/vad_filterbank.h" +#include "webrtc/common_audio/vad/vad_gmm.h" +#include "webrtc/common_audio/vad/vad_sp.h" + +// Spectrum Weighting +static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 }; +static const int16_t kNoiseUpdateConst = 655; // Q15 +static const int16_t kSpeechUpdateConst = 6554; // Q15 +static const int16_t kBackEta = 154; // Q8 +// Minimum difference between the two models, Q5 +static const int16_t kMinimumDifference[kNumChannels] = { + 544, 544, 576, 576, 576, 576 }; +// Upper limit of mean value for speech model, Q7 +static const int16_t kMaximumSpeech[kNumChannels] = { + 11392, 11392, 11520, 11520, 11520, 11520 }; +// Minimum value for mean value +static const int16_t kMinimumMean[kNumGaussians] = { 640, 768 }; +// Upper limit of mean value for noise model, Q7 +static const int16_t kMaximumNoise[kNumChannels] = { + 9216, 9088, 8960, 8832, 8704, 8576 }; +// Start values for the Gaussian models, Q7 +// Weights for the two Gaussians for the six channels (noise) +static const int16_t kNoiseDataWeights[kTableSize] = { + 34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 }; +// Weights for the two Gaussians for the six channels (speech) +static const int16_t kSpeechDataWeights[kTableSize] = { + 48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 }; +// Means for the two Gaussians for the six channels (noise) +static const int16_t kNoiseDataMeans[kTableSize] = { + 6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 }; +// Means for the two Gaussians for the six channels (speech) +static const int16_t kSpeechDataMeans[kTableSize] = { + 8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483 +}; +// Stds for the two Gaussians for the six channels (noise) +static const int16_t kNoiseDataStds[kTableSize] = { + 378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 }; +// Stds for the two Gaussians for the six channels (speech) +static const int16_t kSpeechDataStds[kTableSize] = { + 555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 }; + +// Constants used in GmmProbability(). +// +// Maximum number of counted speech (VAD = 1) frames in a row. +static const int16_t kMaxSpeechFrames = 6; +// Minimum standard deviation for both speech and noise. +static const int16_t kMinStd = 384; + +// Constants in WebRtcVad_InitCore(). +// Default aggressiveness mode. +static const short kDefaultMode = 0; +static const int kInitCheck = 42; + +// Constants used in WebRtcVad_set_mode_core(). +// +// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms). +// +// Mode 0, Quality. +static const int16_t kOverHangMax1Q[3] = { 8, 4, 3 }; +static const int16_t kOverHangMax2Q[3] = { 14, 7, 5 }; +static const int16_t kLocalThresholdQ[3] = { 24, 21, 24 }; +static const int16_t kGlobalThresholdQ[3] = { 57, 48, 57 }; +// Mode 1, Low bitrate. +static const int16_t kOverHangMax1LBR[3] = { 8, 4, 3 }; +static const int16_t kOverHangMax2LBR[3] = { 14, 7, 5 }; +static const int16_t kLocalThresholdLBR[3] = { 37, 32, 37 }; +static const int16_t kGlobalThresholdLBR[3] = { 100, 80, 100 }; +// Mode 2, Aggressive. 
+static const int16_t kOverHangMax1AGG[3] = { 6, 3, 2 }; +static const int16_t kOverHangMax2AGG[3] = { 9, 5, 3 }; +static const int16_t kLocalThresholdAGG[3] = { 82, 78, 82 }; +static const int16_t kGlobalThresholdAGG[3] = { 285, 260, 285 }; +// Mode 3, Very aggressive. +static const int16_t kOverHangMax1VAG[3] = { 6, 3, 2 }; +static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 }; +static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 }; +static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 }; + +// Calculates the weighted average w.r.t. number of Gaussians. The |data| are +// updated with an |offset| before averaging. +// +// - data [i/o] : Data to average. +// - offset [i] : An offset added to |data|. +// - weights [i] : Weights used for averaging. +// +// returns : The weighted average. +static int32_t WeightedAverage(int16_t* data, int16_t offset, + const int16_t* weights) { + int k; + int32_t weighted_average = 0; + + for (k = 0; k < kNumGaussians; k++) { + data[k * kNumChannels] += offset; + weighted_average += data[k * kNumChannels] * weights[k * kNumChannels]; + } + return weighted_average; +} + +// An s16 x s32 -> s32 multiplication that's allowed to overflow. (It's still +// undefined behavior, so not a good idea; this just makes UBSan ignore the +// violation, so that our old code can continue to do what it's always been +// doing.) +static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow") + OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) { + return a * b; +} + +// Calculates the probabilities for both speech and background noise using +// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which +// type of signal is most probable. +// +// - self [i/o] : Pointer to VAD instance +// - features [i] : Feature vector of length |kNumChannels| +// = log10(energy in frequency band) +// - total_power [i] : Total power in audio frame. +// - frame_length [i] : Number of input samples +// +// - returns : the VAD decision (0 - noise, 1 - speech). +static int16_t GmmProbability(VadInstT* self, int16_t* features, + int16_t total_power, size_t frame_length) { + int channel, k; + int16_t feature_minimum; + int16_t h0, h1; + int16_t log_likelihood_ratio; + int16_t vadflag = 0; + int16_t shifts_h0, shifts_h1; + int16_t tmp_s16, tmp1_s16, tmp2_s16; + int16_t diff; + int gaussian; + int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; + int16_t delt, ndelt; + int16_t maxspe, maxmu; + int16_t deltaN[kTableSize], deltaS[kTableSize]; + int16_t ngprvec[kTableSize] = { 0 }; // Conditional probability = 0. + int16_t sgprvec[kTableSize] = { 0 }; // Conditional probability = 0. + int32_t h0_test, h1_test; + int32_t tmp1_s32, tmp2_s32; + int32_t sum_log_likelihood_ratios = 0; + int32_t noise_global_mean, speech_global_mean; + int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians]; + int16_t overhead1, overhead2, individualTest, totalTest; + + // Set various thresholds based on frame lengths (80, 160 or 240 samples). 
+ if (frame_length == 80) { + overhead1 = self->over_hang_max_1[0]; + overhead2 = self->over_hang_max_2[0]; + individualTest = self->individual[0]; + totalTest = self->total[0]; + } else if (frame_length == 160) { + overhead1 = self->over_hang_max_1[1]; + overhead2 = self->over_hang_max_2[1]; + individualTest = self->individual[1]; + totalTest = self->total[1]; + } else { + overhead1 = self->over_hang_max_1[2]; + overhead2 = self->over_hang_max_2[2]; + individualTest = self->individual[2]; + totalTest = self->total[2]; + } + + if (total_power > kMinEnergy) { + // The signal power of current frame is large enough for processing. The + // processing consists of two parts: + // 1) Calculating the likelihood of speech and thereby a VAD decision. + // 2) Updating the underlying model, w.r.t., the decision made. + + // The detection scheme is an LRT with hypothesis + // H0: Noise + // H1: Speech + // + // We combine a global LRT with local tests, for each frequency sub-band, + // here defined as |channel|. + for (channel = 0; channel < kNumChannels; channel++) { + // For each channel we model the probability with a GMM consisting of + // |kNumGaussians|, with different means and standard deviations depending + // on H0 or H1. + h0_test = 0; + h1_test = 0; + for (k = 0; k < kNumGaussians; k++) { + gaussian = channel + k * kNumChannels; + // Probability under H0, that is, probability of frame being noise. + // Value given in Q27 = Q7 * Q20. + tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], + self->noise_means[gaussian], + self->noise_stds[gaussian], + &deltaN[gaussian]); + noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32; + h0_test += noise_probability[k]; // Q27 + + // Probability under H1, that is, probability of frame being speech. + // Value given in Q27 = Q7 * Q20. + tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], + self->speech_means[gaussian], + self->speech_stds[gaussian], + &deltaS[gaussian]); + speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32; + h1_test += speech_probability[k]; // Q27 + } + + // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H1}). + // Approximation: + // log2(Pr{X|H1} / Pr{X|H1}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H1}*2^Q) + // = log2(h1_test) - log2(h0_test) + // = log2(2^(31-shifts_h1)*(1+b1)) + // - log2(2^(31-shifts_h0)*(1+b0)) + // = shifts_h0 - shifts_h1 + // + log2(1+b1) - log2(1+b0) + // ~= shifts_h0 - shifts_h1 + // + // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1. + // Further, b0 and b1 are independent and on the average the two terms + // cancel. + shifts_h0 = WebRtcSpl_NormW32(h0_test); + shifts_h1 = WebRtcSpl_NormW32(h1_test); + if (h0_test == 0) { + shifts_h0 = 31; + } + if (h1_test == 0) { + shifts_h1 = 31; + } + log_likelihood_ratio = shifts_h0 - shifts_h1; + + // Update |sum_log_likelihood_ratios| with spectrum weighting. This is + // used for the global VAD decision. + sum_log_likelihood_ratios += + (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]); + + // Local VAD decision. + if ((log_likelihood_ratio * 4) > individualTest) { + vadflag = 1; + } + + // TODO(bjornv): The conditional probabilities below are applied on the + // hard coded number of Gaussians set to two. Find a way to generalize. + // Calculate local noise probabilities used later when updating the GMM. + h0 = (int16_t) (h0_test >> 12); // Q15 + if (h0 > 0) { + // High probability of noise. Assign conditional probabilities for each + // Gaussian in the GMM. 
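
The log-likelihood ratio a few lines above is computed purely from normalization shift counts: for a positive 32-bit value, WebRtcSpl_NormW32() returns how many left shifts are needed to normalize it, which is essentially 30 minus floor(log2(x)), so log2(h1_test) minus log2(h0_test) collapses to shifts_h0 - shifts_h1. The sketch below illustrates that idea with a plain floor-log2 loop instead of the real WebRtcSpl helper; the helper name and the example values are made up for illustration only.

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative only: floor(log2(x)) for x > 0. For positive x,
 * WebRtcSpl_NormW32(x) is roughly 30 - floor(log2(x)), so the difference of
 * two norm counts equals the difference of the two log2 values (negated). */
static int floor_log2_u32(uint32_t x) {
    int n = 0;
    while (x > 1) {
        x >>= 1;
        n++;
    }
    return n;
}

int main(void) {
    uint32_t h0 = 3000;   /* noise-hypothesis score (arbitrary example value)  */
    uint32_t h1 = 48000;  /* speech-hypothesis score (arbitrary example value) */
    int llr_bits = floor_log2_u32(h1) - floor_log2_u32(h0);  /* ~log2(h1 / h0) */
    printf("approximate log2 likelihood ratio: %d\n", llr_bits);  /* prints 4 */
    return 0;
}
```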
+ tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29 + ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14 + ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]; + } else { + // Low noise probability. Assign conditional probability 1 to the first + // Gaussian and 0 to the rest (which is already set at initialization). + ngprvec[channel] = 16384; + } + + // Calculate local speech probabilities used later when updating the GMM. + h1 = (int16_t) (h1_test >> 12); // Q15 + if (h1 > 0) { + // High probability of speech. Assign conditional probabilities for each + // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0. + tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29 + sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14 + sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]; + } + } + + // Make a global VAD decision. + vadflag |= (sum_log_likelihood_ratios >= totalTest); + + // Update the model parameters. + maxspe = 12800; + for (channel = 0; channel < kNumChannels; channel++) { + + // Get minimum value in past which is used for long term correction in Q4. + feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel); + + // Compute the "global" mean, that is the sum of the two means weighted. + noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, + &kNoiseDataWeights[channel]); + tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8 + + for (k = 0; k < kNumGaussians; k++) { + gaussian = channel + k * kNumChannels; + + nmk = self->noise_means[gaussian]; + smk = self->speech_means[gaussian]; + nsk = self->noise_stds[gaussian]; + ssk = self->speech_stds[gaussian]; + + // Update noise mean vector if the frame consists of noise only. + nmk2 = nmk; + if (!vadflag) { + // deltaN = (x-mu)/sigma^2 + // ngprvec[k] = |noise_probability[k]| / + // (|noise_probability[0]| + |noise_probability[1]|) + + // (Q14 * Q11 >> 11) = Q14. + delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11); + // Q7 + (Q14 * Q15 >> 22) = Q7. + nmk2 = nmk + (int16_t)((delt * kNoiseUpdateConst) >> 22); + } + + // Long term correction of the noise mean. + // Q8 - Q8 = Q8. + ndelt = (feature_minimum << 4) - tmp1_s16; + // Q7 + (Q8 * Q8) >> 9 = Q7. + nmk3 = nmk2 + (int16_t)((ndelt * kBackEta) >> 9); + + // Control that the noise mean does not drift to much. + tmp_s16 = (int16_t) ((k + 5) << 7); + if (nmk3 < tmp_s16) { + nmk3 = tmp_s16; + } + tmp_s16 = (int16_t) ((72 + k - channel) << 7); + if (nmk3 > tmp_s16) { + nmk3 = tmp_s16; + } + self->noise_means[gaussian] = nmk3; + + if (vadflag) { + // Update speech mean vector: + // |deltaS| = (x-mu)/sigma^2 + // sgprvec[k] = |speech_probability[k]| / + // (|speech_probability[0]| + |speech_probability[1]|) + + // (Q14 * Q11) >> 11 = Q14. + delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11); + // Q14 * Q15 >> 21 = Q8. + tmp_s16 = (int16_t)((delt * kSpeechUpdateConst) >> 21); + // Q7 + (Q8 >> 1) = Q7. With rounding. + smk2 = smk + ((tmp_s16 + 1) >> 1); + + // Control that the speech mean does not drift to much. + maxmu = maxspe + 640; + if (smk2 < kMinimumMean[k]) { + smk2 = kMinimumMean[k]; + } + if (smk2 > maxmu) { + smk2 = maxmu; + } + self->speech_means[gaussian] = smk2; // Q7. + + // (Q7 >> 3) = Q4. With rounding. + tmp_s16 = ((smk + 4) >> 3); + + tmp_s16 = features[channel] - tmp_s16; // Q4 + // (Q11 * Q4 >> 3) = Q12. 
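
The Q-domain bookkeeping in the comments throughout this update code (e.g. "(Q14 * Q11) >> 11 = Q14") follows the usual fixed-point rule: a Qa value times a Qb value yields a Q(a+b) product, and a right shift by s drops it to Q(a+b-s). The following self-contained sketch shows the pattern on a Q7 mean nudged by a Q15 constant; the numbers are made up for illustration and are not the exact update performed by GmmProbability(), although 6554 is the same 0.2-in-Q15 value as kSpeechUpdateConst.

```c
#include <stdint.h>
#include <stdio.h>

int main(void) {
    /* Q7 mean value: 52.5 -> 52.5 * 2^7 = 6720. */
    int16_t mean_q7 = 6720;
    /* Q15 update constant: 0.2 -> 0.2 * 2^15 = 6554 (same value as kSpeechUpdateConst). */
    int16_t alpha_q15 = 6554;
    /* Q7 correction term: 3.0 -> 3.0 * 2^7 = 384. */
    int16_t corr_q7 = 384;

    /* Q7 * Q15 = Q22 product; shifting right by 15 brings it back to Q7. */
    int32_t step_q7 = ((int32_t)corr_q7 * alpha_q15) >> 15;
    int16_t new_mean_q7 = (int16_t)(mean_q7 + step_q7);

    printf("mean: %.4f -> %.4f\n", mean_q7 / 128.0, new_mean_q7 / 128.0);
    /* Expected: 52.5 -> roughly 52.5 + 0.2 * 3.0 = 53.1 (minus truncation error). */
    return 0;
}
```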
+ tmp1_s32 = (deltaS[gaussian] * tmp_s16) >> 3; + tmp2_s32 = tmp1_s32 - 4096; + tmp_s16 = sgprvec[gaussian] >> 2; + // (Q14 >> 2) * Q12 = Q24. + tmp1_s32 = tmp_s16 * tmp2_s32; + + tmp2_s32 = tmp1_s32 >> 4; // Q20 + + // 0.1 * Q20 / Q7 = Q13. + if (tmp2_s32 > 0) { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10); + } else { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10); + tmp_s16 = -tmp_s16; + } + // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4). + // Note that division by 4 equals shift by 2, hence, + // (Q13 >> 8) = (Q13 >> 6) / 4 = Q7. + tmp_s16 += 128; // Rounding. + ssk += (tmp_s16 >> 8); + if (ssk < kMinStd) { + ssk = kMinStd; + } + self->speech_stds[gaussian] = ssk; + } else { + // Update GMM variance vectors. + // deltaN * (features[channel] - nmk) - 1 + // Q4 - (Q7 >> 3) = Q4. + tmp_s16 = features[channel] - (nmk >> 3); + // (Q11 * Q4 >> 3) = Q12. + tmp1_s32 = (deltaN[gaussian] * tmp_s16) >> 3; + tmp1_s32 -= 4096; + + // (Q14 >> 2) * Q12 = Q24. + tmp_s16 = (ngprvec[gaussian] + 2) >> 2; + tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32); + // Q20 * approx 0.001 (2^-10=0.0009766), hence, + // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20. + tmp1_s32 = tmp2_s32 >> 14; + + // Q20 / Q7 = Q13. + if (tmp1_s32 > 0) { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk); + } else { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk); + tmp_s16 = -tmp_s16; + } + tmp_s16 += 32; // Rounding + nsk += tmp_s16 >> 6; // Q13 >> 6 = Q7. + if (nsk < kMinStd) { + nsk = kMinStd; + } + self->noise_stds[gaussian] = nsk; + } + } + + // Separate models if they are too close. + // |noise_global_mean| in Q14 (= Q7 * Q7). + noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, + &kNoiseDataWeights[channel]); + + // |speech_global_mean| in Q14 (= Q7 * Q7). + speech_global_mean = WeightedAverage(&self->speech_means[channel], 0, + &kSpeechDataWeights[channel]); + + // |diff| = "global" speech mean - "global" noise mean. + // (Q14 >> 9) - (Q14 >> 9) = Q5. + diff = (int16_t) (speech_global_mean >> 9) - + (int16_t) (noise_global_mean >> 9); + if (diff < kMinimumDifference[channel]) { + tmp_s16 = kMinimumDifference[channel] - diff; + + // |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7. + // |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7. + tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2); + tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2); + + // Move Gaussian means for speech model by |tmp1_s16| and update + // |speech_global_mean|. Note that |self->speech_means[channel]| is + // changed after the call. + speech_global_mean = WeightedAverage(&self->speech_means[channel], + tmp1_s16, + &kSpeechDataWeights[channel]); + + // Move Gaussian means for noise model by -|tmp2_s16| and update + // |noise_global_mean|. Note that |self->noise_means[channel]| is + // changed after the call. + noise_global_mean = WeightedAverage(&self->noise_means[channel], + -tmp2_s16, + &kNoiseDataWeights[channel]); + } + + // Control that the speech & noise means do not drift to much. + maxspe = kMaximumSpeech[channel]; + tmp2_s16 = (int16_t) (speech_global_mean >> 7); + if (tmp2_s16 > maxspe) { + // Upper limit of speech model. 
+ tmp2_s16 -= maxspe; + + for (k = 0; k < kNumGaussians; k++) { + self->speech_means[channel + k * kNumChannels] -= tmp2_s16; + } + } + + tmp2_s16 = (int16_t) (noise_global_mean >> 7); + if (tmp2_s16 > kMaximumNoise[channel]) { + tmp2_s16 -= kMaximumNoise[channel]; + + for (k = 0; k < kNumGaussians; k++) { + self->noise_means[channel + k * kNumChannels] -= tmp2_s16; + } + } + } + self->frame_counter++; + } + + // Smooth with respect to transition hysteresis. + if (!vadflag) { + if (self->over_hang > 0) { + vadflag = 2 + self->over_hang; + self->over_hang--; + } + self->num_of_speech = 0; + } else { + self->num_of_speech++; + if (self->num_of_speech > kMaxSpeechFrames) { + self->num_of_speech = kMaxSpeechFrames; + self->over_hang = overhead2; + } else { + self->over_hang = overhead1; + } + } + return vadflag; +} + +// Initialize the VAD. Set aggressiveness mode to default value. +int WebRtcVad_InitCore(VadInstT* self) { + int i; + + if (self == NULL) { + return -1; + } + + // Initialization of general struct variables. + self->vad = 1; // Speech active (=1). + self->frame_counter = 0; + self->over_hang = 0; + self->num_of_speech = 0; + + // Initialization of downsampling filter state. + memset(self->downsampling_filter_states, 0, + sizeof(self->downsampling_filter_states)); + + // Initialization of 48 to 8 kHz downsampling. + WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8); + + // Read initial PDF parameters. + for (i = 0; i < kTableSize; i++) { + self->noise_means[i] = kNoiseDataMeans[i]; + self->speech_means[i] = kSpeechDataMeans[i]; + self->noise_stds[i] = kNoiseDataStds[i]; + self->speech_stds[i] = kSpeechDataStds[i]; + } + + // Initialize Index and Minimum value vectors. + for (i = 0; i < 16 * kNumChannels; i++) { + self->low_value_vector[i] = 10000; + self->index_vector[i] = 0; + } + + // Initialize splitting filter states. + memset(self->upper_state, 0, sizeof(self->upper_state)); + memset(self->lower_state, 0, sizeof(self->lower_state)); + + // Initialize high pass filter states. + memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state)); + + // Initialize mean value memory, for WebRtcVad_FindMinimum(). + for (i = 0; i < kNumChannels; i++) { + self->mean_value[i] = 1600; + } + + // Set aggressiveness mode to default (=|kDefaultMode|). + if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) { + return -1; + } + + self->init_flag = kInitCheck; + + return 0; +} + +// Set aggressiveness mode +int WebRtcVad_set_mode_core(VadInstT* self, int mode) { + int return_value = 0; + + switch (mode) { + case 0: + // Quality mode. + memcpy(self->over_hang_max_1, kOverHangMax1Q, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2Q, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdQ, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdQ, + sizeof(self->total)); + break; + case 1: + // Low bitrate mode. + memcpy(self->over_hang_max_1, kOverHangMax1LBR, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2LBR, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdLBR, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdLBR, + sizeof(self->total)); + break; + case 2: + // Aggressive mode. 
+ memcpy(self->over_hang_max_1, kOverHangMax1AGG, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2AGG, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdAGG, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdAGG, + sizeof(self->total)); + break; + case 3: + // Very aggressive mode. + memcpy(self->over_hang_max_1, kOverHangMax1VAG, + sizeof(self->over_hang_max_1)); + memcpy(self->over_hang_max_2, kOverHangMax2VAG, + sizeof(self->over_hang_max_2)); + memcpy(self->individual, kLocalThresholdVAG, + sizeof(self->individual)); + memcpy(self->total, kGlobalThresholdVAG, + sizeof(self->total)); + break; + default: + return_value = -1; + break; + } + + return return_value; +} + +// Calculate VAD decision by first extracting feature values and then calculate +// probability for both speech and background noise. + +int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) { + int vad; + size_t i; + int16_t speech_nb[240]; // 30 ms in 8 kHz. + // |tmp_mem| is a temporary memory used by resample function, length is + // frame length in 10 ms (480 samples) + 256 extra. + int32_t tmp_mem[480 + 256] = { 0 }; + const size_t kFrameLen10ms48khz = 480; + const size_t kFrameLen10ms8khz = 80; + size_t num_10ms_frames = frame_length / kFrameLen10ms48khz; + + for (i = 0; i < num_10ms_frames; i++) { + WebRtcSpl_Resample48khzTo8khz(speech_frame, + &speech_nb[i * kFrameLen10ms8khz], + &inst->state_48_to_8, + tmp_mem); + } + + // Do VAD on an 8 kHz signal + vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6); + + return vad; +} + +int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) +{ + size_t len; + int vad; + int16_t speechWB[480]; // Downsampled speech frame: 960 samples (30ms in SWB) + int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) + + + // Downsample signal 32->16->8 before doing VAD + WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]), + frame_length); + len = frame_length / 2; + + WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len); + len /= 2; + + // Do VAD on an 8 kHz signal + vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); + + return vad; +} + +int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) +{ + size_t len; + int vad; + int16_t speechNB[240]; // Downsampled speech frame: 480 samples (30ms in WB) + + // Wideband: Downsample signal before doing VAD + WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states, + frame_length); + + len = frame_length / 2; + vad = WebRtcVad_CalcVad8khz(inst, speechNB, len); + + return vad; +} + +int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame, + size_t frame_length) +{ + int16_t feature_vector[kNumChannels], total_power; + + // Get power in the bands + total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length, + feature_vector); + + // Make a VAD + inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length); + + return inst->vad; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.h new file mode 100644 index 00000000..8f0cfc06 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_core.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2012 The 
WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes the descriptions of the core VAD calls. + */ + +#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_ +#define COMMON_AUDIO_VAD_VAD_CORE_H_ + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +enum { kNumChannels = 6 }; // Number of frequency bands (named channels). +enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM. +enum { kTableSize = kNumChannels * kNumGaussians }; +enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal. + +typedef struct VadInstT_ { + int vad; + int32_t downsampling_filter_states[4]; + WebRtcSpl_State48khzTo8khz state_48_to_8; + int16_t noise_means[kTableSize]; + int16_t speech_means[kTableSize]; + int16_t noise_stds[kTableSize]; + int16_t speech_stds[kTableSize]; + // TODO(bjornv): Change to |frame_count|. + int32_t frame_counter; + int16_t over_hang; // Over Hang + int16_t num_of_speech; + // TODO(bjornv): Change to |age_vector|. + int16_t index_vector[16 * kNumChannels]; + int16_t low_value_vector[16 * kNumChannels]; + // TODO(bjornv): Change to |median|. + int16_t mean_value[kNumChannels]; + int16_t upper_state[5]; + int16_t lower_state[5]; + int16_t hp_filter_state[4]; + int16_t over_hang_max_1[3]; + int16_t over_hang_max_2[3]; + int16_t individual[3]; + int16_t total[3]; + + int init_flag; +} VadInstT; + +// Initializes the core VAD component. The default aggressiveness mode is +// controlled by |kDefaultMode| in vad_core.c. +// +// - self [i/o] : Instance that should be initialized +// +// returns : 0 (OK), -1 (null pointer in or if the default mode can't be +// set) +int WebRtcVad_InitCore(VadInstT* self); + +/**************************************************************************** + * WebRtcVad_set_mode_core(...) + * + * This function changes the VAD settings + * + * Input: + * - inst : VAD instance + * - mode : Aggressiveness degree + * 0 (High quality) - 3 (Highly aggressive) + * + * Output: + * - inst : Changed instance + * + * Return value : 0 - Ok + * -1 - Error + */ + +int WebRtcVad_set_mode_core(VadInstT* self, int mode); + +/**************************************************************************** + * WebRtcVad_CalcVad48khz(...) + * WebRtcVad_CalcVad32khz(...) + * WebRtcVad_CalcVad16khz(...) + * WebRtcVad_CalcVad8khz(...) + * + * Calculate probability for active speech and make VAD decision. + * + * Input: + * - inst : Instance that should be initialized + * - speech_frame : Input speech frame + * - frame_length : Number of input samples + * + * Output: + * - inst : Updated filter states etc. 
+ * + * Return value : VAD decision + * 0 - No active speech + * 1-6 - Active speech + */ +int WebRtcVad_CalcVad48khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length); +int WebRtcVad_CalcVad32khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length); +int WebRtcVad_CalcVad16khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length); +int WebRtcVad_CalcVad8khz(VadInstT* inst, + const int16_t* speech_frame, + size_t frame_length); + +#endif // COMMON_AUDIO_VAD_VAD_CORE_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.c new file mode 100644 index 00000000..7d25e2ab --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.c @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/vad/vad_filterbank.h" + +#include "webrtc/rtc_base/checks.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +// Constants used in LogOfEnergy(). +static const int16_t kLogConst = 24660; // 160*log10(2) in Q9. +static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10 + +// Coefficients used by HighPassFilter, Q14. +static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 }; +static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 }; + +// Allpass filter coefficients, upper and lower, in Q15. +// Upper: 0.64, Lower: 0.17 +static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 }; + +// Adjustment for division with two in SplitFilter. +static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; + +// High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is +// sampled at 500 Hz. +// +// - data_in [i] : Input audio data sampled at 500 Hz. +// - data_length [i] : Length of input and output data. +// - filter_state [i/o] : State of the filter. +// - data_out [o] : Output audio data in the frequency interval +// 80 - 250 Hz. +static void HighPassFilter(const int16_t* data_in, size_t data_length, + int16_t* filter_state, int16_t* data_out) { + size_t i; + const int16_t* in_ptr = data_in; + int16_t* out_ptr = data_out; + int32_t tmp32 = 0; + + + // The sum of the absolute values of the impulse response: + // The zero/pole-filter has a max amplification of a single sample of: 1.4546 + // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 + // The all-zero section has a max amplification of a single sample of: 1.6189 + // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 + // The all-pole section has a max amplification of a single sample of: 1.9931 + // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532 + + for (i = 0; i < data_length; i++) { + // All-zero section (filter coefficients in Q14). + tmp32 = kHpZeroCoefs[0] * *in_ptr; + tmp32 += kHpZeroCoefs[1] * filter_state[0]; + tmp32 += kHpZeroCoefs[2] * filter_state[1]; + filter_state[1] = filter_state[0]; + filter_state[0] = *in_ptr++; + + // All-pole section (filter coefficients in Q14). 
+ tmp32 -= kHpPoleCoefs[1] * filter_state[2]; + tmp32 -= kHpPoleCoefs[2] * filter_state[3]; + filter_state[3] = filter_state[2]; + filter_state[2] = (int16_t) (tmp32 >> 14); + *out_ptr++ = filter_state[2]; + } +} + +// All pass filtering of |data_in|, used before splitting the signal into two +// frequency bands (low pass vs high pass). +// Note that |data_in| and |data_out| can NOT correspond to the same address. +// +// - data_in [i] : Input audio signal given in Q0. +// - data_length [i] : Length of input and output data. +// - filter_coefficient [i] : Given in Q15. +// - filter_state [i/o] : State of the filter given in Q(-1). +// - data_out [o] : Output audio signal given in Q(-1). +static void AllPassFilter(const int16_t* data_in, size_t data_length, + int16_t filter_coefficient, int16_t* filter_state, + int16_t* data_out) { + // The filter can only cause overflow (in the w16 output variable) + // if more than 4 consecutive input numbers are of maximum value and + // has the the same sign as the impulse responses first taps. + // First 6 taps of the impulse response: + // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990 + + size_t i; + int16_t tmp16 = 0; + int32_t tmp32 = 0; + int32_t state32 = ((int32_t) (*filter_state) * (1 << 16)); // Q15 + + for (i = 0; i < data_length; i++) { + tmp32 = state32 + filter_coefficient * *data_in; + tmp16 = (int16_t) (tmp32 >> 16); // Q(-1) + *data_out++ = tmp16; + state32 = (*data_in * (1 << 14)) - filter_coefficient * tmp16; // Q14 + state32 *= 2; // Q15. + data_in += 2; + } + + *filter_state = (int16_t) (state32 >> 16); // Q(-1) +} + +// Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to +// an upper (high pass) part and a lower (low pass) part respectively. +// +// - data_in [i] : Input audio data to be split into two frequency bands. +// - data_length [i] : Length of |data_in|. +// - upper_state [i/o] : State of the upper filter, given in Q(-1). +// - lower_state [i/o] : State of the lower filter, given in Q(-1). +// - hp_data_out [o] : Output audio data of the upper half of the spectrum. +// The length is |data_length| / 2. +// - lp_data_out [o] : Output audio data of the lower half of the spectrum. +// The length is |data_length| / 2. +static void SplitFilter(const int16_t* data_in, size_t data_length, + int16_t* upper_state, int16_t* lower_state, + int16_t* hp_data_out, int16_t* lp_data_out) { + size_t i; + size_t half_length = data_length >> 1; // Downsampling by 2. + int16_t tmp_out; + + // All-pass filtering upper branch. + AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state, + hp_data_out); + + // All-pass filtering lower branch. + AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state, + lp_data_out); + + // Make LP and HP signals. + for (i = 0; i < half_length; i++) { + tmp_out = *hp_data_out; + *hp_data_out++ -= *lp_data_out; + *lp_data_out++ += tmp_out; + } +} + +// Calculates the energy of |data_in| in dB, and also updates an overall +// |total_energy| if necessary. +// +// - data_in [i] : Input audio data for energy calculation. +// - data_length [i] : Length of input data. +// - offset [i] : Offset value added to |log_energy|. +// - total_energy [i/o] : An external energy updated with the energy of +// |data_in|. +// NOTE: |total_energy| is only updated if +// |total_energy| <= |kMinEnergy|. +// - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4. 
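
SplitFilter() above is a two-band polyphase decimator: one first-order all-pass runs on the even input samples, another on the odd samples, and their sum and difference give the lower and upper halves of the spectrum at half the rate (the Q(-1) outputs carry the divide-by-two). Below is a rough floating-point sketch of that structure; the 0.64 and 0.17 coefficients mirror kAllPassCoefsQ15 (20972/32768 and 5571/32768), while the function names, state handling and toy input are purely illustrative.

```c
#include <stddef.h>
#include <stdio.h>

/* First-order all-pass, transposed form: y[n] = c*x[n] + x[n-1] - c*y[n-1]. */
static float all_pass(float x, float c, float* state) {
    float y = c * x + *state;
    *state = x - c * y;
    return y;
}

/* Split |in| (length n, n even) into low/high halves at half the sample rate. */
static void split_bands(const float* in, size_t n, float* lp, float* hp,
                        float* upper_state, float* lower_state) {
    for (size_t i = 0; i < n / 2; i++) {
        float a = all_pass(in[2 * i], 0.64f, upper_state);      /* even samples */
        float b = all_pass(in[2 * i + 1], 0.17f, lower_state);  /* odd samples  */
        lp[i] = 0.5f * (a + b);   /* 0 .. fs/4    */
        hp[i] = 0.5f * (a - b);   /* fs/4 .. fs/2 */
    }
}

int main(void) {
    float x[8] = {1, 0, -1, 0, 1, 0, -1, 0};  /* toy input */
    float lp[4], hp[4], s0 = 0.0f, s1 = 0.0f;
    split_bands(x, 8, lp, hp, &s0, &s1);
    for (int i = 0; i < 4; i++) printf("lp=%.3f hp=%.3f\n", lp[i], hp[i]);
    return 0;
}
```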
+static void LogOfEnergy(const int16_t* data_in, size_t data_length, + int16_t offset, int16_t* total_energy, + int16_t* log_energy) { + // |tot_rshifts| accumulates the number of right shifts performed on |energy|. + int tot_rshifts = 0; + // The |energy| will be normalized to 15 bits. We use unsigned integer because + // we eventually will mask out the fractional part. + uint32_t energy = 0; + + RTC_DCHECK(data_in); + RTC_DCHECK_GT(data_length, 0); + + energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length, + &tot_rshifts); + + if (energy != 0) { + // By construction, normalizing to 15 bits is equivalent with 17 leading + // zeros of an unsigned 32 bit value. + int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy); + // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is + // (14 << 10), which is what we initialize |log2_energy| with. For a more + // detailed derivations, see below. + int16_t log2_energy = kLogEnergyIntPart; + + tot_rshifts += normalizing_rshifts; + // Normalize |energy| to 15 bits. + // |tot_rshifts| is now the total number of right shifts performed on + // |energy| after normalization. This means that |energy| is in + // Q(-tot_rshifts). + if (normalizing_rshifts < 0) { + energy <<= -normalizing_rshifts; + } else { + energy >>= normalizing_rshifts; + } + + // Calculate the energy of |data_in| in dB, in Q4. + // + // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") = + // 160 * log10(|energy| * 2^|tot_rshifts|) = + // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) = + // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) = + // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) = + // |kLogConst| * (|log2_energy| + |tot_rshifts|) + // + // We know by construction that |energy| is normalized to 15 bits. Hence, + // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15. + // Further, we'd like |log2_energy| in Q10 + // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) = + // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) = + // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~= + // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) = + // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4) + // + // Note that frac_Q15 = (|energy| & 0x00003FFF) + + // Calculate and add the fractional part to |log2_energy|. + log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4); + + // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0. + // Note that we in our derivation above have accounted for an output in Q4. + *log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) + + ((tot_rshifts * kLogConst) >> 9)); + + if (*log_energy < 0) { + *log_energy = 0; + } + } else { + *log_energy = offset; + return; + } + + *log_energy += offset; + + // Update the approximate |total_energy| with the energy of |data_in|, if + // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an + // energy indicator in WebRtcVad_GmmProbability() in vad_core.c. + if (*total_energy <= kMinEnergy) { + if (tot_rshifts >= 0) { + // We know by construction that the |energy| > |kMinEnergy| in Q0, so add + // an arbitrary value such that |total_energy| exceeds |kMinEnergy|. + *total_energy += kMinEnergy + 1; + } else { + // By construction |energy| is represented by 15 bits, hence any number of + // right shifted |energy| will fit in an int16_t. In addition, adding the + // value to |total_energy| is wrap around safe as long as + // |kMinEnergy| < 8192. + *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0. 
+ } + } +} + +int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in, + size_t data_length, int16_t* features) { + int16_t total_energy = 0; + // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to + // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will + // have at most 120 samples after the first split and at most 60 samples after + // the second split. + int16_t hp_120[120], lp_120[120]; + int16_t hp_60[60], lp_60[60]; + const size_t half_data_length = data_length >> 1; + size_t length = half_data_length; // |data_length| / 2, corresponds to + // bandwidth = 2000 Hz after downsampling. + + // Initialize variables for the first SplitFilter(). + int frequency_band = 0; + const int16_t* in_ptr = data_in; // [0 - 4000] Hz. + int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz. + int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz. + + RTC_DCHECK_LE(data_length, 240); + RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum |frequency_band|. + + // Split at 2000 Hz and downsample. + SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample. + frequency_band = 1; + in_ptr = hp_120; // [2000 - 4000] Hz. + hp_out_ptr = hp_60; // [3000 - 4000] Hz. + lp_out_ptr = lp_60; // [2000 - 3000] Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 3000 Hz - 4000 Hz. + length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz. + + LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]); + + // Energy in 2000 Hz - 3000 Hz. + LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]); + + // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample. + frequency_band = 2; + in_ptr = lp_120; // [0 - 2000] Hz. + hp_out_ptr = hp_60; // [1000 - 2000] Hz. + lp_out_ptr = lp_60; // [0 - 1000] Hz. + length = half_data_length; // |data_length| / 2 <=> bandwidth = 2000 Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 1000 Hz - 2000 Hz. + length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz. + LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]); + + // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample. + frequency_band = 3; + in_ptr = lp_60; // [0 - 1000] Hz. + hp_out_ptr = hp_120; // [500 - 1000] Hz. + lp_out_ptr = lp_120; // [0 - 500] Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 500 Hz - 1000 Hz. + length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz. + LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]); + + // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample. + frequency_band = 4; + in_ptr = lp_120; // [0 - 500] Hz. + hp_out_ptr = hp_60; // [250 - 500] Hz. + lp_out_ptr = lp_60; // [0 - 250] Hz. + SplitFilter(in_ptr, length, &self->upper_state[frequency_band], + &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr); + + // Energy in 250 Hz - 500 Hz. + length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz. + LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]); + + // Remove 0 Hz - 80 Hz, by high pass filtering the lower band. 
+ HighPassFilter(lp_60, length, self->hp_filter_state, hp_120); + + // Energy in 80 Hz - 250 Hz. + LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]); + + return total_energy; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.h new file mode 100644 index 00000000..b4005051 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_filterbank.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file includes feature calculating functionality used in vad_core.c. + */ + +#ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ +#define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ + +#include "webrtc/common_audio/vad/vad_core.h" + +// Takes |data_length| samples of |data_in| and calculates the logarithm of the +// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD: +// 80 Hz - 250 Hz +// 250 Hz - 500 Hz +// 500 Hz - 1000 Hz +// 1000 Hz - 2000 Hz +// 2000 Hz - 3000 Hz +// 3000 Hz - 4000 Hz +// +// The values are given in Q4 and written to |features|. Further, an approximate +// overall energy is returned. The return value is used in +// WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above +// the threshold |kMinEnergy|. +// +// - self [i/o] : State information of the VAD. +// - data_in [i] : Input audio data, for feature extraction. +// - data_length [i] : Audio data size, in number of samples. +// - features [o] : 10 * log10(energy in each frequency band), Q4. +// - returns : Total energy of the signal (NOTE! This value is not +// exact. It is only used in a comparison.) +int16_t WebRtcVad_CalculateFeatures(VadInstT* self, + const int16_t* data_in, + size_t data_length, + int16_t* features); + +#endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.c new file mode 100644 index 00000000..176270cc --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/vad/vad_gmm.h" + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" + +static const int32_t kCompVar = 22005; +static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12. + +// For a normal distribution, the probability of |input| is calculated and +// returned (in Q20). 
The formula for normal distributed probability is +// +// 1 / s * exp(-(x - m)^2 / (2 * s^2)) +// +// where the parameters are given in the following Q domains: +// m = |mean| (Q7) +// s = |std| (Q7) +// x = |input| (Q4) +// in addition to the probability we output |delta| (in Q11) used when updating +// the noise/speech model. +int32_t WebRtcVad_GaussianProbability(int16_t input, + int16_t mean, + int16_t std, + int16_t* delta) { + int16_t tmp16, inv_std, inv_std2, exp_value = 0; + int32_t tmp32; + + // Calculate |inv_std| = 1 / s, in Q10. + // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation. + // Q-domain: Q17 / Q7 = Q10. + tmp32 = (int32_t) 131072 + (int32_t) (std >> 1); + inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std); + + // Calculate |inv_std2| = 1 / s^2, in Q14. + tmp16 = (inv_std >> 2); // Q10 -> Q8. + // Q-domain: (Q8 * Q8) >> 2 = Q14. + inv_std2 = (int16_t)((tmp16 * tmp16) >> 2); + // TODO(bjornv): Investigate if changing to + // inv_std2 = (int16_t)((inv_std * inv_std) >> 6); + // gives better accuracy. + + tmp16 = (input << 3); // Q4 -> Q7 + tmp16 = tmp16 - mean; // Q7 - Q7 = Q7 + + // To be used later, when updating noise/speech model. + // |delta| = (x - m) / s^2, in Q11. + // Q-domain: (Q14 * Q7) >> 10 = Q11. + *delta = (int16_t)((inv_std2 * tmp16) >> 10); + + // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing + // division by two with one shift. + // Q-domain: (Q11 * Q7) >> 8 = Q10. + tmp32 = (*delta * tmp16) >> 9; + + // If the exponent is small enough to give a non-zero probability we calculate + // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2)) + // ~= exp2(-log2(exp(1)) * |tmp32|). + if (tmp32 < kCompVar) { + // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10. + // Q-domain: (Q12 * Q10) >> 12 = Q10. + tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12); + tmp16 = -tmp16; + exp_value = (0x0400 | (tmp16 & 0x03FF)); + tmp16 ^= 0xFFFF; + tmp16 >>= 10; + tmp16 += 1; + // Get |exp_value| = exp(-|tmp32|) in Q10. + exp_value >>= tmp16; + } + + // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20. + // Q-domain: Q10 * Q10 = Q20. + return inv_std * exp_value; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.h new file mode 100644 index 00000000..6b2d11ba --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_gmm.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Gaussian probability calculations internally used in vad_core.c. + +#ifndef COMMON_AUDIO_VAD_VAD_GMM_H_ +#define COMMON_AUDIO_VAD_VAD_GMM_H_ + +#include + +// Calculates the probability for |input|, given that |input| comes from a +// normal distribution with mean and standard deviation (|mean|, |std|). +// +// Inputs: +// - input : input sample in Q4. +// - mean : mean input in the statistical model, Q7. +// - std : standard deviation, Q7. +// +// Output: +// +// - delta : input used when updating the model, Q11. +// |delta| = (|input| - |mean|) / |std|^2. 
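
As a sanity check on the Q domains, the value returned by WebRtcVad_GaussianProbability() above corresponds to (1/s)*exp(-(x-m)^2/(2*s^2)) scaled to Q20, with the input in Q4 and the mean/std in Q7, and no 1/sqrt(2*pi) factor, exactly as stated in the comment that introduces the formula. The floating-point reference below is only for eyeballing the fixed-point output; the function name is invented, and the example mean/std happen to be the first noise-model start values (kNoiseDataMeans[0], kNoiseDataStds[0]).

```c
#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Floating-point reference for the Q20 probability computed above. |input| is
 * Q4, |mean| and |std| are Q7, and |delta| comes back in Q11, as in the real
 * fixed-point function. Illustrative only. */
static double gaussian_probability_ref(int16_t input, int16_t mean, int16_t std,
                                       double* delta) {
    double x = input / 16.0;   /* Q4 -> real */
    double m = mean / 128.0;   /* Q7 -> real */
    double s = std / 128.0;    /* Q7 -> real */
    *delta = (x - m) / (s * s) * 2048.0;                                     /* Q11 */
    return (1.0 / s) * exp(-(x - m) * (x - m) / (2.0 * s * s)) * 1048576.0;  /* Q20 */
}

int main(void) {
    double delta;
    /* Example: x = 60.0 (Q4 = 960), m ~ 52.6 (Q7 = 6738), s ~ 2.95 (Q7 = 378). */
    double p_q20 = gaussian_probability_ref(960, 6738, 378, &delta);
    printf("probability (Q20) ~ %.0f, delta (Q11) ~ %.0f\n", p_q20, delta);
    return 0;
}
```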
+// +// Return: +// (probability for |input|) = +// 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2)); +int32_t WebRtcVad_GaussianProbability(int16_t input, + int16_t mean, + int16_t std, + int16_t* delta); + +#endif // COMMON_AUDIO_VAD_VAD_GMM_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.c new file mode 100644 index 00000000..97d5d6ce --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/vad/vad_sp.h" + +#include "webrtc/rtc_base/checks.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/vad/vad_core.h" + +// Allpass filter coefficients, upper and lower, in Q13. +// Upper: 0.64, Lower: 0.17. +static const int16_t kAllPassCoefsQ13[2] = { 5243, 1392 }; // Q13. +static const int16_t kSmoothingDown = 6553; // 0.2 in Q15. +static const int16_t kSmoothingUp = 32439; // 0.99 in Q15. + +// TODO(bjornv): Move this function to vad_filterbank.c. +// Downsampling filter based on splitting filter and allpass functions. +void WebRtcVad_Downsampling(const int16_t* signal_in, + int16_t* signal_out, + int32_t* filter_state, + size_t in_length) { + int16_t tmp16_1 = 0, tmp16_2 = 0; + int32_t tmp32_1 = filter_state[0]; + int32_t tmp32_2 = filter_state[1]; + size_t n = 0; + // Downsampling by 2 gives half length. + size_t half_length = (in_length >> 1); + + // Filter coefficients in Q13, filter state in Q0. + for (n = 0; n < half_length; n++) { + // All-pass filtering upper branch. + tmp16_1 = (int16_t) ((tmp32_1 >> 1) + + ((kAllPassCoefsQ13[0] * *signal_in) >> 14)); + *signal_out = tmp16_1; + tmp32_1 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12); + + // All-pass filtering lower branch. + tmp16_2 = (int16_t) ((tmp32_2 >> 1) + + ((kAllPassCoefsQ13[1] * *signal_in) >> 14)); + *signal_out++ += tmp16_2; + tmp32_2 = (int32_t)(*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12); + } + // Store the filter states. + filter_state[0] = tmp32_1; + filter_state[1] = tmp32_2; +} + +// Inserts |feature_value| into |low_value_vector|, if it is one of the 16 +// smallest values the last 100 frames. Then calculates and returns the median +// of the five smallest values. +int16_t WebRtcVad_FindMinimum(VadInstT* self, + int16_t feature_value, + int channel) { + int i = 0, j = 0; + int position = -1; + // Offset to beginning of the 16 minimum values in memory. + const int offset = (channel << 4); + int16_t current_median = 1600; + int16_t alpha = 0; + int32_t tmp32 = 0; + // Pointer to memory for the 16 minimum values and the age of each value of + // the |channel|. + int16_t* age = &self->index_vector[offset]; + int16_t* smallest_values = &self->low_value_vector[offset]; + + RTC_DCHECK_LT(channel, kNumChannels); + + // Each value in |smallest_values| is getting 1 loop older. Update |age|, and + // remove old values. 
+ for (i = 0; i < 16; i++) { + if (age[i] != 100) { + age[i]++; + } else { + // Too old value. Remove from memory and shift larger values downwards. + for (j = i; j < 15; j++) { + smallest_values[j] = smallest_values[j + 1]; + age[j] = age[j + 1]; + } + age[15] = 101; + smallest_values[15] = 10000; + } + } + + // Check if |feature_value| is smaller than any of the values in + // |smallest_values|. If so, find the |position| where to insert the new value + // (|feature_value|). + if (feature_value < smallest_values[7]) { + if (feature_value < smallest_values[3]) { + if (feature_value < smallest_values[1]) { + if (feature_value < smallest_values[0]) { + position = 0; + } else { + position = 1; + } + } else if (feature_value < smallest_values[2]) { + position = 2; + } else { + position = 3; + } + } else if (feature_value < smallest_values[5]) { + if (feature_value < smallest_values[4]) { + position = 4; + } else { + position = 5; + } + } else if (feature_value < smallest_values[6]) { + position = 6; + } else { + position = 7; + } + } else if (feature_value < smallest_values[15]) { + if (feature_value < smallest_values[11]) { + if (feature_value < smallest_values[9]) { + if (feature_value < smallest_values[8]) { + position = 8; + } else { + position = 9; + } + } else if (feature_value < smallest_values[10]) { + position = 10; + } else { + position = 11; + } + } else if (feature_value < smallest_values[13]) { + if (feature_value < smallest_values[12]) { + position = 12; + } else { + position = 13; + } + } else if (feature_value < smallest_values[14]) { + position = 14; + } else { + position = 15; + } + } + + // If we have detected a new small value, insert it at the correct position + // and shift larger values up. + if (position > -1) { + for (i = 15; i > position; i--) { + smallest_values[i] = smallest_values[i - 1]; + age[i] = age[i - 1]; + } + smallest_values[position] = feature_value; + age[position] = 1; + } + + // Get |current_median|. + if (self->frame_counter > 2) { + current_median = smallest_values[2]; + } else if (self->frame_counter > 0) { + current_median = smallest_values[0]; + } + + // Smooth the median value. + if (self->frame_counter > 0) { + if (current_median < self->mean_value[channel]) { + alpha = kSmoothingDown; // 0.2 in Q15. + } else { + alpha = kSmoothingUp; // 0.99 in Q15. + } + } + tmp32 = (alpha + 1) * self->mean_value[channel]; + tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median; + tmp32 += 16384; + self->mean_value[channel] = (int16_t) (tmp32 >> 15); + + return self->mean_value[channel]; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.h new file mode 100644 index 00000000..002fcd8d --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/vad_sp.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file includes specific signal processing tools used in vad_core.c. 
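
The smoothing at the end of WebRtcVad_FindMinimum() above is a one-pole filter with an asymmetric coefficient: roughly alpha = 0.2 when the new median lies below the current mean (the tracked minimum falls quickly) and alpha = 0.99 when it lies above (it rises slowly), mirroring kSmoothingDown and kSmoothingUp. A floating-point sketch of that update follows; the helper name and sample values are illustrative, and 1600 is the same default used for mean_value[] in WebRtcVad_InitCore().

```c
#include <stdio.h>

/* Float sketch of the Q15 smoothing in WebRtcVad_FindMinimum():
 * mean' = alpha * mean + (1 - alpha) * median, with alpha chosen per direction. */
static float smooth_minimum(float mean, float median) {
    float alpha = (median < mean) ? 0.2f   /* kSmoothingDown: fall fast   */
                                  : 0.99f; /* kSmoothingUp:   rise slowly */
    return alpha * mean + (1.0f - alpha) * median;
}

int main(void) {
    float mean = 1600.0f;  /* default initial value, as in WebRtcVad_InitCore() */
    /* A sudden drop is adopted almost immediately... */
    mean = smooth_minimum(mean, 400.0f);
    printf("after drop: %.1f\n", mean);   /* ~640.0 */
    /* ...while a rise creeps up about one percent per frame. */
    mean = smooth_minimum(mean, 1600.0f);
    printf("after rise: %.1f\n", mean);   /* ~649.6 */
    return 0;
}
```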
+ +#ifndef COMMON_AUDIO_VAD_VAD_SP_H_ +#define COMMON_AUDIO_VAD_VAD_SP_H_ + +#include "webrtc/common_audio/vad/vad_core.h" + +// Downsamples the signal by a factor 2, eg. 32->16 or 16->8. +// +// Inputs: +// - signal_in : Input signal. +// - in_length : Length of input signal in samples. +// +// Input & Output: +// - filter_state : Current filter states of the two all-pass filters. The +// |filter_state| is updated after all samples have been +// processed. +// +// Output: +// - signal_out : Downsampled signal (of length |in_length| / 2). +void WebRtcVad_Downsampling(const int16_t* signal_in, + int16_t* signal_out, + int32_t* filter_state, + size_t in_length); + +// Updates and returns the smoothed feature minimum. As minimum we use the +// median of the five smallest feature values in a 100 frames long window. +// As long as |handle->frame_counter| is zero, that is, we haven't received any +// "valid" data, FindMinimum() outputs the default value of 1600. +// +// Inputs: +// - feature_value : New feature value to update with. +// - channel : Channel number. +// +// Input & Output: +// - handle : State information of the VAD. +// +// Returns: +// : Smoothed minimum value for a moving window. +int16_t WebRtcVad_FindMinimum(VadInstT* handle, + int16_t feature_value, + int channel); + +#endif // COMMON_AUDIO_VAD_VAD_SP_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/webrtc_vad.c b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/webrtc_vad.c new file mode 100644 index 00000000..4315b099 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/common_audio/vad/webrtc_vad.c @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/vad/include/webrtc_vad.h" + +#include +#include + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/vad/vad_core.h" + +static const int kInitCheck = 42; +static const int kValidRates[] = { 8000, 16000, 32000, 48000 }; +static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates); +static const int kMaxFrameLengthMs = 30; + +VadInst* WebRtcVad_Create() { + VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT)); + + WebRtcSpl_Init(); + self->init_flag = 0; + + return (VadInst*)self; +} + +void WebRtcVad_Free(VadInst* handle) { + free(handle); +} + +// TODO(bjornv): Move WebRtcVad_InitCore() code here. +int WebRtcVad_Init(VadInst* handle) { + // Initialize the core VAD component. + return WebRtcVad_InitCore((VadInstT*) handle); +} + +// TODO(bjornv): Move WebRtcVad_set_mode_core() code here. 
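
Putting the pieces together, the public entry points in webrtc_vad.c (WebRtcVad_Create()/WebRtcVad_Init() above, WebRtcVad_set_mode()/WebRtcVad_Process() below) are intended to be used in a create / init / set_mode / process / free sequence. The following minimal caller is illustrative only; a 10 ms, 16 kHz frame of 160 samples is one of the combinations accepted by WebRtcVad_ValidRateAndFrameLength(), and a real caller would of course feed actual PCM rather than silence.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "webrtc/common_audio/vad/include/webrtc_vad.h"

int main(void) {
    int16_t frame[160];                /* 10 ms at 16 kHz */
    memset(frame, 0, sizeof(frame));   /* silence; fill with real PCM in practice */

    VadInst* vad = WebRtcVad_Create();
    if (vad == NULL || WebRtcVad_Init(vad) != 0) {
        fprintf(stderr, "failed to create/init VAD\n");
        return 1;
    }
    if (WebRtcVad_set_mode(vad, 2) != 0) {   /* 0..3, higher = more aggressive */
        fprintf(stderr, "failed to set mode\n");
        WebRtcVad_Free(vad);
        return 1;
    }

    int decision = WebRtcVad_Process(vad, 16000, frame, 160);
    /* 1 = active voice, 0 = non-active voice, -1 = error. */
    printf("VAD decision: %d\n", decision);

    WebRtcVad_Free(vad);
    return 0;
}
```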
+int WebRtcVad_set_mode(VadInst* handle, int mode) { + VadInstT* self = (VadInstT*) handle; + + if (handle == NULL) { + return -1; + } + if (self->init_flag != kInitCheck) { + return -1; + } + + return WebRtcVad_set_mode_core(self, mode); +} + +int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, + size_t frame_length) { + int vad = -1; + VadInstT* self = (VadInstT*) handle; + + if (handle == NULL) { + return -1; + } + + if (self->init_flag != kInitCheck) { + return -1; + } + if (audio_frame == NULL) { + return -1; + } + if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) { + return -1; + } + + if (fs == 48000) { + vad = WebRtcVad_CalcVad48khz(self, audio_frame, frame_length); + } else if (fs == 32000) { + vad = WebRtcVad_CalcVad32khz(self, audio_frame, frame_length); + } else if (fs == 16000) { + vad = WebRtcVad_CalcVad16khz(self, audio_frame, frame_length); + } else if (fs == 8000) { + vad = WebRtcVad_CalcVad8khz(self, audio_frame, frame_length); + } + + if (vad > 0) { + vad = 1; + } + return vad; +} + +int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) { + int return_value = -1; + size_t i; + int valid_length_ms; + size_t valid_length; + + // We only allow 10, 20 or 30 ms frames. Loop through valid frame rates and + // see if we have a matching pair. + for (i = 0; i < kRatesSize; i++) { + if (kValidRates[i] == rate) { + for (valid_length_ms = 10; valid_length_ms <= kMaxFrameLengthMs; + valid_length_ms += 10) { + valid_length = (size_t)(kValidRates[i] / 1000 * valid_length_ms); + if (frame_length == valid_length) { + return_value = 0; + break; + } + } + break; + } + } + + return return_value; +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.cc b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.cc new file mode 100644 index 00000000..75c71294 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.cc @@ -0,0 +1,170 @@ +/* + * Copyright 2006 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Most of this was borrowed (with minor modifications) from V8's and Chromium's +// src/base/logging.cc. + +#include +#include +#include + +#if defined(WEBRTC_ANDROID) +#define RTC_LOG_TAG_ANDROID "rtc" +#include // NOLINT +#endif + +#if defined(WEBRTC_WIN) +#include +#endif + +#include +#define LAST_SYSTEM_ERROR (errno) + +// #if defined(WEBRTC_WIN) +// #define LAST_SYSTEM_ERROR (::GetLastError()) +// #elif defined(__native_client__) && __native_client__ +// #define LAST_SYSTEM_ERROR (0) +// #elif defined(WEBRTC_POSIX) +// #include +// #define LAST_SYSTEM_ERROR (errno) +// #endif // WEBRTC_WIN + +#include "webrtc/rtc_base/checks.h" + +namespace { +#if defined(__GNUC__) +__attribute__((__format__(__printf__, 2, 3))) +#endif + void AppendFormat(std::string* s, const char* fmt, ...) { + va_list args, copy; + va_start(args, fmt); + va_copy(copy, args); + const int predicted_length = std::vsnprintf(nullptr, 0, fmt, copy); + va_end(copy); + + if (predicted_length > 0) { + const size_t size = s->size(); + s->resize(size + predicted_length); + // Pass "+ 1" to vsnprintf to include space for the '\0'. 
+ std::vsnprintf(&((*s)[size]), predicted_length + 1, fmt, args); + } + va_end(args); +} +} + +namespace rtc { +namespace webrtc_checks_impl { + +// Reads one argument from args, appends it to s and advances fmt. +// Returns true iff an argument was sucessfully parsed. +bool ParseArg(va_list* args, const CheckArgType** fmt, std::string* s) { + if (**fmt == CheckArgType::kEnd) + return false; + + switch (**fmt) { + case CheckArgType::kInt: + AppendFormat(s, "%d", va_arg(*args, int)); + break; + case CheckArgType::kLong: + AppendFormat(s, "%ld", va_arg(*args, long)); + break; + case CheckArgType::kLongLong: + AppendFormat(s, "%lld", va_arg(*args, long long)); + break; + case CheckArgType::kUInt: + AppendFormat(s, "%u", va_arg(*args, unsigned)); + break; + case CheckArgType::kULong: + AppendFormat(s, "%lu", va_arg(*args, unsigned long)); + break; + case CheckArgType::kULongLong: + AppendFormat(s, "%llu", va_arg(*args, unsigned long long)); + break; + case CheckArgType::kDouble: + AppendFormat(s, "%g", va_arg(*args, double)); + break; + case CheckArgType::kLongDouble: + AppendFormat(s, "%Lg", va_arg(*args, long double)); + break; + case CheckArgType::kCharP: + s->append(va_arg(*args, const char*)); + break; + case CheckArgType::kStdString: + s->append(*va_arg(*args, const std::string*)); + break; + case CheckArgType::kVoidP: + AppendFormat(s, "%p", va_arg(*args, const void*)); + break; + default: + s->append("[Invalid CheckArgType]"); + return false; + } + (*fmt)++; + return true; +} + +RTC_NORETURN void FatalLog(const char* file, + int line, + const char* message, + const CheckArgType* fmt, + ...) { + va_list args; + va_start(args, fmt); + + std::string s; + AppendFormat(&s, + "\n\n" + "#\n" + "# Fatal error in: %s, line %d\n" + "# last system error: %u\n" + "# Check failed: %s", + file, line, LAST_SYSTEM_ERROR, message); + + if (*fmt == CheckArgType::kCheckOp) { + // This log message was generated by RTC_CHECK_OP, so we have to complete + // the error message using the operands that have been passed as the first + // two arguments. + fmt++; + + std::string s1, s2; + if (ParseArg(&args, &fmt, &s1) && ParseArg(&args, &fmt, &s2)) + AppendFormat(&s, " (%s vs. %s)\n# ", s1.c_str(), s2.c_str()); + } else { + s.append("\n# "); + } + + // Append all the user-supplied arguments to the message. + while (ParseArg(&args, &fmt, &s)) + ; + + va_end(args); + + const char* output = s.c_str(); + +#if defined(WEBRTC_ANDROID) + __android_log_print(ANDROID_LOG_ERROR, RTC_LOG_TAG_ANDROID, "%s\n", output); +#endif + + fflush(stdout); + fprintf(stderr, "%s", output); + fflush(stderr); + abort(); +} + +} // namespace webrtc_checks_impl +} // namespace rtc + +// Function to call from the C version of the RTC_CHECK and RTC_DCHECK macros. +RTC_NORETURN void rtc_FatalMessage(const char* file, int line, + const char* msg) { + static constexpr rtc::webrtc_checks_impl::CheckArgType t[] = { + rtc::webrtc_checks_impl::CheckArgType::kEnd}; + FatalLog(file, line, msg, t); +} diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.h new file mode 100644 index 00000000..3dbef18f --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/checks.h @@ -0,0 +1,400 @@ +/* + * Copyright 2006 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef RTC_BASE_CHECKS_H_ +#define RTC_BASE_CHECKS_H_ + +// If you for some reson need to know if DCHECKs are on, test the value of +// RTC_DCHECK_IS_ON. (Test its value, not if it's defined; it'll always be +// defined, to either a true or a false value.) +#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON) +#define RTC_DCHECK_IS_ON 1 +#else +#define RTC_DCHECK_IS_ON 0 +#endif + +// Annotate a function that will not return control flow to the caller. +#if defined(_MSC_VER) +#define RTC_NORETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define RTC_NORETURN __attribute__ ((__noreturn__)) +#else +#define RTC_NORETURN +#endif + +#ifdef __cplusplus +extern "C" { +#endif +RTC_NORETURN void rtc_FatalMessage(const char* file, int line, const char* msg); +#ifdef __cplusplus +} // extern "C" +#endif + +#ifdef __cplusplus +// C++ version. + +#include + +#include "webrtc/rtc_base/numerics/safe_compare.h" +#include "webrtc/rtc_base/system/inline.h" + +// The macros here print a message to stderr and abort under various +// conditions. All will accept additional stream messages. For example: +// RTC_DCHECK_EQ(foo, bar) << "I'm printed when foo != bar."; +// +// - RTC_CHECK(x) is an assertion that x is always true, and that if it isn't, +// it's better to terminate the process than to continue. During development, +// the reason that it's better to terminate might simply be that the error +// handling code isn't in place yet; in production, the reason might be that +// the author of the code truly believes that x will always be true, but that +// she recognizes that if she is wrong, abrupt and unpleasant process +// termination is still better than carrying on with the assumption violated. +// +// RTC_CHECK always evaluates its argument, so it's OK for x to have side +// effects. +// +// - RTC_DCHECK(x) is the same as RTC_CHECK(x)---an assertion that x is always +// true---except that x will only be evaluated in debug builds; in production +// builds, x is simply assumed to be true. This is useful if evaluating x is +// expensive and the expected cost of failing to detect the violated +// assumption is acceptable. You should not handle cases where a production +// build fails to spot a violated condition, even those that would result in +// crashes. If the code needs to cope with the error, make it cope, but don't +// call RTC_DCHECK; if the condition really can't occur, but you'd sleep +// better at night knowing that the process will suicide instead of carrying +// on in case you were wrong, use RTC_CHECK instead of RTC_DCHECK. +// +// RTC_DCHECK only evaluates its argument in debug builds, so if x has visible +// side effects, you need to write e.g. +// bool w = x; RTC_DCHECK(w); +// +// - RTC_CHECK_EQ, _NE, _GT, ..., and RTC_DCHECK_EQ, _NE, _GT, ... are +// specialized variants of RTC_CHECK and RTC_DCHECK that print prettier +// messages if the condition doesn't hold. Prefer them to raw RTC_CHECK and +// RTC_DCHECK. +// +// - FATAL() aborts unconditionally. +// +// TODO(ajm): Ideally, checks.h would be combined with logging.h, but +// consolidation with system_wrappers/logging.h should happen first. 
+ +namespace rtc { +namespace webrtc_checks_impl { +enum class CheckArgType : int8_t { + kEnd = 0, + kInt, + kLong, + kLongLong, + kUInt, + kULong, + kULongLong, + kDouble, + kLongDouble, + kCharP, + kStdString, + kVoidP, + + // kCheckOp doesn't represent an argument type. Instead, it is sent as the + // first argument from RTC_CHECK_OP to make FatalLog use the next two + // arguments to build the special CHECK_OP error message + // (the "a == b (1 vs. 2)" bit). + kCheckOp, +}; + +RTC_NORETURN void FatalLog(const char* file, + int line, + const char* message, + const CheckArgType* fmt, + ...); + +// Wrapper for log arguments. Only ever make values of this type with the +// MakeVal() functions. +template +struct Val { + static constexpr CheckArgType Type() { return N; } + T GetVal() const { return val; } + T val; +}; + +inline Val MakeVal(int x) { + return {x}; +} +inline Val MakeVal(long x) { + return {x}; +} +inline Val MakeVal(long long x) { + return {x}; +} +inline Val MakeVal(unsigned int x) { + return {x}; +} +inline Val MakeVal(unsigned long x) { + return {x}; +} +inline Val MakeVal( + unsigned long long x) { + return {x}; +} + +inline Val MakeVal(double x) { + return {x}; +} +inline Val MakeVal(long double x) { + return {x}; +} + +inline Val MakeVal(const char* x) { + return {x}; +} +inline Val MakeVal( + const std::string& x) { + return {&x}; +} + +inline Val MakeVal(const void* x) { + return {x}; +} + +// Ephemeral type that represents the result of the logging << operator. +template +class LogStreamer; + +// Base case: Before the first << argument. +template <> +class LogStreamer<> final { + public: + template < + typename U, + typename std::enable_if::value>::type* = nullptr> + RTC_FORCE_INLINE LogStreamer()))> operator<<( + U arg) const { + return LogStreamer()))>(MakeVal(arg), + this); + } + + template < + typename U, + typename std::enable_if::value>::type* = nullptr> + RTC_FORCE_INLINE LogStreamer()))> operator<<( + const U& arg) const { + return LogStreamer()))>(MakeVal(arg), + this); + } + + template + RTC_NORETURN RTC_FORCE_INLINE static void Call(const char* file, + const int line, + const char* message, + const Us&... args) { + static constexpr CheckArgType t[] = {Us::Type()..., CheckArgType::kEnd}; + FatalLog(file, line, message, t, args.GetVal()...); + } + + template + RTC_NORETURN RTC_FORCE_INLINE static void CallCheckOp(const char* file, + const int line, + const char* message, + const Us&... args) { + static constexpr CheckArgType t[] = {CheckArgType::kCheckOp, Us::Type()..., + CheckArgType::kEnd}; + FatalLog(file, line, message, t, args.GetVal()...); + } +}; + +// Inductive case: We've already seen at least one << argument. The most recent +// one had type `T`, and the earlier ones had types `Ts`. +template +class LogStreamer final { + public: + RTC_FORCE_INLINE LogStreamer(T arg, const LogStreamer* prior) + : arg_(arg), prior_(prior) {} + + template < + typename U, + typename std::enable_if::value>::type* = nullptr> + RTC_FORCE_INLINE LogStreamer())), T, Ts...> + operator<<(U arg) const { + return LogStreamer())), T, Ts...>( + MakeVal(arg), this); + } + + template < + typename U, + typename std::enable_if::value>::type* = nullptr> + RTC_FORCE_INLINE LogStreamer())), T, Ts...> + operator<<(const U& arg) const { + return LogStreamer())), T, Ts...>( + MakeVal(arg), this); + } + + template + RTC_NORETURN RTC_FORCE_INLINE void Call(const char* file, + const int line, + const char* message, + const Us&... 
args) const { + prior_->Call(file, line, message, arg_, args...); + } + + template + RTC_NORETURN RTC_FORCE_INLINE void CallCheckOp(const char* file, + const int line, + const char* message, + const Us&... args) const { + prior_->CallCheckOp(file, line, message, arg_, args...); + } + + private: + // The most recent argument. + T arg_; + + // Earlier arguments. + const LogStreamer* prior_; +}; + +template +class FatalLogCall final { + public: + FatalLogCall(const char* file, int line, const char* message) + : file_(file), line_(line), message_(message) {} + + // This can be any binary operator with precedence lower than <<. + template + RTC_NORETURN RTC_FORCE_INLINE void operator&( + const LogStreamer& streamer) { + isCheckOp ? streamer.CallCheckOp(file_, line_, message_) + : streamer.Call(file_, line_, message_); + } + + private: + const char* file_; + int line_; + const char* message_; +}; +} // namespace webrtc_checks_impl + +// The actual stream used isn't important. We reference |ignored| in the code +// but don't evaluate it; this is to avoid "unused variable" warnings (we do so +// in a particularly convoluted way with an extra ?: because that appears to be +// the simplest construct that keeps Visual Studio from complaining about +// condition being unused). +#define RTC_EAT_STREAM_PARAMETERS(ignored) \ + (true ? true : ((void)(ignored), true)) \ + ? static_cast(0) \ + : rtc::webrtc_checks_impl::FatalLogCall("", 0, "") & \ + rtc::webrtc_checks_impl::LogStreamer<>() + +// Call RTC_EAT_STREAM_PARAMETERS with an argument that fails to compile if +// values of the same types as |a| and |b| can't be compared with the given +// operation, and that would evaluate |a| and |b| if evaluated. +#define RTC_EAT_STREAM_PARAMETERS_OP(op, a, b) \ + RTC_EAT_STREAM_PARAMETERS(((void)rtc::Safe##op(a, b))) + +// RTC_CHECK dies with a fatal error if condition is not true. It is *not* +// controlled by NDEBUG or anything else, so the check will be executed +// regardless of compilation mode. +// +// We make sure RTC_CHECK et al. always evaluates |condition|, as +// doing RTC_CHECK(FunctionWithSideEffect()) is a common idiom. +#define RTC_CHECK(condition) \ + while (!(condition)) \ + rtc::webrtc_checks_impl::FatalLogCall(__FILE__, __LINE__, \ + #condition) & \ + rtc::webrtc_checks_impl::LogStreamer<>() + +// Helper macro for binary operators. +// Don't use this macro directly in your code, use RTC_CHECK_EQ et al below. +#define RTC_CHECK_OP(name, op, val1, val2) \ + while (!rtc::Safe##name((val1), (val2))) \ + rtc::webrtc_checks_impl::FatalLogCall(__FILE__, __LINE__, \ + #val1 " " #op " " #val2) & \ + rtc::webrtc_checks_impl::LogStreamer<>() << (val1) << (val2) + +#define RTC_CHECK_EQ(val1, val2) RTC_CHECK_OP(Eq, ==, val1, val2) +#define RTC_CHECK_NE(val1, val2) RTC_CHECK_OP(Ne, !=, val1, val2) +#define RTC_CHECK_LE(val1, val2) RTC_CHECK_OP(Le, <=, val1, val2) +#define RTC_CHECK_LT(val1, val2) RTC_CHECK_OP(Lt, <, val1, val2) +#define RTC_CHECK_GE(val1, val2) RTC_CHECK_OP(Ge, >=, val1, val2) +#define RTC_CHECK_GT(val1, val2) RTC_CHECK_OP(Gt, >, val1, val2) + +// The RTC_DCHECK macro is equivalent to RTC_CHECK except that it only generates +// code in debug builds. It does reference the condition parameter in all cases, +// though, so callers won't risk getting warnings about unused variables. 
+#if RTC_DCHECK_IS_ON +#define RTC_DCHECK(condition) RTC_CHECK(condition) +#define RTC_DCHECK_EQ(v1, v2) RTC_CHECK_EQ(v1, v2) +#define RTC_DCHECK_NE(v1, v2) RTC_CHECK_NE(v1, v2) +#define RTC_DCHECK_LE(v1, v2) RTC_CHECK_LE(v1, v2) +#define RTC_DCHECK_LT(v1, v2) RTC_CHECK_LT(v1, v2) +#define RTC_DCHECK_GE(v1, v2) RTC_CHECK_GE(v1, v2) +#define RTC_DCHECK_GT(v1, v2) RTC_CHECK_GT(v1, v2) +#else +#define RTC_DCHECK(condition) RTC_EAT_STREAM_PARAMETERS(condition) +#define RTC_DCHECK_EQ(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Eq, v1, v2) +#define RTC_DCHECK_NE(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Ne, v1, v2) +#define RTC_DCHECK_LE(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Le, v1, v2) +#define RTC_DCHECK_LT(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Lt, v1, v2) +#define RTC_DCHECK_GE(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Ge, v1, v2) +#define RTC_DCHECK_GT(v1, v2) RTC_EAT_STREAM_PARAMETERS_OP(Gt, v1, v2) +#endif + +#define RTC_UNREACHABLE_CODE_HIT false +#define RTC_NOTREACHED() RTC_DCHECK(RTC_UNREACHABLE_CODE_HIT) + +// TODO(bugs.webrtc.org/8454): Add an RTC_ prefix or rename differently. +#define FATAL() \ + rtc::webrtc_checks_impl::FatalLogCall(__FILE__, __LINE__, \ + "FATAL()") & \ + rtc::webrtc_checks_impl::LogStreamer<>() + +// Performs the integer division a/b and returns the result. CHECKs that the +// remainder is zero. +template +inline T CheckedDivExact(T a, T b) { + RTC_CHECK_EQ(a % b, 0) << a << " is not evenly divisible by " << b; + return a / b; +} + +} // namespace rtc + +#else // __cplusplus not defined +// C version. Lacks many features compared to the C++ version, but usage +// guidelines are the same. + +#define RTC_CHECK(condition) \ + do { \ + if (!(condition)) { \ + rtc_FatalMessage(__FILE__, __LINE__, "CHECK failed: " #condition); \ + } \ + } while (0) + +#define RTC_CHECK_EQ(a, b) RTC_CHECK((a) == (b)) +#define RTC_CHECK_NE(a, b) RTC_CHECK((a) != (b)) +#define RTC_CHECK_LE(a, b) RTC_CHECK((a) <= (b)) +#define RTC_CHECK_LT(a, b) RTC_CHECK((a) < (b)) +#define RTC_CHECK_GE(a, b) RTC_CHECK((a) >= (b)) +#define RTC_CHECK_GT(a, b) RTC_CHECK((a) > (b)) + +#define RTC_DCHECK(condition) \ + do { \ + if (RTC_DCHECK_IS_ON && !(condition)) { \ + rtc_FatalMessage(__FILE__, __LINE__, "DCHECK failed: " #condition); \ + } \ + } while (0) + +#define RTC_DCHECK_EQ(a, b) RTC_DCHECK((a) == (b)) +#define RTC_DCHECK_NE(a, b) RTC_DCHECK((a) != (b)) +#define RTC_DCHECK_LE(a, b) RTC_DCHECK((a) <= (b)) +#define RTC_DCHECK_LT(a, b) RTC_DCHECK((a) < (b)) +#define RTC_DCHECK_GE(a, b) RTC_DCHECK((a) >= (b)) +#define RTC_DCHECK_GT(a, b) RTC_DCHECK((a) > (b)) + +#endif // __cplusplus + +#endif // RTC_BASE_CHECKS_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/compile_assert_c.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/compile_assert_c.h new file mode 100644 index 00000000..db2e4a81 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/compile_assert_c.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef RTC_BASE_COMPILE_ASSERT_C_H_ +#define RTC_BASE_COMPILE_ASSERT_C_H_ + +// Use this macro to verify at compile time that certain restrictions are met. 
+// The argument is the boolean expression to evaluate. +// Example: +// RTC_COMPILE_ASSERT(sizeof(foo) < 128); +// Note: In C++, use static_assert instead! +#define RTC_COMPILE_ASSERT(expression) \ + switch (0) { \ + case 0: \ + case expression:; \ + } + +#endif // RTC_BASE_COMPILE_ASSERT_C_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/numerics/safe_compare.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/numerics/safe_compare.h new file mode 100644 index 00000000..d62eee11 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/numerics/safe_compare.h @@ -0,0 +1,176 @@ +/* + * Copyright 2016 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file defines six constexpr functions: +// +// rtc::SafeEq // == +// rtc::SafeNe // != +// rtc::SafeLt // < +// rtc::SafeLe // <= +// rtc::SafeGt // > +// rtc::SafeGe // >= +// +// They each accept two arguments of arbitrary types, and in almost all cases, +// they simply call the appropriate comparison operator. However, if both +// arguments are integers, they don't compare them using C++'s quirky rules, +// but instead adhere to the true mathematical definitions. It is as if the +// arguments were first converted to infinite-range signed integers, and then +// compared, although of course nothing expensive like that actually takes +// place. In practice, for signed/signed and unsigned/unsigned comparisons and +// some mixed-signed comparisons with a compile-time constant, the overhead is +// zero; in the remaining cases, it is just a few machine instructions (no +// branches). + +#ifndef RTC_BASE_NUMERICS_SAFE_COMPARE_H_ +#define RTC_BASE_NUMERICS_SAFE_COMPARE_H_ + +#include +#include + +#include +#include + +#include "webrtc/rtc_base/type_traits.h" + +namespace rtc { + +namespace safe_cmp_impl { + +template +struct LargerIntImpl : std::false_type {}; +template <> +struct LargerIntImpl : std::true_type { + using type = int16_t; +}; +template <> +struct LargerIntImpl : std::true_type { + using type = int32_t; +}; +template <> +struct LargerIntImpl : std::true_type { + using type = int64_t; +}; + +// LargerInt::value is true iff there's a signed type that's larger +// than T1 (and no larger than the larger of T2 and int*, for performance +// reasons); and if there is such a type, LargerInt::type is an alias +// for it. +template +struct LargerInt + : LargerIntImpl {}; + +template +constexpr typename std::make_unsigned::type MakeUnsigned(T a) { + return static_cast::type>(a); +} + +// Overload for when both T1 and T2 have the same signedness. +template ::value == + std::is_signed::value>::type* = nullptr> +constexpr bool Cmp(T1 a, T2 b) { + return Op::Op(a, b); +} + +// Overload for signed - unsigned comparison that can be promoted to a bigger +// signed type. +template ::value && + std::is_unsigned::value && + LargerInt::value>::type* = nullptr> +constexpr bool Cmp(T1 a, T2 b) { + return Op::Op(a, static_cast::type>(b)); +} + +// Overload for unsigned - signed comparison that can be promoted to a bigger +// signed type. 
+template ::value && + std::is_signed::value && + LargerInt::value>::type* = nullptr> +constexpr bool Cmp(T1 a, T2 b) { + return Op::Op(static_cast::type>(a), b); +} + +// Overload for signed - unsigned comparison that can't be promoted to a bigger +// signed type. +template ::value && + std::is_unsigned::value && + !LargerInt::value>::type* = nullptr> +constexpr bool Cmp(T1 a, T2 b) { + return a < 0 ? Op::Op(-1, 0) : Op::Op(safe_cmp_impl::MakeUnsigned(a), b); +} + +// Overload for unsigned - signed comparison that can't be promoted to a bigger +// signed type. +template ::value && + std::is_signed::value && + !LargerInt::value>::type* = nullptr> +constexpr bool Cmp(T1 a, T2 b) { + return b < 0 ? Op::Op(0, -1) : Op::Op(a, safe_cmp_impl::MakeUnsigned(b)); +} + +#define RTC_SAFECMP_MAKE_OP(name, op) \ + struct name { \ + template \ + static constexpr bool Op(T1 a, T2 b) { \ + return a op b; \ + } \ + }; +RTC_SAFECMP_MAKE_OP(EqOp, ==) +RTC_SAFECMP_MAKE_OP(NeOp, !=) +RTC_SAFECMP_MAKE_OP(LtOp, <) +RTC_SAFECMP_MAKE_OP(LeOp, <=) +RTC_SAFECMP_MAKE_OP(GtOp, >) +RTC_SAFECMP_MAKE_OP(GeOp, >=) +#undef RTC_SAFECMP_MAKE_OP + +} // namespace safe_cmp_impl + +#define RTC_SAFECMP_MAKE_FUN(name) \ + template \ + constexpr \ + typename std::enable_if::value && IsIntlike::value, \ + bool>::type Safe##name(T1 a, T2 b) { \ + /* Unary plus here turns enums into real integral types. */ \ + return safe_cmp_impl::Cmp(+a, +b); \ + } \ + template \ + constexpr \ + typename std::enable_if::value || !IsIntlike::value, \ + bool>::type Safe##name(const T1& a, \ + const T2& b) { \ + return safe_cmp_impl::name##Op::Op(a, b); \ + } +RTC_SAFECMP_MAKE_FUN(Eq) +RTC_SAFECMP_MAKE_FUN(Ne) +RTC_SAFECMP_MAKE_FUN(Lt) +RTC_SAFECMP_MAKE_FUN(Le) +RTC_SAFECMP_MAKE_FUN(Gt) +RTC_SAFECMP_MAKE_FUN(Ge) +#undef RTC_SAFECMP_MAKE_FUN + +} // namespace rtc + +#endif // RTC_BASE_NUMERICS_SAFE_COMPARE_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/sanitizer.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/sanitizer.h new file mode 100644 index 00000000..a9eccfcc --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/sanitizer.h @@ -0,0 +1,144 @@ +/* + * Copyright 2016 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef RTC_BASE_SANITIZER_H_ +#define RTC_BASE_SANITIZER_H_ + +#include // For size_t. + +#ifdef __cplusplus +#include +#endif + +#if defined(__has_feature) +#if __has_feature(address_sanitizer) +#define RTC_HAS_ASAN 1 +#endif +#if __has_feature(memory_sanitizer) +#define RTC_HAS_MSAN 1 +#endif +#endif +#ifndef RTC_HAS_ASAN +#define RTC_HAS_ASAN 0 +#endif +#ifndef RTC_HAS_MSAN +#define RTC_HAS_MSAN 0 +#endif + +#if RTC_HAS_ASAN +#include +#endif +#if RTC_HAS_MSAN +#include +#endif + +#ifdef __has_attribute +#if __has_attribute(no_sanitize) +#define RTC_NO_SANITIZE(what) __attribute__((no_sanitize(what))) +#endif +#endif +#ifndef RTC_NO_SANITIZE +#define RTC_NO_SANITIZE(what) +#endif + +// Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements) +// as being unaddressable, so that reads and writes are not allowed. ASan may +// narrow the range to the nearest alignment boundaries. 
+static inline void rtc_AsanPoison(const volatile void* ptr, + size_t element_size, + size_t num_elements) { +#if RTC_HAS_ASAN + ASAN_POISON_MEMORY_REGION(ptr, element_size * num_elements); +#endif +} + +// Ask ASan to mark the memory range [ptr, ptr + element_size * num_elements) +// as being addressable, so that reads and writes are allowed. ASan may widen +// the range to the nearest alignment boundaries. +static inline void rtc_AsanUnpoison(const volatile void* ptr, + size_t element_size, + size_t num_elements) { +#if RTC_HAS_ASAN + ASAN_UNPOISON_MEMORY_REGION(ptr, element_size * num_elements); +#endif +} + +// Ask MSan to mark the memory range [ptr, ptr + element_size * num_elements) +// as being uninitialized. +static inline void rtc_MsanMarkUninitialized(const volatile void* ptr, + size_t element_size, + size_t num_elements) { +#if RTC_HAS_MSAN + __msan_poison(ptr, element_size * num_elements); +#endif +} + +// Force an MSan check (if any bits in the memory range [ptr, ptr + +// element_size * num_elements) are uninitialized the call will crash with an +// MSan report). +static inline void rtc_MsanCheckInitialized(const volatile void* ptr, + size_t element_size, + size_t num_elements) { +#if RTC_HAS_MSAN + __msan_check_mem_is_initialized(ptr, element_size * num_elements); +#endif +} + +#ifdef __cplusplus + +namespace rtc { +namespace sanitizer_impl { + +template +constexpr bool IsTriviallyCopyable() { + return static_cast(std::is_trivially_copy_constructible::value && + (std::is_trivially_copy_assignable::value || + !std::is_copy_assignable::value) && + std::is_trivially_destructible::value); +} + +} // namespace sanitizer_impl + +template +inline void AsanPoison(const T& mem) { + rtc_AsanPoison(mem.data(), sizeof(mem.data()[0]), mem.size()); +} + +template +inline void AsanUnpoison(const T& mem) { + rtc_AsanUnpoison(mem.data(), sizeof(mem.data()[0]), mem.size()); +} + +template +inline void MsanMarkUninitialized(const T& mem) { + rtc_MsanMarkUninitialized(mem.data(), sizeof(mem.data()[0]), mem.size()); +} + +template +inline T MsanUninitialized(T t) { +#if RTC_HAS_MSAN + // TODO(bugs.webrtc.org/8762): Switch to std::is_trivially_copyable when it + // becomes available in downstream projects. + static_assert(sanitizer_impl::IsTriviallyCopyable(), ""); +#endif + rtc_MsanMarkUninitialized(&t, sizeof(T), 1); + return t; +} + +template +inline void MsanCheckInitialized(const T& mem) { + rtc_MsanCheckInitialized(mem.data(), sizeof(mem.data()[0]), mem.size()); +} + +} // namespace rtc + +#endif // __cplusplus + +#endif // RTC_BASE_SANITIZER_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/arch.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/arch.h new file mode 100644 index 00000000..a2a11806 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/arch.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This file contains platform-specific typedefs and defines. +// Much of it is derived from Chromium's build/build_config.h. 
+ +#ifndef RTC_BASE_SYSTEM_ARCH_H_ +#define RTC_BASE_SYSTEM_ARCH_H_ + +// Processor architecture detection. For more info on what's defined, see: +// http://msdn.microsoft.com/en-us/library/b0084kay.aspx +// http://www.agner.org/optimize/calling_conventions.pdf +// or with gcc, run: "echo | gcc -E -dM -" +#if defined(_M_X64) || defined(__x86_64__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86_64 +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__aarch64__) +#define WEBRTC_ARCH_ARM_FAMILY +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(_M_IX86) || defined(__i386__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86 +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__ARMEL__) +#define WEBRTC_ARCH_ARM_FAMILY +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__MIPSEL__) +#define WEBRTC_ARCH_MIPS_FAMILY +#if defined(__LP64__) +#define WEBRTC_ARCH_64_BITS +#else +#define WEBRTC_ARCH_32_BITS +#endif +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__pnacl__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#else +#error Please add support for your architecture in typedefs.h +#endif + +#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) +#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN +#endif + +#endif // RTC_BASE_SYSTEM_ARCH_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/inline.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/inline.h new file mode 100644 index 00000000..f585d34d --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/system/inline.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef RTC_BASE_SYSTEM_INLINE_H_ +#define RTC_BASE_SYSTEM_INLINE_H_ + +#if defined(_MSC_VER) + +#define RTC_FORCE_INLINE __forceinline +#define RTC_NO_INLINE __declspec(noinline) + +#elif defined(__GNUC__) + +#define RTC_FORCE_INLINE __attribute__((__always_inline__)) +#define RTC_NO_INLINE __attribute__((__noinline__)) + +#else + +#define RTC_FORCE_INLINE +#define RTC_NO_INLINE + +#endif + +#endif // RTC_BASE_SYSTEM_INLINE_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/type_traits.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/type_traits.h new file mode 100644 index 00000000..0cb899c4 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/rtc_base/type_traits.h @@ -0,0 +1,140 @@ +/* + * Copyright 2016 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef RTC_BASE_TYPE_TRAITS_H_ +#define RTC_BASE_TYPE_TRAITS_H_ + +#include +#include + +namespace rtc { + +// Determines if the given class has zero-argument .data() and .size() methods +// whose return values are convertible to T* and size_t, respectively. +template +class HasDataAndSize { + private: + template < + typename C, + typename std::enable_if< + std::is_convertible().data()), T*>::value && + std::is_convertible().size()), + std::size_t>::value>::type* = nullptr> + static int Test(int); + + template + static char Test(...); + + public: + static constexpr bool value = std::is_same(0)), int>::value; +}; + +namespace test_has_data_and_size { + +template +struct Test1 { + DR data(); + SR size(); +}; +static_assert(HasDataAndSize, int>::value, ""); +static_assert(HasDataAndSize, const int>::value, ""); +static_assert(HasDataAndSize, const int>::value, ""); +static_assert(!HasDataAndSize, int>::value, + "implicit cast of const int* to int*"); +static_assert(!HasDataAndSize, int>::value, + "implicit cast of char* to int*"); + +struct Test2 { + int* data; + size_t size; +}; +static_assert(!HasDataAndSize::value, + ".data and .size aren't functions"); + +struct Test3 { + int* data(); +}; +static_assert(!HasDataAndSize::value, ".size() is missing"); + +class Test4 { + int* data(); + size_t size(); +}; +static_assert(!HasDataAndSize::value, + ".data() and .size() are private"); + +} // namespace test_has_data_and_size + +namespace type_traits_impl { + +// Determines if the given type is an enum that converts implicitly to +// an integral type. +template +struct IsIntEnum { + private: + // This overload is used if the type is an enum, and unary plus + // compiles and turns it into an integral type. + template ::value && + std::is_integral())>::value>::type* = + nullptr> + static int Test(int); + + // Otherwise, this overload is used. + template + static char Test(...); + + public: + static constexpr bool value = + std::is_same::type>(0)), + int>::value; +}; + +} // namespace type_traits_impl + +// Determines if the given type is integral, or an enum that +// converts implicitly to an integral type. 
+template +struct IsIntlike { + private: + using X = typename std::remove_reference::type; + + public: + static constexpr bool value = + std::is_integral::value || type_traits_impl::IsIntEnum::value; +}; + +namespace test_enum_intlike { + +enum E1 { e1 }; +enum { e2 }; +enum class E3 { e3 }; +struct S {}; + +static_assert(type_traits_impl::IsIntEnum::value, ""); +static_assert(type_traits_impl::IsIntEnum::value, ""); +static_assert(!type_traits_impl::IsIntEnum::value, ""); +static_assert(!type_traits_impl::IsIntEnum::value, ""); +static_assert(!type_traits_impl::IsIntEnum::value, ""); +static_assert(!type_traits_impl::IsIntEnum::value, ""); + +static_assert(IsIntlike::value, ""); +static_assert(IsIntlike::value, ""); +static_assert(!IsIntlike::value, ""); +static_assert(IsIntlike::value, ""); +static_assert(!IsIntlike::value, ""); +static_assert(!IsIntlike::value, ""); + +} // namespace test_enum_intlike + +} // namespace rtc + +#endif // RTC_BASE_TYPE_TRAITS_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/system_wrappers/include/cpu_features_wrapper.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/system_wrappers/include/cpu_features_wrapper.h new file mode 100644 index 00000000..739161af --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/system_wrappers/include/cpu_features_wrapper.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ +#define SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ + +#include + +#if defined(__cplusplus) || defined(c_plusplus) +extern "C" { +#endif + +// List of features in x86. +typedef enum { kSSE2, kSSE3 } CPUFeature; + +// List of features in ARM. +enum { + kCPUFeatureARMv7 = (1 << 0), + kCPUFeatureVFPv3 = (1 << 1), + kCPUFeatureNEON = (1 << 2), + kCPUFeatureLDREXSTREX = (1 << 3) +}; + +typedef int (*WebRtc_CPUInfo)(CPUFeature feature); + +// Returns true if the CPU supports the feature. +extern WebRtc_CPUInfo WebRtc_GetCPUInfo; + +// No CPU feature is available => straight C path. +extern WebRtc_CPUInfo WebRtc_GetCPUInfoNoASM; + +// Return the features in an ARM device. +// It detects the features in the hardware platform, and returns supported +// values in the above enum definition as a bitmask. +extern uint64_t WebRtc_GetCPUFeaturesARM(void); + +#if defined(__cplusplus) || defined(c_plusplus) +} // extern "C" +#endif + +#endif // SYSTEM_WRAPPERS_INCLUDE_CPU_FEATURES_WRAPPER_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/typedefs.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/typedefs.h new file mode 100644 index 00000000..8286ecae --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc/typedefs.h @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +// This file contains platform-specific typedefs and defines. +// Much of it is derived from Chromium's build/build_config.h. + +#ifndef WEBRTC_TYPEDEFS_H_ +#define WEBRTC_TYPEDEFS_H_ + +// Processor architecture detection. For more info on what's defined, see: +// http://msdn.microsoft.com/en-us/library/b0084kay.aspx +// http://www.agner.org/optimize/calling_conventions.pdf +// or with gcc, run: "echo | gcc -E -dM -" +#if defined(_M_X64) || defined(__x86_64__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86_64 +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__aarch64__) +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(_M_IX86) || defined(__i386__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86 +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__ARMEL__) +// TODO(ajm): We'd prefer to control platform defines here, but this is +// currently provided by the Android makefiles. Commented to avoid duplicate +// definition warnings. +//#define WEBRTC_ARCH_ARM +// TODO(ajm): Chromium uses the following two defines. Should we switch? +//#define WEBRTC_ARCH_ARM_FAMILY +//#define WEBRTC_ARCH_ARMEL +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__MIPSEL__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__pnacl__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__PPC__) +#if defined(__PPC64__) +#define WEBRTC_ARCH_64_BITS +#else +#define WEBRTC_ARCH_32_BITS +#endif +#define WEBRTC_ARCH_BIG_ENDIAN +#else +#error Please add support for your architecture in typedefs.h +#endif + +#if !(defined(WEBRTC_ARCH_LITTLE_ENDIAN) ^ defined(WEBRTC_ARCH_BIG_ENDIAN)) +#error Define either WEBRTC_ARCH_LITTLE_ENDIAN or WEBRTC_ARCH_BIG_ENDIAN +#endif + +#if (defined(WEBRTC_ARCH_X86_FAMILY) && !defined(__SSE2__)) || \ + (defined(WEBRTC_ARCH_ARM_V7) && !defined(WEBRTC_ARCH_ARM_NEON)) +#define WEBRTC_CPU_DETECTION +#endif + +#if !defined(_MSC_VER) +#include +#else +// Define C99 equivalent types, since pre-2010 MSVC doesn't provide stdint.h. +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef __int64 int64_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +#endif + +// Borrowed from Chromium's base/compiler_specific.h. +// Annotate a virtual method indicating it must be overriding a virtual +// method in the parent class. +// Use like: +// virtual void foo() OVERRIDE; +#if defined(_MSC_VER) +#define OVERRIDE override +#elif defined(__clang__) +// Clang defaults to C++03 and warns about using override. Squelch that. +// Intentionally no push/pop here so all users of OVERRIDE ignore the warning +// too. This is like passing -Wno-c++11-extensions, except that GCC won't die +// (because it won't see this pragma). +#pragma clang diagnostic ignored "-Wc++11-extensions" +#define OVERRIDE override +#elif defined(__GNUC__) && __cplusplus >= 201103 && \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100) >= 40700 +// GCC 4.7 supports explicit virtual overrides when C++11 support is enabled. +#define OVERRIDE override +#else +#define OVERRIDE +#endif + +// Annotate a function indicating the caller must examine the return value. +// Use like: +// int foo() WARN_UNUSED_RESULT; +// TODO(ajm): Hack to avoid multiple definitions until the base/ of webrtc and +// libjingle are merged. 
+#if !defined(WARN_UNUSED_RESULT) +#if defined(__GNUC__) +#define WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#else +#define WARN_UNUSED_RESULT +#endif +#endif // WARN_UNUSED_RESULT + +// Put after a variable that might not be used, to prevent compiler warnings: +// int result ATTRIBUTE_UNUSED = DoSomething(); +// assert(result == 17); +#ifndef ATTRIBUTE_UNUSED +#if defined(__GNUC__) || defined(__clang__) +#define ATTRIBUTE_UNUSED __attribute__((unused)) +#else +#define ATTRIBUTE_UNUSED +#endif +#endif + +// Macro to be used for switch-case fallthrough (required for enabling +// -Wimplicit-fallthrough warning on Clang). +#ifndef FALLTHROUGH +#if defined(__clang__) +#define FALLTHROUGH() [[clang::fallthrough]] +#else +#define FALLTHROUGH() do { } while (0) +#endif +#endif + +// Annotate a function that will not return control flow to the caller. +#if defined(_MSC_VER) +#define NO_RETURN __declspec(noreturn) +#elif defined(__GNUC__) +#define NO_RETURN __attribute__((noreturn)) +#else +#define NO_RETURN +#endif + +#endif // WEBRTC_TYPEDEFS_H_ diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/webrtc_vad.h b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc_vad.h new file mode 100644 index 00000000..f5bbadf5 --- /dev/null +++ b/runtime/engine/asr/server/brpc/paraformerCPP/webrtc_vad.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes the VAD API calls. Specific function calls are + * given below. + */ + +#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT +#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ + +#include +#include + +typedef struct WebRtcVadInst VadInst; + +#ifdef __cplusplus +extern "C" { +#endif + +// Creates an instance to the VAD structure. +VadInst* WebRtcVad_Create(void); + +// Frees the dynamic memory of a specified VAD instance. +// +// - handle [i] : Pointer to VAD instance that should be freed. +void WebRtcVad_Free(VadInst* handle); + +// Initializes a VAD instance. +// +// - handle [i/o] : Instance that should be initialized. +// +// returns : 0 - (OK), +// -1 - (null pointer or Default mode could not be set). +int WebRtcVad_Init(VadInst* handle); + +// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more +// restrictive in reporting speech. Put in other words the probability of being +// speech when the VAD returns 1 is increased with increasing mode. As a +// consequence also the missed detection rate goes up. +// +// - handle [i/o] : VAD instance. +// - mode [i] : Aggressiveness mode (0, 1, 2, or 3). +// +// returns : 0 - (OK), +// -1 - (null pointer, mode could not be set or the VAD instance +// has not been initialized). +int WebRtcVad_set_mode(VadInst* handle, int mode); + +// Calculates a VAD decision for the |audio_frame|. For valid sampling rates +// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). +// +// - handle [i/o] : VAD Instance. Needs to be initialized by +// WebRtcVad_Init() before call. +// - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 +// - audio_frame [i] : Audio frame buffer. +// - frame_length [i] : Length of audio frame buffer in number of samples. 
+//
+// returns                : 1 - (Active Voice),
+//                          0 - (Non-active Voice),
+//                         -1 - (Error)
+int WebRtcVad_Process(VadInst* handle,
+                      int fs,
+                      const int16_t* audio_frame,
+                      size_t frame_length);
+
+// Checks for valid combinations of |rate| and |frame_length|. We support 10,
+// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
+//
+// - rate [i] : Sampling frequency (Hz).
+// - frame_length [i] : Speech frame buffer length in number of samples.
+//
+// returns : 0 - (valid combination), -1 - (invalid combination)
+int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
diff --git a/runtime/engine/asr/server/brpc/paraformerCPP/win_func.h b/runtime/engine/asr/server/brpc/paraformerCPP/win_func.h
new file mode 100644
index 00000000..90206f36
--- /dev/null
+++ b/runtime/engine/asr/server/brpc/paraformerCPP/win_func.h
@@ -0,0 +1,28 @@
+#include <time.h>
+#ifdef WIN32
+#include <windows.h>
+#else
+#include <sys/time.h>
+#endif
+#ifdef WIN32
+int gettimeofday(struct timeval* tp, void* tzp)
+{
+    time_t clock;
+    struct tm tm;
+    SYSTEMTIME wtm;
+
+    GetLocalTime(&wtm);
+    tm.tm_year = wtm.wYear - 1900;
+    tm.tm_mon = wtm.wMonth - 1;
+    tm.tm_mday = wtm.wDay;
+    tm.tm_hour = wtm.wHour;
+    tm.tm_min = wtm.wMinute;
+    tm.tm_sec = wtm.wSecond;
+    tm.tm_isdst = -1;
+
+    clock = mktime(&tm);
+    tp->tv_sec = clock;
+    tp->tv_usec = wtm.wMilliseconds * 1000;
+    return (0);
+}
+#endif
\ No newline at end of file
diff --git a/runtime/engine/asr/server/brpc/proto/echo.proto b/runtime/engine/asr/server/brpc/proto/echo.proto
new file mode 100644
index 00000000..baf0364a
--- /dev/null
+++ b/runtime/engine/asr/server/brpc/proto/echo.proto
@@ -0,0 +1,22 @@
+syntax="proto2";
+package example;
+
+option cc_generic_services = true;
+
+
+
+message AudioRequest {
+    required string audio = 1;
+    required string extra = 2;
+};
+
+message AudioResponse {
+    required int32 err_no = 1;
+    required string err_msg = 2;
+    required string result = 3;
+    required float cost_time = 4;
+};
+
+service EchoService {
+    rpc audiorecognition(AudioRequest) returns (AudioResponse);
+};
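
For reference, a minimal sketch of how the WebRTC VAD API declared in webrtc_vad.h above is typically driven from C++. It is not part of the patch: the include path, the buffer names, and the 16 kHz / 10 ms framing are illustrative assumptions (at 16 kHz, WebRtcVad_ValidRateAndFrameLength also accepts 320- and 480-sample frames for 20 and 30 ms).

// Sketch only: exercises WebRtcVad_Create/Init/set_mode/Process/Free as
// documented in webrtc_vad.h. `samples` and `num_samples` are placeholders
// for 16 kHz, 16-bit mono PCM owned by the caller.
#include <cstddef>
#include <cstdint>
#include <cstdio>

#include "webrtc_vad.h"

void RunVadSketch(const int16_t* samples, size_t num_samples) {
    VadInst* vad = WebRtcVad_Create();
    if (vad == nullptr || WebRtcVad_Init(vad) != 0) {
        return;
    }
    WebRtcVad_set_mode(vad, 2);  // Aggressiveness 0-3; higher reports speech less often.

    const int fs = 16000;
    const size_t frame_len = 160;  // 10 ms at 16 kHz; a valid rate/length pair.
    for (size_t offset = 0; offset + frame_len <= num_samples; offset += frame_len) {
        int is_speech = WebRtcVad_Process(vad, fs, samples + offset, frame_len);
        if (is_speech < 0) {
            break;  // Invalid rate/length combination or uninitialized handle.
        }
        std::printf("frame at %zu: %s\n", offset, is_speech ? "speech" : "silence");
    }
    WebRtcVad_Free(vad);
}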
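
Because echo.proto sets cc_generic_services, protoc also generates an example::EchoService_Stub that a brpc client can call directly. Below is a hedged sketch of such a synchronous call; the server address, timeout, and the way the audio payload is filled in are assumptions rather than anything defined in this patch.

// Sketch only: a synchronous brpc client call against the EchoService defined
// in echo.proto. Assumes the generated echo.pb.h and a server listening on
// 127.0.0.1:8010; both are illustrative.
#include <string>

#include <brpc/channel.h>
#include <brpc/controller.h>

#include "echo.pb.h"

int CallAudioRecognition(const std::string& wav_bytes) {
    brpc::Channel channel;
    brpc::ChannelOptions options;
    options.timeout_ms = 10000;
    if (channel.Init("127.0.0.1:8010", &options) != 0) {
        return -1;
    }

    example::EchoService_Stub stub(&channel);
    example::AudioRequest request;
    example::AudioResponse response;
    brpc::Controller cntl;

    request.set_audio(wav_bytes);  // Audio payload encoding is an assumption.
    request.set_extra("");         // Required field, so it must be set even if unused.

    // A null closure makes the call synchronous in brpc.
    stub.audiorecognition(&cntl, &request, &response, nullptr);
    if (cntl.Failed()) {
        return -1;
    }
    return response.err_no();
}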