diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index 8bd3f28c..44ee3a58 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -139,7 +139,7 @@ out=':'.join([libs_dir, fluid_dir]); print(out); \ OUTPUT_VARIABLE PADDLE_LIB_DIRS) message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS}) - +add_compile_options(-fPIC) ############################################################################### # Add local library ############################################################################### diff --git a/runtime/cmake/fastdeploy.cmake b/runtime/cmake/fastdeploy.cmake new file mode 100644 index 00000000..773414c1 --- /dev/null +++ b/runtime/cmake/fastdeploy.cmake @@ -0,0 +1,39 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture: +android_arm, android_armv7, android_armv8, android_x86, android_x86_64, +mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86, +windows_x86") + +set(CMAKE_VERBOSE_MAKEFILE ON) + +set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy) +if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz) + exec_program("mkdir -p ${FASTDEPLOY_DIR} && + wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} && + tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} && + mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64") +endif() + +if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz) + exec_program("mkdir -p ${FASTDEPLOY_DIR} && + wget https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz -P ${FASTDEPLOY_DIR} && + tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz -C ${FASTDEPLOY_DIR} && + mv ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared ${FASTDEPLOY_DIR}/android-armv7v8") +endif() + +if (ARCH STREQUAL "mserver_x86_64") + set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64) + add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND") + # add_definitions("-DUSE_ORT_BACKEND") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3") +elseif (ARCH STREQUAL "android_armv7") + set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8) + add_definitions("-DUSE_PADDLE_LITE_BAKEND") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE") +endif() + +include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake) +include_directories(${FASTDEPLOY_INCS}) \ No newline at end of file diff --git a/runtime/engine/CMakeLists.txt b/runtime/engine/CMakeLists.txt index b522e158..42399fe9 100644 --- a/runtime/engine/CMakeLists.txt +++ b/runtime/engine/CMakeLists.txt @@ -10,3 +10,4 @@ add_subdirectory(asr) add_subdirectory(common) add_subdirectory(kaldi) add_subdirectory(codelab) +add_subdirectory(cls) \ No newline at end of file diff --git a/runtime/engine/cls/CMakeLists.txt b/runtime/engine/cls/CMakeLists.txt new file mode 100644 index 00000000..4d5e0cff --- /dev/null +++ b/runtime/engine/cls/CMakeLists.txt @@ -0,0 +1,7 @@ +project(cls) + +include(fastdeploy) +# add_definitions("-DTEST_DEBUG") +# add_definitions("-DPRINT_TIME") + +add_subdirectory(nnet) \ No newline at end of file diff --git a/runtime/engine/cls/nnet/CMakeLists.txt b/runtime/engine/cls/nnet/CMakeLists.txt new file mode 100644 index 00000000..b4b76120 --- /dev/null +++ b/runtime/engine/cls/nnet/CMakeLists.txt @@ -0,0 +1,8 @@ +set(srcs panns_nnet.cc panns_interface.cc) + +add_library(cls SHARED ${srcs}) +target_link_libraries(cls -static-libstdc++;-Wl,-Bsymbolic ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils) + +set(bin_name panns_nnet_main) +add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc) +target_link_libraries(${bin_name} -static-libstdc++;-Wl,-Bsymbolic cls gflags glog) \ No newline at end of file diff --git a/runtime/engine/cls/nnet/panns_interface.cc b/runtime/engine/cls/nnet/panns_interface.cc new file mode 100644 index 00000000..257ee44f --- /dev/null +++ b/runtime/engine/cls/nnet/panns_interface.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cls/nnet/panns_interface.h" +#include "cls/nnet/panns_nnet.h" +#include "common/base/config.h" + +namespace ppspeech { + +void* ClsCreateInstance(const char* conf_path) { + Config conf(conf_path); + // cls init + ppspeech::ClsNnetConf cls_nnet_conf; + cls_nnet_conf.wav_normal_ = conf.Read("wav_normal", true); + cls_nnet_conf.wav_normal_type_ = + conf.Read("wav_normal_type", std::string("linear")); + cls_nnet_conf.wav_norm_mul_factor_ = conf.Read("wav_norm_mul_factor", 1.0); + cls_nnet_conf.model_file_path_ = conf.Read("model_path", std::string("")); + cls_nnet_conf.param_file_path_ = conf.Read("param_path", std::string("")); + cls_nnet_conf.dict_file_path_ = conf.Read("dict_path", std::string("")); + cls_nnet_conf.num_cpu_thread_ = conf.Read("num_cpu_thread", 12); + cls_nnet_conf.samp_freq = conf.Read("samp_freq", 32000); + cls_nnet_conf.frame_length_ms = conf.Read("frame_length_ms", 32); + cls_nnet_conf.frame_shift_ms = conf.Read("frame_shift_ms", 10); + cls_nnet_conf.num_bins = conf.Read("num_bins", 64); + cls_nnet_conf.low_freq = conf.Read("low_freq", 50); + cls_nnet_conf.high_freq = conf.Read("high_freq", 14000); + cls_nnet_conf.dither = conf.Read("dither", 0.0); + + ppspeech::ClsNnet* cls_model = new ppspeech::ClsNnet(); + int ret = cls_model->Init(cls_nnet_conf); + return static_cast(cls_model); +} + +int ClsDestroyInstance(void* instance) { + ppspeech::ClsNnet* cls_model = static_cast(instance); + if (cls_model != NULL) { + delete cls_model; + cls_model = NULL; + } + return 0; +} + +int ClsFeedForward(void* instance, + const char* wav_path, + int topk, + char* result, + int result_max_len) { + ppspeech::ClsNnet* cls_model = static_cast(instance); + if (cls_model == NULL) { + printf("instance is null\n"); + return -1; + } + int ret = cls_model->Forward(wav_path, topk, result, result_max_len); + return 0; +} + +int ClsReset(void* instance) { + ppspeech::ClsNnet* cls_model = static_cast(instance); + if (cls_model == NULL) { + printf("instance is null\n"); + return -1; + } + cls_model->Reset(); + return 0; +} +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/cls/nnet/panns_interface.h b/runtime/engine/cls/nnet/panns_interface.h new file mode 100644 index 00000000..0d1ce95f --- /dev/null +++ b/runtime/engine/cls/nnet/panns_interface.h @@ -0,0 +1,27 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +namespace ppspeech { + +void* ClsCreateInstance(const char* conf_path); +int ClsDestroyInstance(void* instance); +int ClsFeedForward(void* instance, + const char* wav_path, + int topk, + char* result, + int result_max_len); +int ClsReset(void* instance); +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/cls/nnet/panns_nnet.cc b/runtime/engine/cls/nnet/panns_nnet.cc new file mode 100644 index 00000000..6b8213f6 --- /dev/null +++ b/runtime/engine/cls/nnet/panns_nnet.cc @@ -0,0 +1,228 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "cls/nnet/panns_nnet.h" +#ifdef PRINT_TIME +#include "kaldi/base/timer.h" +#endif + +namespace ppspeech { + +ClsNnet::ClsNnet() { + // wav_reader_ = NULL; + runtime_ = NULL; +} + +void ClsNnet::Reset() { + // wav_reader_->Clear(); + ss_.str(""); +} + +int ClsNnet::Init(const ClsNnetConf& conf) { + conf_ = conf; + // init fbank opts + fbank_opts_.frame_opts.samp_freq = conf.samp_freq; + fbank_opts_.frame_opts.frame_length_ms = conf.frame_length_ms; + fbank_opts_.frame_opts.frame_shift_ms = conf.frame_shift_ms; + fbank_opts_.mel_opts.num_bins = conf.num_bins; + fbank_opts_.mel_opts.low_freq = conf.low_freq; + fbank_opts_.mel_opts.high_freq = conf.high_freq; + fbank_opts_.frame_opts.dither = conf.dither; + fbank_opts_.use_log_fbank = false; + + // init dict + if (conf.dict_file_path_ != "") { + ReadFileToVector(conf.dict_file_path_, &dict_); + } + + // init model + fastdeploy::RuntimeOption runtime_option; + +#ifdef USE_ORT_BACKEND + runtime_option.SetModelPath( + conf.model_file_path_, "", fastdeploy::ModelFormat::ONNX); // onnx + runtime_option.UseOrtBackend(); // onnx +#endif +#ifdef USE_PADDLE_LITE_BACKEND + runtime_option.SetModelPath(conf.model_file_path_, + conf.param_file_path_, + fastdeploy::ModelFormat::PADDLE); + runtime_option.UseLiteBackend(); +#endif +#ifdef USE_PADDLE_INFERENCE_BACKEND + runtime_option.SetModelPath(conf.model_file_path_, + conf.param_file_path_, + fastdeploy::ModelFormat::PADDLE); + runtime_option.UsePaddleInferBackend(); +#endif + runtime_option.SetCpuThreadNum(conf.num_cpu_thread_); + runtime_option.DeletePaddleBackendPass("simplify_with_basic_ops_pass"); + runtime_ = std::unique_ptr(new fastdeploy::Runtime()); + if (!runtime_->Init(runtime_option)) { + std::cerr << "--- Init FastDeploy Runitme Failed! " + << "\n--- Model: " << conf.model_file_path_ << std::endl; + return -1; + } else { + std::cout << "--- Init FastDeploy Runitme Done! " + << "\n--- Model: " << conf.model_file_path_ << std::endl; + } + + Reset(); + return 0; +} + +int ClsNnet::Forward(const char* wav_path, + int topk, + char* result, + int result_max_len) { +#ifdef PRINT_TIME + kaldi::Timer timer; + timer.Reset(); +#endif + // read wav + std::ifstream infile(wav_path, std::ifstream::in); + kaldi::WaveData wave_data; + wave_data.Read(infile); + int32 this_channel = 0; + kaldi::Matrix wavform_kaldi = wave_data.Data(); + // only get channel 0 + int wavform_len = wavform_kaldi.NumCols(); + std::vector wavform(wavform_kaldi.Data(), + wavform_kaldi.Data() + wavform_len); + WaveformFloatNormal(&wavform); + WaveformNormal(&wavform, + conf_.wav_normal_, + conf_.wav_normal_type_, + conf_.wav_norm_mul_factor_); +#ifdef TEST_DEBUG + { + std::ofstream fp("cls.wavform", std::ios::out); + for (int i = 0; i < wavform.size(); ++i) { + fp << std::setprecision(18) << wavform[i] << " "; + } + fp << "\n"; + } +#endif +#ifdef PRINT_TIME + printf("wav read consume: %fs\n", timer.Elapsed()); +#endif + +#ifdef PRINT_TIME + timer.Reset(); +#endif + + std::vector feats; + std::unique_ptr data_source( + new ppspeech::DataCache()); + ppspeech::Fbank fbank(fbank_opts_, std::move(data_source)); + fbank.Accept(wavform); + fbank.SetFinished(); + fbank.Read(&feats); + + int feat_dim = fbank_opts_.mel_opts.num_bins; + int num_frames = feats.size() / feat_dim; + + for (int i = 0; i < num_frames; ++i) { + for (int j = 0; j < feat_dim; ++j) { + feats[i * feat_dim + j] = PowerTodb(feats[i * feat_dim + j]); + } + } +#ifdef TEST_DEBUG + { + std::ofstream fp("cls.feat", std::ios::out); + for (int i = 0; i < num_frames; ++i) { + for (int j = 0; j < feat_dim; ++j) { + fp << std::setprecision(18) << feats[i * feat_dim + j] << " "; + } + fp << "\n"; + } + } +#endif +#ifdef PRINT_TIME + printf("extract fbank consume: %fs\n", timer.Elapsed()); +#endif + + // infer + std::vector model_out; +#ifdef PRINT_TIME + timer.Reset(); +#endif + ModelForward(feats.data(), num_frames, feat_dim, &model_out); +#ifdef PRINT_TIME + printf("fast deploy infer consume: %fs\n", timer.Elapsed()); +#endif +#ifdef TEST_DEBUG + { + std::ofstream fp("cls.logits", std::ios::out); + for (int i = 0; i < model_out.size(); ++i) { + fp << std::setprecision(18) << model_out[i] << "\n"; + } + } +#endif + + // construct result str + ss_ << "{"; + GetTopkResult(topk, model_out); + ss_ << "}"; + + if (result_max_len <= ss_.str().size()) { + printf("result_max_len is short than result len\n"); + } + snprintf(result, result_max_len, "%s", ss_.str().c_str()); + return 0; +} + +int ClsNnet::ModelForward(float* features, + const int num_frames, + const int feat_dim, + std::vector* model_out) { + // init input tensor shape + fastdeploy::TensorInfo info = runtime_->GetInputInfo(0); + info.shape = {1, num_frames, feat_dim}; + + std::vector input_tensors(1); + std::vector output_tensors(1); + + input_tensors[0].SetExternalData({1, num_frames, feat_dim}, + fastdeploy::FDDataType::FP32, + static_cast(features)); + + // get input name + input_tensors[0].name = info.name; + + runtime_->Infer(input_tensors, &output_tensors); + + // output_tensors[0].PrintInfo(); + std::vector output_shape = output_tensors[0].Shape(); + model_out->resize(output_shape[0] * output_shape[1]); + memcpy(static_cast(model_out->data()), + output_tensors[0].Data(), + output_shape[0] * output_shape[1] * sizeof(float)); + return 0; +} + +int ClsNnet::GetTopkResult(int k, const std::vector& model_out) { + std::vector values; + std::vector indics; + TopK(model_out, k, &values, &indics); + for (int i = 0; i < k; ++i) { + if (i != 0) { + ss_ << ","; + } + ss_ << "\"" << dict_[indics[i]] << "\":\"" << values[i] << "\""; + } + return 0; +} + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/cls/nnet/panns_nnet.h b/runtime/engine/cls/nnet/panns_nnet.h new file mode 100644 index 00000000..3a4a5718 --- /dev/null +++ b/runtime/engine/cls/nnet/panns_nnet.h @@ -0,0 +1,74 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "common/frontend/data_cache.h" +#include "common/frontend/fbank.h" +#include "common/frontend/feature-fbank.h" +#include "common/frontend/frontend_itf.h" +#include "common/frontend/wave-reader.h" +#include "common/utils/audio_process.h" +#include "common/utils/file_utils.h" +#include "fastdeploy/runtime.h" +#include "kaldi/util/kaldi-io.h" +#include "kaldi/util/table-types.h" + +namespace ppspeech { +struct ClsNnetConf { + // wav + bool wav_normal_; + std::string wav_normal_type_; + float wav_norm_mul_factor_; + // model + std::string model_file_path_; + std::string param_file_path_; + std::string dict_file_path_; + int num_cpu_thread_; + // fbank + float samp_freq; + float frame_length_ms; + float frame_shift_ms; + int num_bins; + float low_freq; + float high_freq; + float dither; +}; + +class ClsNnet { + public: + ClsNnet(); + int Init(const ClsNnetConf& conf); + int Forward(const char* wav_path, + int topk, + char* result, + int result_max_len); + void Reset(); + + private: + int ModelForward(float* features, + const int num_frames, + const int feat_dim, + std::vector* model_out); + int ModelForwardStream(std::vector* feats); + int GetTopkResult(int k, const std::vector& model_out); + + ClsNnetConf conf_; + knf::FbankOptions fbank_opts_; + std::unique_ptr runtime_; + std::vector dict_; + std::stringstream ss_; +}; + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/cls/nnet/panns_nnet_main.cc b/runtime/engine/cls/nnet/panns_nnet_main.cc new file mode 100644 index 00000000..4280d14c --- /dev/null +++ b/runtime/engine/cls/nnet/panns_nnet_main.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include "base/flags.h" +#include "cls/nnet/panns_interface.h" + +DEFINE_string(conf_path, "", "config path"); +DEFINE_string(scp_path, "", "wav scp path"); +DEFINE_string(topk, "", "print topk results"); + +int main(int argc, char* argv[]) { + gflags::SetUsageMessage("Usage:"); + gflags::ParseCommandLineFlags(&argc, &argv, false); + google::InitGoogleLogging(argv[0]); + google::InstallFailureSignalHandler(); + FLAGS_logtostderr = 1; + CHECK_GT(FLAGS_conf_path.size(), 0); + CHECK_GT(FLAGS_scp_path.size(), 0); + CHECK_GT(FLAGS_topk.size(), 0); + void* instance = ppspeech::ClsCreateInstance(FLAGS_conf_path.c_str()); + int ret = 0; + // read wav + std::ifstream ifs(FLAGS_scp_path); + std::string line = ""; + int topk = std::atoi(FLAGS_topk.c_str()); + while (getline(ifs, line)) { + // read wav + char result[1024] = {0}; + ret = ppspeech::ClsFeedForward( + instance, line.c_str(), topk, result, 1024); + printf("%s %s\n", line.c_str(), result); + ret = ppspeech::ClsReset(instance); + } + ret = ppspeech::ClsDestroyInstance(instance); + return 0; +} diff --git a/runtime/engine/common/base/config.h b/runtime/engine/common/base/config.h new file mode 100644 index 00000000..c59c3ab8 --- /dev/null +++ b/runtime/engine/common/base/config.h @@ -0,0 +1,338 @@ +// Copyright (c) code is from +// https://blog.csdn.net/huixingshao/article/details/45969887. + +#include +#include +#include +#include +#include +using namespace std; + +#pragma once + +#pragma region ParseIniFile +/* +* \brief Generic configuration Class +* +*/ +class Config { + // Data + protected: + std::string m_Delimiter; //!< separator between key and value + std::string m_Comment; //!< separator between value and comments + std::map + m_Contents; //!< extracted keys and values + + typedef std::map::iterator mapi; + typedef std::map::const_iterator mapci; + // Methods + public: + Config(std::string filename, + std::string delimiter = "=", + std::string comment = "#"); + Config(); + template + T Read(const std::string& in_key) const; //! + template + T Read(const std::string& in_key, const T& in_value) const; + template + bool ReadInto(T* out_var, const std::string& in_key) const; + template + bool ReadInto(T* out_var, + const std::string& in_key, + const T& in_value) const; + bool FileExist(std::string filename); + void ReadFile(std::string filename, + std::string delimiter = "=", + std::string comment = "#"); + + // Check whether key exists in configuration + bool KeyExists(const std::string& in_key) const; + + // Modify keys and values + template + void Add(const std::string& in_key, const T& in_value); + void Remove(const std::string& in_key); + + // Check or change configuration syntax + std::string GetDelimiter() const { return m_Delimiter; } + std::string GetComment() const { return m_Comment; } + std::string SetDelimiter(const std::string& in_s) { + std::string old = m_Delimiter; + m_Delimiter = in_s; + return old; + } + std::string SetComment(const std::string& in_s) { + std::string old = m_Comment; + m_Comment = in_s; + return old; + } + + // Write or read configuration + friend std::ostream& operator<<(std::ostream& os, const Config& cf); + friend std::istream& operator>>(std::istream& is, Config& cf); + + protected: + template + static std::string T_as_string(const T& t); + template + static T string_as_T(const std::string& s); + static void Trim(std::string* inout_s); + + + // Exception types + public: + struct File_not_found { + std::string filename; + explicit File_not_found(const std::string& filename_ = std::string()) + : filename(filename_) {} + }; + struct Key_not_found { // thrown only by T read(key) variant of read() + std::string key; + explicit Key_not_found(const std::string& key_ = std::string()) + : key(key_) {} + }; +}; + +/* static */ +template +std::string Config::T_as_string(const T& t) { + // Convert from a T to a string + // Type T must support << operator + std::ostringstream ost; + ost << t; + return ost.str(); +} + + +/* static */ +template +T Config::string_as_T(const std::string& s) { + // Convert from a string to a T + // Type T must support >> operator + T t; + std::istringstream ist(s); + ist >> t; + return t; +} + + +/* static */ +template <> +inline std::string Config::string_as_T(const std::string& s) { + // Convert from a string to a string + // In other words, do nothing + return s; +} + + +/* static */ +template <> +inline bool Config::string_as_T(const std::string& s) { + // Convert from a string to a bool + // Interpret "false", "F", "no", "n", "0" as false + // Interpret "true", "T", "yes", "y", "1", "-1", or anything else as true + bool b = true; + std::string sup = s; + for (std::string::iterator p = sup.begin(); p != sup.end(); ++p) + *p = toupper(*p); // make string all caps + if (sup == std::string("FALSE") || sup == std::string("F") || + sup == std::string("NO") || sup == std::string("N") || + sup == std::string("0") || sup == std::string("NONE")) + b = false; + return b; +} + + +template +T Config::Read(const std::string& key) const { + // Read the value corresponding to key + mapci p = m_Contents.find(key); + if (p == m_Contents.end()) throw Key_not_found(key); + return string_as_T(p->second); +} + + +template +T Config::Read(const std::string& key, const T& value) const { + // Return the value corresponding to key or given default value + // if key is not found + mapci p = m_Contents.find(key); + if (p == m_Contents.end()) { + printf("%s = %s(default)\n", key.c_str(), T_as_string(value).c_str()); + return value; + } else { + printf("%s = %s\n", key.c_str(), T_as_string(p->second).c_str()); + return string_as_T(p->second); + } +} + + +template +bool Config::ReadInto(T* var, const std::string& key) const { + // Get the value corresponding to key and store in var + // Return true if key is found + // Otherwise leave var untouched + mapci p = m_Contents.find(key); + bool found = (p != m_Contents.end()); + if (found) *var = string_as_T(p->second); + return found; +} + + +template +bool Config::ReadInto(T* var, const std::string& key, const T& value) const { + // Get the value corresponding to key and store in var + // Return true if key is found + // Otherwise set var to given default + mapci p = m_Contents.find(key); + bool found = (p != m_Contents.end()); + if (found) + *var = string_as_T(p->second); + else + var = value; + return found; +} + + +template +void Config::Add(const std::string& in_key, const T& value) { + // Add a key with given value + std::string v = T_as_string(value); + std::string key = in_key; + Trim(&key); + Trim(&v); + m_Contents[key] = v; + return; +} + +Config::Config(string filename, string delimiter, string comment) + : m_Delimiter(delimiter), m_Comment(comment) { + // Construct a Config, getting keys and values from given file + + std::ifstream in(filename.c_str()); + + if (!in) throw File_not_found(filename); + + in >> (*this); +} + + +Config::Config() : m_Delimiter(string(1, '=')), m_Comment(string(1, '#')) { + // Construct a Config without a file; empty +} + + +bool Config::KeyExists(const string& key) const { + // Indicate whether key is found + mapci p = m_Contents.find(key); + return (p != m_Contents.end()); +} + + +/* static */ +void Config::Trim(string* inout_s) { + // Remove leading and trailing whitespace + static const char whitespace[] = " \n\t\v\r\f"; + inout_s->erase(0, inout_s->find_first_not_of(whitespace)); + inout_s->erase(inout_s->find_last_not_of(whitespace) + 1U); +} + + +std::ostream& operator<<(std::ostream& os, const Config& cf) { + // Save a Config to os + for (Config::mapci p = cf.m_Contents.begin(); p != cf.m_Contents.end(); + ++p) { + os << p->first << " " << cf.m_Delimiter << " "; + os << p->second << std::endl; + } + return os; +} + +void Config::Remove(const string& key) { + // Remove key and its value + m_Contents.erase(m_Contents.find(key)); + return; +} + +std::istream& operator>>(std::istream& is, Config& cf) { + // Load a Config from is + // Read in keys and values, keeping internal whitespace + typedef string::size_type pos; + const string& delim = cf.m_Delimiter; // separator + const string& comm = cf.m_Comment; // comment + const pos skip = delim.length(); // length of separator + + string nextline = ""; // might need to read ahead to see where value ends + + while (is || nextline.length() > 0) { + // Read an entire line at a time + string line; + if (nextline.length() > 0) { + line = nextline; // we read ahead; use it now + nextline = ""; + } else { + std::getline(is, line); + } + + // Ignore comments + line = line.substr(0, line.find(comm)); + + // Parse the line if it contains a delimiter + pos delimPos = line.find(delim); + if (delimPos < string::npos) { + // Extract the key + string key = line.substr(0, delimPos); + line.replace(0, delimPos + skip, ""); + + // See if value continues on the next line + // Stop at blank line, next line with a key, end of stream, + // or end of file sentry + bool terminate = false; + while (!terminate && is) { + std::getline(is, nextline); + terminate = true; + + string nlcopy = nextline; + Config::Trim(&nlcopy); + if (nlcopy == "") continue; + + nextline = nextline.substr(0, nextline.find(comm)); + if (nextline.find(delim) != string::npos) continue; + + nlcopy = nextline; + Config::Trim(&nlcopy); + if (nlcopy != "") line += "\n"; + line += nextline; + terminate = false; + } + + // Store key and value + Config::Trim(&key); + Config::Trim(&line); + cf.m_Contents[key] = line; // overwrites if key is repeated + } + } + + return is; +} +bool Config::FileExist(std::string filename) { + bool exist = false; + std::ifstream in(filename.c_str()); + if (in) exist = true; + return exist; +} + +void Config::ReadFile(string filename, string delimiter, string comment) { + m_Delimiter = delimiter; + m_Comment = comment; + std::ifstream in(filename.c_str()); + + if (!in) throw File_not_found(filename); + + in >> (*this); +} + +#pragma endregion ParseIniFIle diff --git a/runtime/engine/common/utils/CMakeLists.txt b/runtime/engine/common/utils/CMakeLists.txt index c47b25c0..8589b19a 100644 --- a/runtime/engine/common/utils/CMakeLists.txt +++ b/runtime/engine/common/utils/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(utils file_utils.cc math.cc strings.cc + audio_process.cc ) diff --git a/runtime/engine/common/utils/audio_process.cc b/runtime/engine/common/utils/audio_process.cc new file mode 100644 index 00000000..54540b85 --- /dev/null +++ b/runtime/engine/common/utils/audio_process.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "utils/audio_process.h" + +namespace ppspeech{ + +int WaveformFloatNormal(std::vector* waveform) { + int tot_samples = waveform->size(); + for (int i = 0; i < tot_samples; i++) { + (*waveform)[i] = (*waveform)[i] / 32768.0; + } + return 0; +} + +int WaveformNormal(std::vector* waveform, + bool wav_normal, + const std::string& wav_normal_type, + float wav_norm_mul_factor) { + if (wav_normal == false) { + return 0; + } + if (wav_normal_type == "linear") { + float amax = INT32_MIN; + for (int i = 0; i < waveform->size(); ++i) { + float tmp = std::abs((*waveform)[i]); + amax = std::max(amax, tmp); + } + float factor = 1.0 / (amax + 1e-8); + for (int i = 0; i < waveform->size(); ++i) { + (*waveform)[i] = (*waveform)[i] * factor * wav_norm_mul_factor; + } + } else if (wav_normal_type == "gaussian") { + double sum = std::accumulate(waveform->begin(), waveform->end(), 0.0); + double mean = sum / waveform->size(); //均值 + + double accum = 0.0; + std::for_each(waveform->begin(), waveform->end(), [&](const double d) { + accum += (d - mean) * (d - mean); + }); + + double stdev = sqrt(accum / (waveform->size() - 1)); //方差 + stdev = std::max(stdev, 1e-8); + + for (int i = 0; i < waveform->size(); ++i) { + (*waveform)[i] = + wav_norm_mul_factor * ((*waveform)[i] - mean) / stdev; + } + } else { + printf("don't support\n"); + return -1; + } + return 0; +} + +float PowerTodb(float in, float ref_value, float amin, float top_db) { + if (amin <= 0) { + printf("amin must be strictly positive\n"); + return -1; + } + + if (ref_value <= 0) { + printf("ref_value must be strictly positive\n"); + return -1; + } + + float out = 10.0 * log10(std::max(amin, in)); + out -= 10.0 * log10(std::max(ref_value, amin)); + return out; +} + +} // namespace ppspeech \ No newline at end of file diff --git a/runtime/engine/common/utils/audio_process.h b/runtime/engine/common/utils/audio_process.h new file mode 100644 index 00000000..164d4c07 --- /dev/null +++ b/runtime/engine/common/utils/audio_process.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include + +namespace ppspeech{ +int WaveformFloatNormal(std::vector* waveform); +int WaveformNormal(std::vector* waveform, + bool wav_normal, + const std::string& wav_normal_type, + float wav_norm_mul_factor); +float PowerTodb(float in, + float ref_value = 1.0, + float amin = 1e-10, + float top_db = 80.0); +} // namespace ppspeech \ No newline at end of file