add cls engine (#2923)

pull/2968/head
masimeng1994 1 year ago committed by GitHub
parent 2f8aad95e0
commit 78e29c8ec4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -139,7 +139,7 @@ out=':'.join([libs_dir, fluid_dir]); print(out); \
OUTPUT_VARIABLE PADDLE_LIB_DIRS)
message(STATUS PADDLE_LIB_DIRS= ${PADDLE_LIB_DIRS})
add_compile_options(-fPIC)
###############################################################################
# Add local library
###############################################################################

@ -0,0 +1,39 @@
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Target platform. Only the values handled by the if/elseif chain at the bottom
# of this file (mserver_x86_64, android_armv7) select a FastDeploy package.
set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture:
android_arm, android_armv7, android_armv8, android_x86, android_x86_64,
mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86,
windows_x86")

set(CMAKE_VERBOSE_MAKEFILE ON)

# Prebuilt FastDeploy SDKs are downloaded into and unpacked under this directory.
set(FASTDEPLOY_DIR ${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy)

# Linux x64 SDK: the downloaded tarball doubles as the "already fetched" marker.
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
    exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64")
endif()

# Android (armv7/armv8) SDK, fetched the same way.
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
    exec_program("mkdir -p ${FASTDEPLOY_DIR} &&
wget https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz -P ${FASTDEPLOY_DIR} &&
tar xzvf ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz -C ${FASTDEPLOY_DIR} &&
mv ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared ${FASTDEPLOY_DIR}/android-armv7v8")
endif()

# Pick the SDK, the inference backend macro and the compiler flags per target.
if (ARCH STREQUAL "mserver_x86_64")
    set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64)
    add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
    # add_definitions("-DUSE_ORT_BACKEND")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
elseif (ARCH STREQUAL "android_armv7")
    set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8)
    # Must match the macro tested in the sources (USE_PADDLE_LITE_BACKEND);
    # a misspelled name silently compiles the engine without any backend setup.
    add_definitions("-DUSE_PADDLE_LITE_BACKEND")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
elseif (NOT FASTDEPLOY_INSTALL_DIR)
    # Without an install dir the include() below would fail with an unhelpful
    # "could not find /FastDeploy.cmake" error; report the real cause instead.
    message(FATAL_ERROR
        "No FastDeploy package is configured for ARCH='${ARCH}'. "
        "Use mserver_x86_64 or android_armv7, or set FASTDEPLOY_INSTALL_DIR.")
endif()

include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})

@ -10,3 +10,4 @@ add_subdirectory(asr)
add_subdirectory(common)
add_subdirectory(kaldi)
add_subdirectory(codelab)
add_subdirectory(cls)

@ -0,0 +1,7 @@
# Build entry for the cls component.
project(cls)
# Load the "fastdeploy" CMake module before descending into nnet/.
# NOTE(review): presumably this is the FastDeploy setup script that defines
# FASTDEPLOY_LIBS / FASTDEPLOY_INCS - confirm it is on CMAKE_MODULE_PATH.
include(fastdeploy)
# Optional debug switches tested by the cls sources:
#   TEST_DEBUG - dump intermediate data (cls.wavform, cls.feat, cls.logits)
#   PRINT_TIME - print per-stage timings
# add_definitions("-DTEST_DEBUG")
# add_definitions("-DPRINT_TIME")
add_subdirectory(nnet)

@ -0,0 +1,8 @@
# Shared library "cls": the PANNs network wrapper plus its C-style interface.
set(srcs panns_nnet.cc panns_interface.cc)
add_library(cls SHARED ${srcs})
# libstdc++ is linked statically and -Bsymbolic is passed to the linker; the
# remaining items are the FastDeploy libraries and in-tree targets.
target_link_libraries(cls -static-libstdc++;-Wl,-Bsymbolic ${FASTDEPLOY_LIBS} kaldi-matrix kaldi-base frontend utils)
# Command-line driver built from panns_nnet_main.cc.
set(bin_name panns_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name} -static-libstdc++;-Wl,-Bsymbolic cls gflags glog)

@ -0,0 +1,78 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cls/nnet/panns_interface.h"
#include "cls/nnet/panns_nnet.h"
#include "common/base/config.h"
namespace ppspeech {
void* ClsCreateInstance(const char* conf_path) {
Config conf(conf_path);
// cls init
ppspeech::ClsNnetConf cls_nnet_conf;
cls_nnet_conf.wav_normal_ = conf.Read("wav_normal", true);
cls_nnet_conf.wav_normal_type_ =
conf.Read("wav_normal_type", std::string("linear"));
cls_nnet_conf.wav_norm_mul_factor_ = conf.Read("wav_norm_mul_factor", 1.0);
cls_nnet_conf.model_file_path_ = conf.Read("model_path", std::string(""));
cls_nnet_conf.param_file_path_ = conf.Read("param_path", std::string(""));
cls_nnet_conf.dict_file_path_ = conf.Read("dict_path", std::string(""));
cls_nnet_conf.num_cpu_thread_ = conf.Read("num_cpu_thread", 12);
cls_nnet_conf.samp_freq = conf.Read("samp_freq", 32000);
cls_nnet_conf.frame_length_ms = conf.Read("frame_length_ms", 32);
cls_nnet_conf.frame_shift_ms = conf.Read("frame_shift_ms", 10);
cls_nnet_conf.num_bins = conf.Read("num_bins", 64);
cls_nnet_conf.low_freq = conf.Read("low_freq", 50);
cls_nnet_conf.high_freq = conf.Read("high_freq", 14000);
cls_nnet_conf.dither = conf.Read("dither", 0.0);
ppspeech::ClsNnet* cls_model = new ppspeech::ClsNnet();
int ret = cls_model->Init(cls_nnet_conf);
return static_cast<void*>(cls_model);
}
// Destroys an engine created by ClsCreateInstance(). A null handle is
// accepted and ignored. Always returns 0.
int ClsDestroyInstance(void* instance) {
    // delete on a null pointer is a no-op, so no explicit guard is required.
    delete static_cast<ppspeech::ClsNnet*>(instance);
    return 0;
}
// Classifies the wav file at `wav_path` with the engine behind `instance`.
//
// The top-`topk` result string is written into `result`, which holds at most
// `result_max_len` bytes including the terminating NUL.
//
// Returns -1 when `instance` is null; otherwise the status reported by
// ClsNnet::Forward() (0 on success).
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len) {
    ppspeech::ClsNnet* cls_model = static_cast<ppspeech::ClsNnet*>(instance);
    if (cls_model == NULL) {
        printf("instance is null\n");
        return -1;
    }
    // Propagate the engine's status instead of unconditionally reporting
    // success, so callers can detect a failed classification.
    return cls_model->Forward(wav_path, topk, result, result_max_len);
}
// Clears the per-call state of the engine behind `instance`.
// Returns 0 on success, -1 when `instance` is null.
int ClsReset(void* instance) {
    auto* engine = static_cast<ppspeech::ClsNnet*>(instance);
    if (engine != NULL) {
        engine->Reset();
        return 0;
    }
    printf("instance is null\n");
    return -1;
}
} // namespace ppspeech

@ -0,0 +1,27 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Handle-based entry points of the cls engine. The engine is passed around as
// an opaque void* obtained from ClsCreateInstance().
namespace ppspeech {
// Builds an engine from the configuration file at conf_path and returns it as
// an opaque handle.
void* ClsCreateInstance(const char* conf_path);
// Destroys the engine behind instance (a null handle is ignored). Returns 0.
int ClsDestroyInstance(void* instance);
// Runs classification on the wav file at wav_path and writes the top-k result
// string into result, truncated to result_max_len bytes (terminator included).
// Returns -1 when instance is null.
int ClsFeedForward(void* instance,
const char* wav_path,
int topk,
char* result,
int result_max_len);
// Resets the engine's per-call state. Returns -1 when instance is null,
// 0 otherwise.
int ClsReset(void* instance);
} // namespace ppspeech

@ -0,0 +1,228 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "cls/nnet/panns_nnet.h"
#ifdef PRINT_TIME
#include "kaldi/base/timer.h"
#endif
namespace ppspeech {
// Constructs an engine without a runtime; Init() creates the runtime later.
ClsNnet::ClsNnet() : runtime_(nullptr) {}
// Drops the text accumulated in the result buffer by a previous Forward().
void ClsNnet::Reset() {
    ss_.str(std::string());
}
// Configures the engine from `conf`: stores the configuration, sets up the
// fbank options, loads the label dictionary (when a path is given) and creates
// and initializes the FastDeploy runtime.
//
// The inference backend is chosen at compile time through USE_ORT_BACKEND,
// USE_PADDLE_LITE_BACKEND or USE_PADDLE_INFERENCE_BACKEND.
//
// Returns 0 on success, -1 when the FastDeploy runtime fails to initialize.
int ClsNnet::Init(const ClsNnetConf& conf) {
    conf_ = conf;

    // init fbank opts
    fbank_opts_.frame_opts.samp_freq = conf.samp_freq;
    fbank_opts_.frame_opts.frame_length_ms = conf.frame_length_ms;
    fbank_opts_.frame_opts.frame_shift_ms = conf.frame_shift_ms;
    fbank_opts_.mel_opts.num_bins = conf.num_bins;
    fbank_opts_.mel_opts.low_freq = conf.low_freq;
    fbank_opts_.mel_opts.high_freq = conf.high_freq;
    fbank_opts_.frame_opts.dither = conf.dither;
    // Forward() converts the fbank output with PowerTodb() itself, so the
    // fbank must not take the log.
    fbank_opts_.use_log_fbank = false;

    // init dict
    if (!conf.dict_file_path_.empty()) {
        ReadFileToVector(conf.dict_file_path_, &dict_);
        if (dict_.empty()) {
            // Make a missing/empty label file visible instead of failing
            // later when class indices are mapped to labels.
            std::cerr << "--- Warning: no labels loaded from dict: "
                      << conf.dict_file_path_ << std::endl;
        }
    }

    // init model
    fastdeploy::RuntimeOption runtime_option;
#ifdef USE_ORT_BACKEND
    runtime_option.SetModelPath(
        conf.model_file_path_, "", fastdeploy::ModelFormat::ONNX);  // onnx
    runtime_option.UseOrtBackend();                                 // onnx
#endif
#ifdef USE_PADDLE_LITE_BACKEND
    runtime_option.SetModelPath(conf.model_file_path_,
                                conf.param_file_path_,
                                fastdeploy::ModelFormat::PADDLE);
    runtime_option.UseLiteBackend();
#endif
#ifdef USE_PADDLE_INFERENCE_BACKEND
    runtime_option.SetModelPath(conf.model_file_path_,
                                conf.param_file_path_,
                                fastdeploy::ModelFormat::PADDLE);
    runtime_option.UsePaddleInferBackend();
#endif
    runtime_option.SetCpuThreadNum(conf.num_cpu_thread_);
    runtime_option.DeletePaddleBackendPass("simplify_with_basic_ops_pass");

    runtime_ = std::unique_ptr<fastdeploy::Runtime>(new fastdeploy::Runtime());
    if (!runtime_->Init(runtime_option)) {
        std::cerr << "--- Init FastDeploy Runtime Failed! "
                  << "\n--- Model: " << conf.model_file_path_ << std::endl;
        return -1;
    }
    std::cout << "--- Init FastDeploy Runtime Done! "
              << "\n--- Model: " << conf.model_file_path_ << std::endl;

    Reset();
    return 0;
}
int ClsNnet::Forward(const char* wav_path,
int topk,
char* result,
int result_max_len) {
#ifdef PRINT_TIME
kaldi::Timer timer;
timer.Reset();
#endif
// read wav
std::ifstream infile(wav_path, std::ifstream::in);
kaldi::WaveData wave_data;
wave_data.Read(infile);
int32 this_channel = 0;
kaldi::Matrix<float> wavform_kaldi = wave_data.Data();
// only get channel 0
int wavform_len = wavform_kaldi.NumCols();
std::vector<float> wavform(wavform_kaldi.Data(),
wavform_kaldi.Data() + wavform_len);
WaveformFloatNormal(&wavform);
WaveformNormal(&wavform,
conf_.wav_normal_,
conf_.wav_normal_type_,
conf_.wav_norm_mul_factor_);
#ifdef TEST_DEBUG
{
std::ofstream fp("cls.wavform", std::ios::out);
for (int i = 0; i < wavform.size(); ++i) {
fp << std::setprecision(18) << wavform[i] << " ";
}
fp << "\n";
}
#endif
#ifdef PRINT_TIME
printf("wav read consume: %fs\n", timer.Elapsed());
#endif
#ifdef PRINT_TIME
timer.Reset();
#endif
std::vector<float> feats;
std::unique_ptr<ppspeech::FrontendInterface> data_source(
new ppspeech::DataCache());
ppspeech::Fbank fbank(fbank_opts_, std::move(data_source));
fbank.Accept(wavform);
fbank.SetFinished();
fbank.Read(&feats);
int feat_dim = fbank_opts_.mel_opts.num_bins;
int num_frames = feats.size() / feat_dim;
for (int i = 0; i < num_frames; ++i) {
for (int j = 0; j < feat_dim; ++j) {
feats[i * feat_dim + j] = PowerTodb(feats[i * feat_dim + j]);
}
}
#ifdef TEST_DEBUG
{
std::ofstream fp("cls.feat", std::ios::out);
for (int i = 0; i < num_frames; ++i) {
for (int j = 0; j < feat_dim; ++j) {
fp << std::setprecision(18) << feats[i * feat_dim + j] << " ";
}
fp << "\n";
}
}
#endif
#ifdef PRINT_TIME
printf("extract fbank consume: %fs\n", timer.Elapsed());
#endif
// infer
std::vector<float> model_out;
#ifdef PRINT_TIME
timer.Reset();
#endif
ModelForward(feats.data(), num_frames, feat_dim, &model_out);
#ifdef PRINT_TIME
printf("fast deploy infer consume: %fs\n", timer.Elapsed());
#endif
#ifdef TEST_DEBUG
{
std::ofstream fp("cls.logits", std::ios::out);
for (int i = 0; i < model_out.size(); ++i) {
fp << std::setprecision(18) << model_out[i] << "\n";
}
}
#endif
// construct result str
ss_ << "{";
GetTopkResult(topk, model_out);
ss_ << "}";
if (result_max_len <= ss_.str().size()) {
printf("result_max_len is short than result len\n");
}
snprintf(result, result_max_len, "%s", ss_.str().c_str());
return 0;
}
int ClsNnet::ModelForward(float* features,
const int num_frames,
const int feat_dim,
std::vector<float>* model_out) {
// init input tensor shape
fastdeploy::TensorInfo info = runtime_->GetInputInfo(0);
info.shape = {1, num_frames, feat_dim};
std::vector<fastdeploy::FDTensor> input_tensors(1);
std::vector<fastdeploy::FDTensor> output_tensors(1);
input_tensors[0].SetExternalData({1, num_frames, feat_dim},
fastdeploy::FDDataType::FP32,
static_cast<void*>(features));
// get input name
input_tensors[0].name = info.name;
runtime_->Infer(input_tensors, &output_tensors);
// output_tensors[0].PrintInfo();
std::vector<int64_t> output_shape = output_tensors[0].Shape();
model_out->resize(output_shape[0] * output_shape[1]);
memcpy(static_cast<void*>(model_out->data()),
output_tensors[0].Data(),
output_shape[0] * output_shape[1] * sizeof(float));
return 0;
}
// Appends the `k` best entries of `model_out` to the result buffer as
// comma-separated `"label":"score"` pairs.
//
// `k` is clamped to the number of scores available. A class index without an
// entry in the label dictionary is written as the numeric index instead of
// reading past the end of the dictionary.
//
// Always returns 0.
int ClsNnet::GetTopkResult(int k, const std::vector<float>& model_out) {
    if (k <= 0 || model_out.empty()) {
        return 0;
    }
    if (static_cast<size_t>(k) > model_out.size()) {
        k = static_cast<int>(model_out.size());
    }

    std::vector<float> values;
    std::vector<int> indics;
    TopK(model_out, k, &values, &indics);

    // Never trust the helper to return exactly k entries.
    size_t count = values.size() < indics.size() ? values.size() : indics.size();
    if (static_cast<size_t>(k) < count) {
        count = static_cast<size_t>(k);
    }

    for (size_t i = 0; i < count; ++i) {
        if (i != 0) {
            ss_ << ",";
        }
        const int label_id = indics[i];
        ss_ << "\"";
        if (label_id >= 0 && static_cast<size_t>(label_id) < dict_.size()) {
            ss_ << dict_[label_id];
        } else {
            ss_ << label_id;
        }
        ss_ << "\":\"" << values[i] << "\"";
    }
    return 0;
}
} // namespace ppspeech

@ -0,0 +1,74 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "common/frontend/data_cache.h"
#include "common/frontend/fbank.h"
#include "common/frontend/feature-fbank.h"
#include "common/frontend/frontend_itf.h"
#include "common/frontend/wave-reader.h"
#include "common/utils/audio_process.h"
#include "common/utils/file_utils.h"
#include "fastdeploy/runtime.h"
#include "kaldi/util/kaldi-io.h"
#include "kaldi/util/table-types.h"
namespace ppspeech {
// Configuration of the classification engine.
//
// Every field has a default so that a default-constructed object is fully
// defined; the values are the fallbacks ClsCreateInstance() uses when a key is
// missing from the config file.
struct ClsNnetConf {
    // wav
    bool wav_normal_ = true;                  // normalize the waveform at all
    std::string wav_normal_type_ = "linear";  // "linear" or "gaussian"
    float wav_norm_mul_factor_ = 1.0f;        // gain applied after normalizing
    // model
    std::string model_file_path_;
    std::string param_file_path_;
    std::string dict_file_path_;  // label dictionary; empty = not loaded
    int num_cpu_thread_ = 12;
    // fbank
    float samp_freq = 32000.0f;
    float frame_length_ms = 32.0f;
    float frame_shift_ms = 10.0f;
    int num_bins = 64;
    float low_freq = 50.0f;
    float high_freq = 14000.0f;
    float dither = 0.0f;
};
// Audio classification engine: reads a wav file, extracts fbank features,
// runs a FastDeploy runtime on them and formats the top-k classes as text.
class ClsNnet {
public:
ClsNnet();
// Stores conf, sets up the fbank options, loads the label dictionary and
// initializes the runtime. Returns 0 on success, -1 if the runtime fails
// to initialize.
int Init(const ClsNnetConf& conf);
// Classifies the wav file at wav_path and writes the top-k result string into
// result (at most result_max_len bytes, terminator included).
int Forward(const char* wav_path,
int topk,
char* result,
int result_max_len);
// Clears the accumulated result text.
void Reset();
private:
// Feeds features as a {1, num_frames, feat_dim} FP32 tensor to the runtime
// and copies the first output tensor into model_out.
int ModelForward(float* features,
const int num_frames,
const int feat_dim,
std::vector<float>* model_out);
// NOTE(review): declared here, but no definition is visible in the
// accompanying panns_nnet.cc - confirm it is implemented before calling it.
int ModelForwardStream(std::vector<float>* feats);
// Appends the k best (label, score) pairs of model_out to ss_.
int GetTopkResult(int k, const std::vector<float>& model_out);
ClsNnetConf conf_; // copy of the configuration passed to Init()
knf::FbankOptions fbank_opts_; // feature extraction options
std::unique_ptr<fastdeploy::Runtime> runtime_; // created in Init()
std::vector<std::string> dict_; // class index -> label
std::stringstream ss_; // result text built by Forward()
};
} // namespace ppspeech

@ -0,0 +1,49 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <fstream>
#include <string>
#include "base/flags.h"
#include "cls/nnet/panns_interface.h"
DEFINE_string(conf_path, "", "config path");
DEFINE_string(scp_path, "", "wav scp path");
DEFINE_string(topk, "", "print topk results");
int main(int argc, char* argv[]) {
gflags::SetUsageMessage("Usage:");
gflags::ParseCommandLineFlags(&argc, &argv, false);
google::InitGoogleLogging(argv[0]);
google::InstallFailureSignalHandler();
FLAGS_logtostderr = 1;
CHECK_GT(FLAGS_conf_path.size(), 0);
CHECK_GT(FLAGS_scp_path.size(), 0);
CHECK_GT(FLAGS_topk.size(), 0);
void* instance = ppspeech::ClsCreateInstance(FLAGS_conf_path.c_str());
int ret = 0;
// read wav
std::ifstream ifs(FLAGS_scp_path);
std::string line = "";
int topk = std::atoi(FLAGS_topk.c_str());
while (getline(ifs, line)) {
// read wav
char result[1024] = {0};
ret = ppspeech::ClsFeedForward(
instance, line.c_str(), topk, result, 1024);
printf("%s %s\n", line.c_str(), result);
ret = ppspeech::ClsReset(instance);
}
ret = ppspeech::ClsDestroyInstance(instance);
return 0;
}

@ -0,0 +1,338 @@
// Copyright (c) code is from
// https://blog.csdn.net/huixingshao/article/details/45969887.
#include <fstream>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
using namespace std;
#pragma once
#pragma region ParseIniFile
/*
* \brief Generic configuration Class
*
*/
// Key/value configuration store parsed from "key <delimiter> value" text.
// Keys and values are kept as strings and converted on access.
class Config {
// Data
protected:
std::string m_Delimiter; //!< separator between key and value
std::string m_Comment; //!< marker that starts a comment; the rest of the line is ignored
std::map<std::string, std::string>
m_Contents; //!< extracted keys and values
typedef std::map<std::string, std::string>::iterator mapi;
typedef std::map<std::string, std::string>::const_iterator mapci;
// Methods
public:
// Loads keys and values from filename. Throws File_not_found when the file
// cannot be opened.
Config(std::string filename,
std::string delimiter = "=",
std::string comment = "#");
// Creates an empty configuration with delimiter "=" and comment marker "#".
Config();
// Returns the value stored for in_key converted to T.
// Throws Key_not_found when the key is absent.
template <class T>
T Read(const std::string& in_key) const;
// Returns the value stored for in_key converted to T, or in_value when the
// key is absent. The key and the value used are printed to stdout.
template <class T>
T Read(const std::string& in_key, const T& in_value) const;
// Stores the converted value of in_key in *out_var when the key exists and
// leaves *out_var untouched otherwise. Returns whether the key was found.
template <class T>
bool ReadInto(T* out_var, const std::string& in_key) const;
// Same as above, with in_value as the fallback for an absent key.
template <class T>
bool ReadInto(T* out_var,
const std::string& in_key,
const T& in_value) const;
// Returns true when filename can be opened for reading.
bool FileExist(std::string filename);
// Sets the delimiter and comment marker, then loads filename into this
// object. Throws File_not_found when the file cannot be opened.
void ReadFile(std::string filename,
std::string delimiter = "=",
std::string comment = "#");
// Check whether key exists in configuration
bool KeyExists(const std::string& in_key) const;
// Modify keys and values
template <class T>
void Add(const std::string& in_key, const T& in_value);
void Remove(const std::string& in_key);
// Check or change configuration syntax; the setters return the previous value
std::string GetDelimiter() const { return m_Delimiter; }
std::string GetComment() const { return m_Comment; }
std::string SetDelimiter(const std::string& in_s) {
std::string old = m_Delimiter;
m_Delimiter = in_s;
return old;
}
std::string SetComment(const std::string& in_s) {
std::string old = m_Comment;
m_Comment = in_s;
return old;
}
// Write or read configuration
friend std::ostream& operator<<(std::ostream& os, const Config& cf);
friend std::istream& operator>>(std::istream& is, Config& cf);
protected:
// Conversion helpers between T and its textual form (via stream operators)
template <class T>
static std::string T_as_string(const T& t);
template <class T>
static T string_as_T(const std::string& s);
// Strips leading and trailing whitespace in place
static void Trim(std::string* inout_s);
// Exception types
public:
// Thrown when a configuration file cannot be opened
struct File_not_found {
std::string filename;
explicit File_not_found(const std::string& filename_ = std::string())
: filename(filename_) {}
};
struct Key_not_found { // thrown only by the T Read(key) variant of Read()
std::string key;
explicit Key_not_found(const std::string& key_ = std::string())
: key(key_) {}
};
};
/* static */
// Renders `t` as text by streaming it; T must support operator<<.
template <class T>
std::string Config::T_as_string(const T& t) {
    std::ostringstream buffer;
    buffer << t;
    return buffer.str();
}
/* static */
// Parses `s` into a T by streaming; T must support operator>>.
//
// The result is value-initialized first, so an empty or unparsable string
// yields T() (e.g. 0 for arithmetic types) instead of an indeterminate value.
template <class T>
T Config::string_as_T(const std::string& s) {
    T t{};
    std::istringstream ist(s);
    ist >> t;
    return t;
}
/* static */
// Specialization for std::string: the stored text is returned unchanged, so
// values that contain whitespace are not cut at the first blank.
template <>
inline std::string Config::string_as_T<std::string>(const std::string& s) {
// Convert from a string to a string
// In other words, do nothing
return s;
}
/* static */
// Specialization for bool, compared case-insensitively:
//   "false", "f", "no", "n", "0", "none" -> false
//   anything else ("true", "yes", "1", "-1", ...) -> true
template <>
inline bool Config::string_as_T<bool>(const std::string& s) {
    std::string sup = s;
    for (std::string::iterator p = sup.begin(); p != sup.end(); ++p) {
        // toupper() is undefined for negative char values, so go through
        // unsigned char.
        *p = static_cast<char>(toupper(static_cast<unsigned char>(*p)));
    }
    const bool is_false = sup == "FALSE" || sup == "F" || sup == "NO" ||
                          sup == "N" || sup == "0" || sup == "NONE";
    return !is_false;
}
template <class T>
T Config::Read(const std::string& key) const {
// Read the value corresponding to key
mapci p = m_Contents.find(key);
if (p == m_Contents.end()) throw Key_not_found(key);
return string_as_T<T>(p->second);
}
template <class T>
T Config::Read(const std::string& key, const T& value) const {
// Return the value corresponding to key or given default value
// if key is not found
mapci p = m_Contents.find(key);
if (p == m_Contents.end()) {
printf("%s = %s(default)\n", key.c_str(), T_as_string(value).c_str());
return value;
} else {
printf("%s = %s\n", key.c_str(), T_as_string(p->second).c_str());
return string_as_T<T>(p->second);
}
}
template <class T>
bool Config::ReadInto(T* var, const std::string& key) const {
// Get the value corresponding to key and store in var
// Return true if key is found
// Otherwise leave var untouched
mapci p = m_Contents.find(key);
bool found = (p != m_Contents.end());
if (found) *var = string_as_T<T>(p->second);
return found;
}
// Looks up `key` and stores its converted value in *var; when the key is
// absent, *var is set to the fallback `value` instead.
// Returns true if the key was found.
template <class T>
bool Config::ReadInto(T* var, const std::string& key, const T& value) const {
    mapci p = m_Contents.find(key);
    const bool found = (p != m_Contents.end());
    if (found) {
        *var = string_as_T<T>(p->second);
    } else {
        // Write through the pointer; assigning to `var` itself does not
        // compile once this template is instantiated.
        *var = value;
    }
    return found;
}
// Stores `value` (rendered as text) under `in_key`, replacing any existing
// entry. Surrounding whitespace is trimmed from both key and value.
template <class T>
void Config::Add(const std::string& in_key, const T& value) {
    std::string trimmed_key = in_key;
    std::string text = T_as_string(value);
    Trim(&trimmed_key);
    Trim(&text);
    m_Contents[trimmed_key] = text;
}
// Constructs a Config from the keys and values in `filename`.
// Throws File_not_found when the file cannot be opened.
//
// Defined inline because this definition lives in a header: without it every
// translation unit including the header emits its own copy and linking fails.
inline Config::Config(std::string filename,
                      std::string delimiter,
                      std::string comment)
    : m_Delimiter(delimiter), m_Comment(comment) {
    std::ifstream in(filename.c_str());
    if (!in) throw File_not_found(filename);
    in >> (*this);
}
// Constructs an empty Config with "=" as delimiter and "#" as comment marker.
// Defined inline because this definition lives in a header.
inline Config::Config()
    : m_Delimiter(std::string(1, '=')), m_Comment(std::string(1, '#')) {}
bool Config::KeyExists(const string& key) const {
// Indicate whether key is found
mapci p = m_Contents.find(key);
return (p != m_Contents.end());
}
/* static */
// Removes leading and trailing whitespace from *inout_s in place.
// Defined inline because this definition lives in a header.
inline void Config::Trim(std::string* inout_s) {
    static const char whitespace[] = " \n\t\v\r\f";
    inout_s->erase(0, inout_s->find_first_not_of(whitespace));
    // For an all-whitespace string find_last_not_of() returns npos and
    // npos + 1 wraps to 0, which erases nothing from the now empty string.
    inout_s->erase(inout_s->find_last_not_of(whitespace) + 1U);
}
// Writes every entry of `cf` to `os` as "key <delimiter> value", one per line.
// Defined inline because this definition lives in a header.
inline std::ostream& operator<<(std::ostream& os, const Config& cf) {
    for (Config::mapci p = cf.m_Contents.begin(); p != cf.m_Contents.end();
         ++p) {
        os << p->first << " " << cf.m_Delimiter << " ";
        os << p->second << std::endl;
    }
    return os;
}
// Removes `key` and its value; removing an absent key is a no-op.
// Defined inline because this definition lives in a header.
inline void Config::Remove(const std::string& key) {
    // Erase by key: erasing the iterator returned by find() is undefined
    // behaviour when the key is missing (find() then returns end()).
    m_Contents.erase(key);
}
std::istream& operator>>(std::istream& is, Config& cf) {
// Load a Config from is
// Read in keys and values, keeping internal whitespace
typedef string::size_type pos;
const string& delim = cf.m_Delimiter; // separator
const string& comm = cf.m_Comment; // comment
const pos skip = delim.length(); // length of separator
string nextline = ""; // might need to read ahead to see where value ends
while (is || nextline.length() > 0) {
// Read an entire line at a time
string line;
if (nextline.length() > 0) {
line = nextline; // we read ahead; use it now
nextline = "";
} else {
std::getline(is, line);
}
// Ignore comments
line = line.substr(0, line.find(comm));
// Parse the line if it contains a delimiter
pos delimPos = line.find(delim);
if (delimPos < string::npos) {
// Extract the key
string key = line.substr(0, delimPos);
line.replace(0, delimPos + skip, "");
// See if value continues on the next line
// Stop at blank line, next line with a key, end of stream,
// or end of file sentry
bool terminate = false;
while (!terminate && is) {
std::getline(is, nextline);
terminate = true;
string nlcopy = nextline;
Config::Trim(&nlcopy);
if (nlcopy == "") continue;
nextline = nextline.substr(0, nextline.find(comm));
if (nextline.find(delim) != string::npos) continue;
nlcopy = nextline;
Config::Trim(&nlcopy);
if (nlcopy != "") line += "\n";
line += nextline;
terminate = false;
}
// Store key and value
Config::Trim(&key);
Config::Trim(&line);
cf.m_Contents[key] = line; // overwrites if key is repeated
}
}
return is;
}
// Returns true when `filename` can be opened for reading.
// Defined inline because this definition lives in a header.
inline bool Config::FileExist(std::string filename) {
    std::ifstream in(filename.c_str());
    return static_cast<bool>(in);
}
void Config::ReadFile(string filename, string delimiter, string comment) {
m_Delimiter = delimiter;
m_Comment = comment;
std::ifstream in(filename.c_str());
if (!in) throw File_not_found(filename);
in >> (*this);
}
#pragma endregion ParseIniFIle

@ -3,6 +3,7 @@ add_library(utils
file_utils.cc
math.cc
strings.cc
audio_process.cc
)

@ -0,0 +1,83 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "utils/audio_process.h"
namespace ppspeech{
// Divides every sample of *waveform by 32768.0 in place.
// Always returns 0.
int WaveformFloatNormal(std::vector<float>* waveform) {
    for (float& sample : *waveform) {
        sample = sample / 32768.0;
    }
    return 0;
}
// Normalizes *waveform in place.
//
//   wav_normal           when false, the waveform is left untouched
//   wav_normal_type      "linear":   divide by the peak absolute sample
//                        "gaussian": subtract the mean and divide by the
//                                    sample (n - 1) standard deviation
//   wav_norm_mul_factor  gain applied to the normalized samples
//
// The divisor is kept away from zero (1e-8), so a silent, constant or
// single-sample waveform never produces inf or NaN.
//
// Returns 0 on success (including wav_normal == false) and -1 for a null
// waveform or an unsupported wav_normal_type.
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor) {
    if (wav_normal == false) {
        return 0;
    }
    if (waveform == nullptr) {
        printf("waveform is null\n");
        return -1;
    }

    if (wav_normal_type == "linear") {
        // Absolute values are never negative, so 0 is a safe starting peak.
        float amax = 0.0f;
        for (const float sample : *waveform) {
            amax = std::max(amax, std::abs(sample));
        }
        float factor = 1.0 / (amax + 1e-8);
        for (float& sample : *waveform) {
            sample = sample * factor * wav_norm_mul_factor;
        }
    } else if (wav_normal_type == "gaussian") {
        const size_t num_samples = waveform->size();
        if (num_samples == 0) {
            return 0;  // nothing to normalize
        }
        const double sum =
            std::accumulate(waveform->begin(), waveform->end(), 0.0);
        const double mean = sum / num_samples;
        double accum = 0.0;
        for (const float sample : *waveform) {
            const double diff = sample - mean;
            accum += diff * diff;
        }
        // standard deviation; a single sample has no spread (n - 1 == 0)
        double stdev = 0.0;
        if (num_samples > 1) {
            stdev = sqrt(accum / (num_samples - 1));
        }
        stdev = std::max(stdev, 1e-8);
        for (float& sample : *waveform) {
            sample = wav_norm_mul_factor * (sample - mean) / stdev;
        }
    } else {
        printf("don't support\n");
        return -1;
    }
    return 0;
}
// Converts a power value to decibels relative to ref_value:
//   10 * log10(max(amin, in)) - 10 * log10(max(ref_value, amin))
//
// amin and ref_value must be strictly positive; otherwise a message is
// printed and -1 is returned. top_db is accepted but not used by this
// per-value conversion.
float PowerTodb(float in, float ref_value, float amin, float top_db) {
    if (amin <= 0) {
        printf("amin must be strictly positive\n");
        return -1;
    }
    if (ref_value <= 0) {
        printf("ref_value must be strictly positive\n");
        return -1;
    }
    const float floored_in = std::max(amin, in);
    const float floored_ref = std::max(ref_value, amin);
    float db = 10.0 * log10(floored_in);
    db -= 10.0 * log10(floored_ref);
    return db;
}
} // namespace ppspeech

@ -0,0 +1,32 @@
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Include guard: the default arguments declared below may appear only once
// per translation unit, so a second inclusion of this header must be a no-op.
#pragma once

#include <math.h>

#include <algorithm>
#include <iomanip>
#include <numeric>
#include <string>
#include <vector>

namespace ppspeech {

// Divides every sample of *waveform by 32768.0 in place. Returns 0.
int WaveformFloatNormal(std::vector<float>* waveform);

// Normalizes *waveform in place when wav_normal is true.
// wav_normal_type selects "linear" (peak) or "gaussian" (mean / standard
// deviation) normalization; wav_norm_mul_factor is the gain applied
// afterwards. Returns 0 on success, -1 for an unsupported wav_normal_type.
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor);

// Converts the power value `in` to decibels relative to ref_value, flooring
// the input at amin. Returns -1 when amin or ref_value is not strictly
// positive.
float PowerTodb(float in,
                float ref_value = 1.0,
                float amin = 1e-10,
                float top_db = 80.0);

}  // namespace ppspeech
Loading…
Cancel
Save