add cls engine (#2923)
parent
2f8aad95e0
commit
78e29c8ec4
@ -0,0 +1,39 @@
|
||||
# FastDeploy integration: common settings shared by every target platform.
cmake_minimum_required(VERSION 3.14 FATAL_ERROR)

# Target platform selector; only some of the listed values are handled below.
set(ARCH "mserver_x86_64" CACHE STRING "Target Architecture:
android_arm, android_armv7, android_armv8, android_x86, android_x86_64,
mserver_x86_64, ubuntu_x86_64, ios_armv7, ios_armv7s, ios_armv8, ios_x86_64, ios_x86,
windows_x86")

# Print the full compiler/linker command lines during the build.
set(CMAKE_VERBOSE_MAKEFILE ON)

# Directory holding the downloaded FastDeploy archives and unpacked SDKs.
set(FASTDEPLOY_DIR "${CMAKE_SOURCE_DIR}/fc_patch/fastdeploy")
|
||||
# Fetch and unpack the Linux x64 FastDeploy SDK the first time it is needed.
# The archive's presence is the "already done" marker, so a failed download or
# extraction removes it again to let the next configure run retry.
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
    file(MAKE_DIRECTORY ${FASTDEPLOY_DIR})

    file(DOWNLOAD
        https://bj.bcebos.com/fastdeploy/release/cpp/fastdeploy-linux-x64-1.0.2.tgz
        ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz
        STATUS fastdeploy_linux_status)
    list(GET fastdeploy_linux_status 0 fastdeploy_linux_code)
    if(NOT fastdeploy_linux_code EQUAL 0)
        file(REMOVE ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
        message(FATAL_ERROR
            "Failed to download fastdeploy-linux-x64-1.0.2.tgz: ${fastdeploy_linux_status}")
    endif()

    execute_process(
        COMMAND ${CMAKE_COMMAND} -E tar xzf ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz
        WORKING_DIRECTORY ${FASTDEPLOY_DIR}
        RESULT_VARIABLE fastdeploy_linux_untar)
    if(NOT fastdeploy_linux_untar EQUAL 0)
        file(REMOVE ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2.tgz)
        message(FATAL_ERROR "Failed to extract fastdeploy-linux-x64-1.0.2.tgz")
    endif()

    # Replace any stale SDK directory so the rename cannot fail or nest.
    file(REMOVE_RECURSE ${FASTDEPLOY_DIR}/linux-x64)
    file(RENAME ${FASTDEPLOY_DIR}/fastdeploy-linux-x64-1.0.2 ${FASTDEPLOY_DIR}/linux-x64)
endif()
|
||||
|
||||
# Fetch and unpack the Android (armv7/armv8, shared) FastDeploy SDK the first
# time it is needed. The archive's presence is the "already done" marker, so a
# failed download or extraction removes it again to let the next run retry.
if(NOT EXISTS ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
    file(MAKE_DIRECTORY ${FASTDEPLOY_DIR})

    file(DOWNLOAD
        https://bj.bcebos.com/fastdeploy/release/android/fastdeploy-android-1.0.0-shared.tgz
        ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz
        STATUS fastdeploy_android_status)
    list(GET fastdeploy_android_status 0 fastdeploy_android_code)
    if(NOT fastdeploy_android_code EQUAL 0)
        file(REMOVE ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
        message(FATAL_ERROR
            "Failed to download fastdeploy-android-1.0.0-shared.tgz: ${fastdeploy_android_status}")
    endif()

    execute_process(
        COMMAND ${CMAKE_COMMAND} -E tar xzf ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz
        WORKING_DIRECTORY ${FASTDEPLOY_DIR}
        RESULT_VARIABLE fastdeploy_android_untar)
    if(NOT fastdeploy_android_untar EQUAL 0)
        file(REMOVE ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared.tgz)
        message(FATAL_ERROR "Failed to extract fastdeploy-android-1.0.0-shared.tgz")
    endif()

    # Replace any stale SDK directory so the rename cannot fail or nest.
    file(REMOVE_RECURSE ${FASTDEPLOY_DIR}/android-armv7v8)
    file(RENAME ${FASTDEPLOY_DIR}/fastdeploy-android-1.0.0-shared ${FASTDEPLOY_DIR}/android-armv7v8)
endif()
|
||||
|
||||
# Pick the SDK directory, inference backend macro and compiler flags for the
# requested target. The backend macros are tested with #ifdef in the C++
# sources, so their spelling must match exactly.
if(ARCH STREQUAL "mserver_x86_64")
    set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/linux-x64)
    add_definitions("-DUSE_PADDLE_INFERENCE_BACKEND")
    # add_definitions("-DUSE_ORT_BACKEND")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -msse -msse2")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -msse -msse2 -mavx -O3")
elseif(ARCH STREQUAL "android_armv7")
    set(FASTDEPLOY_INSTALL_DIR ${FASTDEPLOY_DIR}/android-armv7v8)
    # Was misspelled "BAKEND", which left the Paddle Lite code path compiled out.
    add_definitions("-DUSE_PADDLE_LITE_BACKEND")
    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -g -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
    set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -g0 -O3 -mfloat-abi=softfp -mfpu=vfpv3 -mfpu=neon -fPIC -pie -fPIE")
else()
    # Without this, FASTDEPLOY_INSTALL_DIR is empty and the include() below
    # fails with a misleading "/FastDeploy.cmake not found" error.
    message(FATAL_ERROR
        "FastDeploy is not configured for ARCH='${ARCH}' (supported: mserver_x86_64, android_armv7)")
endif()

# Import FastDeploy's variables (FASTDEPLOY_INCS, FASTDEPLOY_LIBS, ...).
include(${FASTDEPLOY_INSTALL_DIR}/FastDeploy.cmake)
include_directories(${FASTDEPLOY_INCS})
|
@ -0,0 +1,7 @@
|
||||
# Audio classification (cls) engine.
project(cls)

# Pulls in the FastDeploy SDK settings (include paths, libraries, backend macro).
include(fastdeploy)

# Optional diagnostics compiled into the engine:
#   TEST_DEBUG  dumps intermediate waveform/feature/logit files
#   PRINT_TIME  prints per-stage timings
# add_definitions("-DTEST_DEBUG")
# add_definitions("-DPRINT_TIME")

add_subdirectory(nnet)
|
@ -0,0 +1,8 @@
|
||||
# Shared library with the PANNs classification engine and its C-style API.
set(srcs
    panns_nnet.cc
    panns_interface.cc)

add_library(cls SHARED ${srcs})
target_link_libraries(cls
    -static-libstdc++
    -Wl,-Bsymbolic
    ${FASTDEPLOY_LIBS}
    kaldi-matrix
    kaldi-base
    frontend
    utils)

# Command-line driver that runs the engine over a wav list.
set(bin_name panns_nnet_main)
add_executable(${bin_name} ${CMAKE_CURRENT_SOURCE_DIR}/${bin_name}.cc)
target_link_libraries(${bin_name}
    -static-libstdc++
    -Wl,-Bsymbolic
    cls
    gflags
    glog)
|
@ -0,0 +1,78 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cls/nnet/panns_interface.h"
|
||||
#include "cls/nnet/panns_nnet.h"
|
||||
#include "common/base/config.h"
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
void* ClsCreateInstance(const char* conf_path) {
|
||||
Config conf(conf_path);
|
||||
// cls init
|
||||
ppspeech::ClsNnetConf cls_nnet_conf;
|
||||
cls_nnet_conf.wav_normal_ = conf.Read("wav_normal", true);
|
||||
cls_nnet_conf.wav_normal_type_ =
|
||||
conf.Read("wav_normal_type", std::string("linear"));
|
||||
cls_nnet_conf.wav_norm_mul_factor_ = conf.Read("wav_norm_mul_factor", 1.0);
|
||||
cls_nnet_conf.model_file_path_ = conf.Read("model_path", std::string(""));
|
||||
cls_nnet_conf.param_file_path_ = conf.Read("param_path", std::string(""));
|
||||
cls_nnet_conf.dict_file_path_ = conf.Read("dict_path", std::string(""));
|
||||
cls_nnet_conf.num_cpu_thread_ = conf.Read("num_cpu_thread", 12);
|
||||
cls_nnet_conf.samp_freq = conf.Read("samp_freq", 32000);
|
||||
cls_nnet_conf.frame_length_ms = conf.Read("frame_length_ms", 32);
|
||||
cls_nnet_conf.frame_shift_ms = conf.Read("frame_shift_ms", 10);
|
||||
cls_nnet_conf.num_bins = conf.Read("num_bins", 64);
|
||||
cls_nnet_conf.low_freq = conf.Read("low_freq", 50);
|
||||
cls_nnet_conf.high_freq = conf.Read("high_freq", 14000);
|
||||
cls_nnet_conf.dither = conf.Read("dither", 0.0);
|
||||
|
||||
ppspeech::ClsNnet* cls_model = new ppspeech::ClsNnet();
|
||||
int ret = cls_model->Init(cls_nnet_conf);
|
||||
return static_cast<void*>(cls_model);
|
||||
}
|
||||
|
||||
// Destroys an engine created by ClsCreateInstance(). A null handle is
// accepted. Always returns 0.
int ClsDestroyInstance(void* instance) {
    // Deleting a null pointer is a no-op, so no explicit check is needed.
    delete static_cast<ppspeech::ClsNnet*>(instance);
    return 0;
}
|
||||
|
||||
// Classifies the wav file at `wav_path` and writes the top-`topk` labels with
// their scores into `result` (at most `result_max_len` bytes).
//
// Returns -1 when `instance` is null; otherwise the status reported by
// ClsNnet::Forward (0 on success).
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len) {
    ppspeech::ClsNnet* cls_model = static_cast<ppspeech::ClsNnet*>(instance);
    if (cls_model == NULL) {
        printf("instance is null\n");
        return -1;
    }
    // Propagate the engine's status instead of unconditionally reporting 0.
    return cls_model->Forward(wav_path, topk, result, result_max_len);
}
|
||||
|
||||
int ClsReset(void* instance) {
|
||||
ppspeech::ClsNnet* cls_model = static_cast<ppspeech::ClsNnet*>(instance);
|
||||
if (cls_model == NULL) {
|
||||
printf("instance is null\n");
|
||||
return -1;
|
||||
}
|
||||
cls_model->Reset();
|
||||
return 0;
|
||||
}
|
||||
} // namespace ppspeech
|
@ -0,0 +1,27 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace ppspeech {

// Creates a classification engine from the config file at `conf_path` and
// returns it as an opaque handle for the functions below.
void* ClsCreateInstance(const char* conf_path);

// Releases a handle obtained from ClsCreateInstance(). Returns 0.
int ClsDestroyInstance(void* instance);

// Classifies the wav file at `wav_path`; the top-`topk` labels and scores are
// written to `result`, limited to `result_max_len` bytes.
// Returns -1 if `instance` is null, 0 on success.
int ClsFeedForward(void* instance,
                   const char* wav_path,
                   int topk,
                   char* result,
                   int result_max_len);

// Clears the engine's accumulated result text.
// Returns -1 if `instance` is null, 0 otherwise.
int ClsReset(void* instance);

}  // namespace ppspeech
|
@ -0,0 +1,228 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "cls/nnet/panns_nnet.h"
|
||||
#ifdef PRINT_TIME
|
||||
#include "kaldi/base/timer.h"
|
||||
#endif
|
||||
|
||||
namespace ppspeech {
|
||||
|
||||
// Starts with no inference runtime; Init() creates it.
ClsNnet::ClsNnet() : runtime_(nullptr) {
    // wav_reader_ = NULL;
}
|
||||
|
||||
void ClsNnet::Reset() {
|
||||
// wav_reader_->Clear();
|
||||
ss_.str("");
|
||||
}
|
||||
|
||||
int ClsNnet::Init(const ClsNnetConf& conf) {
|
||||
conf_ = conf;
|
||||
// init fbank opts
|
||||
fbank_opts_.frame_opts.samp_freq = conf.samp_freq;
|
||||
fbank_opts_.frame_opts.frame_length_ms = conf.frame_length_ms;
|
||||
fbank_opts_.frame_opts.frame_shift_ms = conf.frame_shift_ms;
|
||||
fbank_opts_.mel_opts.num_bins = conf.num_bins;
|
||||
fbank_opts_.mel_opts.low_freq = conf.low_freq;
|
||||
fbank_opts_.mel_opts.high_freq = conf.high_freq;
|
||||
fbank_opts_.frame_opts.dither = conf.dither;
|
||||
fbank_opts_.use_log_fbank = false;
|
||||
|
||||
// init dict
|
||||
if (conf.dict_file_path_ != "") {
|
||||
ReadFileToVector(conf.dict_file_path_, &dict_);
|
||||
}
|
||||
|
||||
// init model
|
||||
fastdeploy::RuntimeOption runtime_option;
|
||||
|
||||
#ifdef USE_ORT_BACKEND
|
||||
runtime_option.SetModelPath(
|
||||
conf.model_file_path_, "", fastdeploy::ModelFormat::ONNX); // onnx
|
||||
runtime_option.UseOrtBackend(); // onnx
|
||||
#endif
|
||||
#ifdef USE_PADDLE_LITE_BACKEND
|
||||
runtime_option.SetModelPath(conf.model_file_path_,
|
||||
conf.param_file_path_,
|
||||
fastdeploy::ModelFormat::PADDLE);
|
||||
runtime_option.UseLiteBackend();
|
||||
#endif
|
||||
#ifdef USE_PADDLE_INFERENCE_BACKEND
|
||||
runtime_option.SetModelPath(conf.model_file_path_,
|
||||
conf.param_file_path_,
|
||||
fastdeploy::ModelFormat::PADDLE);
|
||||
runtime_option.UsePaddleInferBackend();
|
||||
#endif
|
||||
runtime_option.SetCpuThreadNum(conf.num_cpu_thread_);
|
||||
runtime_option.DeletePaddleBackendPass("simplify_with_basic_ops_pass");
|
||||
runtime_ = std::unique_ptr<fastdeploy::Runtime>(new fastdeploy::Runtime());
|
||||
if (!runtime_->Init(runtime_option)) {
|
||||
std::cerr << "--- Init FastDeploy Runitme Failed! "
|
||||
<< "\n--- Model: " << conf.model_file_path_ << std::endl;
|
||||
return -1;
|
||||
} else {
|
||||
std::cout << "--- Init FastDeploy Runitme Done! "
|
||||
<< "\n--- Model: " << conf.model_file_path_ << std::endl;
|
||||
}
|
||||
|
||||
Reset();
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ClsNnet::Forward(const char* wav_path,
|
||||
int topk,
|
||||
char* result,
|
||||
int result_max_len) {
|
||||
#ifdef PRINT_TIME
|
||||
kaldi::Timer timer;
|
||||
timer.Reset();
|
||||
#endif
|
||||
// read wav
|
||||
std::ifstream infile(wav_path, std::ifstream::in);
|
||||
kaldi::WaveData wave_data;
|
||||
wave_data.Read(infile);
|
||||
int32 this_channel = 0;
|
||||
kaldi::Matrix<float> wavform_kaldi = wave_data.Data();
|
||||
// only get channel 0
|
||||
int wavform_len = wavform_kaldi.NumCols();
|
||||
std::vector<float> wavform(wavform_kaldi.Data(),
|
||||
wavform_kaldi.Data() + wavform_len);
|
||||
WaveformFloatNormal(&wavform);
|
||||
WaveformNormal(&wavform,
|
||||
conf_.wav_normal_,
|
||||
conf_.wav_normal_type_,
|
||||
conf_.wav_norm_mul_factor_);
|
||||
#ifdef TEST_DEBUG
|
||||
{
|
||||
std::ofstream fp("cls.wavform", std::ios::out);
|
||||
for (int i = 0; i < wavform.size(); ++i) {
|
||||
fp << std::setprecision(18) << wavform[i] << " ";
|
||||
}
|
||||
fp << "\n";
|
||||
}
|
||||
#endif
|
||||
#ifdef PRINT_TIME
|
||||
printf("wav read consume: %fs\n", timer.Elapsed());
|
||||
#endif
|
||||
|
||||
#ifdef PRINT_TIME
|
||||
timer.Reset();
|
||||
#endif
|
||||
|
||||
std::vector<float> feats;
|
||||
std::unique_ptr<ppspeech::FrontendInterface> data_source(
|
||||
new ppspeech::DataCache());
|
||||
ppspeech::Fbank fbank(fbank_opts_, std::move(data_source));
|
||||
fbank.Accept(wavform);
|
||||
fbank.SetFinished();
|
||||
fbank.Read(&feats);
|
||||
|
||||
int feat_dim = fbank_opts_.mel_opts.num_bins;
|
||||
int num_frames = feats.size() / feat_dim;
|
||||
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
for (int j = 0; j < feat_dim; ++j) {
|
||||
feats[i * feat_dim + j] = PowerTodb(feats[i * feat_dim + j]);
|
||||
}
|
||||
}
|
||||
#ifdef TEST_DEBUG
|
||||
{
|
||||
std::ofstream fp("cls.feat", std::ios::out);
|
||||
for (int i = 0; i < num_frames; ++i) {
|
||||
for (int j = 0; j < feat_dim; ++j) {
|
||||
fp << std::setprecision(18) << feats[i * feat_dim + j] << " ";
|
||||
}
|
||||
fp << "\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef PRINT_TIME
|
||||
printf("extract fbank consume: %fs\n", timer.Elapsed());
|
||||
#endif
|
||||
|
||||
// infer
|
||||
std::vector<float> model_out;
|
||||
#ifdef PRINT_TIME
|
||||
timer.Reset();
|
||||
#endif
|
||||
ModelForward(feats.data(), num_frames, feat_dim, &model_out);
|
||||
#ifdef PRINT_TIME
|
||||
printf("fast deploy infer consume: %fs\n", timer.Elapsed());
|
||||
#endif
|
||||
#ifdef TEST_DEBUG
|
||||
{
|
||||
std::ofstream fp("cls.logits", std::ios::out);
|
||||
for (int i = 0; i < model_out.size(); ++i) {
|
||||
fp << std::setprecision(18) << model_out[i] << "\n";
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// construct result str
|
||||
ss_ << "{";
|
||||
GetTopkResult(topk, model_out);
|
||||
ss_ << "}";
|
||||
|
||||
if (result_max_len <= ss_.str().size()) {
|
||||
printf("result_max_len is short than result len\n");
|
||||
}
|
||||
snprintf(result, result_max_len, "%s", ss_.str().c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ClsNnet::ModelForward(float* features,
|
||||
const int num_frames,
|
||||
const int feat_dim,
|
||||
std::vector<float>* model_out) {
|
||||
// init input tensor shape
|
||||
fastdeploy::TensorInfo info = runtime_->GetInputInfo(0);
|
||||
info.shape = {1, num_frames, feat_dim};
|
||||
|
||||
std::vector<fastdeploy::FDTensor> input_tensors(1);
|
||||
std::vector<fastdeploy::FDTensor> output_tensors(1);
|
||||
|
||||
input_tensors[0].SetExternalData({1, num_frames, feat_dim},
|
||||
fastdeploy::FDDataType::FP32,
|
||||
static_cast<void*>(features));
|
||||
|
||||
// get input name
|
||||
input_tensors[0].name = info.name;
|
||||
|
||||
runtime_->Infer(input_tensors, &output_tensors);
|
||||
|
||||
// output_tensors[0].PrintInfo();
|
||||
std::vector<int64_t> output_shape = output_tensors[0].Shape();
|
||||
model_out->resize(output_shape[0] * output_shape[1]);
|
||||
memcpy(static_cast<void*>(model_out->data()),
|
||||
output_tensors[0].Data(),
|
||||
output_shape[0] * output_shape[1] * sizeof(float));
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ClsNnet::GetTopkResult(int k, const std::vector<float>& model_out) {
|
||||
std::vector<float> values;
|
||||
std::vector<int> indics;
|
||||
TopK(model_out, k, &values, &indics);
|
||||
for (int i = 0; i < k; ++i) {
|
||||
if (i != 0) {
|
||||
ss_ << ",";
|
||||
}
|
||||
ss_ << "\"" << dict_[indics[i]] << "\":\"" << values[i] << "\"";
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace ppspeech
|
@ -0,0 +1,74 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/frontend/data_cache.h"
|
||||
#include "common/frontend/fbank.h"
|
||||
#include "common/frontend/feature-fbank.h"
|
||||
#include "common/frontend/frontend_itf.h"
|
||||
#include "common/frontend/wave-reader.h"
|
||||
#include "common/utils/audio_process.h"
|
||||
#include "common/utils/file_utils.h"
|
||||
#include "fastdeploy/runtime.h"
|
||||
#include "kaldi/util/kaldi-io.h"
|
||||
#include "kaldi/util/table-types.h"
|
||||
|
||||
namespace ppspeech {
|
||||
// Settings for ClsNnet. Each member starts at the value ClsCreateInstance()
// uses when the key is absent from the config file, so a default-constructed
// object never holds indeterminate values.
struct ClsNnetConf {
    // wav
    bool wav_normal_ = true;                   // normalise the waveform?
    std::string wav_normal_type_ = "linear";   // "linear" or "gaussian"
    float wav_norm_mul_factor_ = 1.0f;         // gain applied after normalising
    // model
    std::string model_file_path_;
    std::string param_file_path_;
    std::string dict_file_path_;               // label dictionary; may be empty
    int num_cpu_thread_ = 12;
    // fbank
    float samp_freq = 32000.0f;
    float frame_length_ms = 32.0f;
    float frame_shift_ms = 10.0f;
    int num_bins = 64;
    float low_freq = 50.0f;
    float high_freq = 14000.0f;
    float dither = 0.0f;
};
|
||||
|
||||
class ClsNnet {
|
||||
public:
|
||||
ClsNnet();
|
||||
int Init(const ClsNnetConf& conf);
|
||||
int Forward(const char* wav_path,
|
||||
int topk,
|
||||
char* result,
|
||||
int result_max_len);
|
||||
void Reset();
|
||||
|
||||
private:
|
||||
int ModelForward(float* features,
|
||||
const int num_frames,
|
||||
const int feat_dim,
|
||||
std::vector<float>* model_out);
|
||||
int ModelForwardStream(std::vector<float>* feats);
|
||||
int GetTopkResult(int k, const std::vector<float>& model_out);
|
||||
|
||||
ClsNnetConf conf_;
|
||||
knf::FbankOptions fbank_opts_;
|
||||
std::unique_ptr<fastdeploy::Runtime> runtime_;
|
||||
std::vector<std::string> dict_;
|
||||
std::stringstream ss_;
|
||||
};
|
||||
|
||||
} // namespace ppspeech
|
@ -0,0 +1,49 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include "base/flags.h"
|
||||
#include "cls/nnet/panns_interface.h"
|
||||
|
||||
DEFINE_string(conf_path, "", "config path");
|
||||
DEFINE_string(scp_path, "", "wav scp path");
|
||||
DEFINE_string(topk, "", "print topk results");
|
||||
|
||||
int main(int argc, char* argv[]) {
|
||||
gflags::SetUsageMessage("Usage:");
|
||||
gflags::ParseCommandLineFlags(&argc, &argv, false);
|
||||
google::InitGoogleLogging(argv[0]);
|
||||
google::InstallFailureSignalHandler();
|
||||
FLAGS_logtostderr = 1;
|
||||
CHECK_GT(FLAGS_conf_path.size(), 0);
|
||||
CHECK_GT(FLAGS_scp_path.size(), 0);
|
||||
CHECK_GT(FLAGS_topk.size(), 0);
|
||||
void* instance = ppspeech::ClsCreateInstance(FLAGS_conf_path.c_str());
|
||||
int ret = 0;
|
||||
// read wav
|
||||
std::ifstream ifs(FLAGS_scp_path);
|
||||
std::string line = "";
|
||||
int topk = std::atoi(FLAGS_topk.c_str());
|
||||
while (getline(ifs, line)) {
|
||||
// read wav
|
||||
char result[1024] = {0};
|
||||
ret = ppspeech::ClsFeedForward(
|
||||
instance, line.c_str(), topk, result, 1024);
|
||||
printf("%s %s\n", line.c_str(), result);
|
||||
ret = ppspeech::ClsReset(instance);
|
||||
}
|
||||
ret = ppspeech::ClsDestroyInstance(instance);
|
||||
return 0;
|
||||
}
|
@ -0,0 +1,338 @@
|
||||
// Copyright (c) code is from
|
||||
// https://blog.csdn.net/huixingshao/article/details/45969887.
|
||||
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
using namespace std;
|
||||
|
||||
#pragma once
|
||||
|
||||
#pragma region ParseIniFile
|
||||
/*
|
||||
* \brief Generic configuration Class
|
||||
*
|
||||
*/
|
||||
/*
 * \brief Generic configuration class.
 *
 * Holds "key <delimiter> value" pairs parsed from a text file or stream;
 * text following the comment marker on a line is ignored.
 */
class Config {
    // Data
  protected:
    std::string m_Delimiter;  //!< separator between key and value
    std::string m_Comment;    //!< separator between value and comments
    std::map<std::string, std::string>
        m_Contents;  //!< extracted keys and values

    using mapi = std::map<std::string, std::string>::iterator;
    using mapci = std::map<std::string, std::string>::const_iterator;

    // Methods
  public:
    //! Load the given file; throws File_not_found if it cannot be opened.
    Config(std::string filename,
           std::string delimiter = "=",
           std::string comment = "#");
    //! Empty configuration with "=" as delimiter and "#" as comment marker.
    Config();

    //! Value stored under in_key converted to T; throws Key_not_found when
    //! the key is absent. Call as Read<T>(key).
    template <class T>
    T Read(const std::string& in_key) const;
    //! Value stored under in_key converted to T, or in_value when absent.
    template <class T>
    T Read(const std::string& in_key, const T& in_value) const;
    //! Store the value of in_key in *out_var; returns whether it was found.
    template <class T>
    bool ReadInto(T* out_var, const std::string& in_key) const;
    //! Same as above, with in_value as the default for a missing key.
    template <class T>
    bool ReadInto(T* out_var,
                  const std::string& in_key,
                  const T& in_value) const;

    //! True if the file can be opened for reading.
    bool FileExist(std::string filename);
    //! Set delimiter/comment and load the file; throws File_not_found.
    void ReadFile(std::string filename,
                  std::string delimiter = "=",
                  std::string comment = "#");

    // Check whether key exists in configuration
    bool KeyExists(const std::string& in_key) const;

    // Modify keys and values
    template <class T>
    void Add(const std::string& in_key, const T& in_value);
    void Remove(const std::string& in_key);

    // Check or change configuration syntax
    std::string GetDelimiter() const { return m_Delimiter; }
    std::string GetComment() const { return m_Comment; }
    //! Install a new delimiter and return the previous one.
    std::string SetDelimiter(const std::string& in_s) {
        std::string previous(in_s);
        m_Delimiter.swap(previous);
        return previous;
    }
    //! Install a new comment marker and return the previous one.
    std::string SetComment(const std::string& in_s) {
        std::string previous(in_s);
        m_Comment.swap(previous);
        return previous;
    }

    // Write or read configuration
    friend std::ostream& operator<<(std::ostream& os, const Config& cf);
    friend std::istream& operator>>(std::istream& is, Config& cf);

  protected:
    template <class T>
    static std::string T_as_string(const T& t);
    template <class T>
    static T string_as_T(const std::string& s);
    static void Trim(std::string* inout_s);

    // Exception types
  public:
    //! Thrown when a configuration file cannot be opened.
    struct File_not_found {
        std::string filename;
        explicit File_not_found(const std::string& filename_ = std::string())
            : filename(filename_) {}
    };
    //! Thrown only by the T Read(key) variant of Read().
    struct Key_not_found {
        std::string key;
        explicit Key_not_found(const std::string& key_ = std::string())
            : key(key_) {}
    };
};
|
||||
|
||||
/* static */
|
||||
template <class T>
|
||||
std::string Config::T_as_string(const T& t) {
|
||||
// Convert from a T to a string
|
||||
// Type T must support << operator
|
||||
std::ostringstream ost;
|
||||
ost << t;
|
||||
return ost.str();
|
||||
}
|
||||
|
||||
|
||||
/* static */
|
||||
template <class T>
|
||||
T Config::string_as_T(const std::string& s) {
|
||||
// Convert from a string to a T
|
||||
// Type T must support >> operator
|
||||
T t;
|
||||
std::istringstream ist(s);
|
||||
ist >> t;
|
||||
return t;
|
||||
}
|
||||
|
||||
|
||||
/* static */
|
||||
template <>
|
||||
inline std::string Config::string_as_T<std::string>(const std::string& s) {
|
||||
// Convert from a string to a string
|
||||
// In other words, do nothing
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
/* static */
|
||||
template <>
|
||||
inline bool Config::string_as_T<bool>(const std::string& s) {
|
||||
// Convert from a string to a bool
|
||||
// Interpret "false", "F", "no", "n", "0" as false
|
||||
// Interpret "true", "T", "yes", "y", "1", "-1", or anything else as true
|
||||
bool b = true;
|
||||
std::string sup = s;
|
||||
for (std::string::iterator p = sup.begin(); p != sup.end(); ++p)
|
||||
*p = toupper(*p); // make string all caps
|
||||
if (sup == std::string("FALSE") || sup == std::string("F") ||
|
||||
sup == std::string("NO") || sup == std::string("N") ||
|
||||
sup == std::string("0") || sup == std::string("NONE"))
|
||||
b = false;
|
||||
return b;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
T Config::Read(const std::string& key) const {
|
||||
// Read the value corresponding to key
|
||||
mapci p = m_Contents.find(key);
|
||||
if (p == m_Contents.end()) throw Key_not_found(key);
|
||||
return string_as_T<T>(p->second);
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
T Config::Read(const std::string& key, const T& value) const {
|
||||
// Return the value corresponding to key or given default value
|
||||
// if key is not found
|
||||
mapci p = m_Contents.find(key);
|
||||
if (p == m_Contents.end()) {
|
||||
printf("%s = %s(default)\n", key.c_str(), T_as_string(value).c_str());
|
||||
return value;
|
||||
} else {
|
||||
printf("%s = %s\n", key.c_str(), T_as_string(p->second).c_str());
|
||||
return string_as_T<T>(p->second);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool Config::ReadInto(T* var, const std::string& key) const {
|
||||
// Get the value corresponding to key and store in var
|
||||
// Return true if key is found
|
||||
// Otherwise leave var untouched
|
||||
mapci p = m_Contents.find(key);
|
||||
bool found = (p != m_Contents.end());
|
||||
if (found) *var = string_as_T<T>(p->second);
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
bool Config::ReadInto(T* var, const std::string& key, const T& value) const {
|
||||
// Get the value corresponding to key and store in var
|
||||
// Return true if key is found
|
||||
// Otherwise set var to given default
|
||||
mapci p = m_Contents.find(key);
|
||||
bool found = (p != m_Contents.end());
|
||||
if (found)
|
||||
*var = string_as_T<T>(p->second);
|
||||
else
|
||||
var = value;
|
||||
return found;
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
void Config::Add(const std::string& in_key, const T& value) {
|
||||
// Add a key with given value
|
||||
std::string v = T_as_string(value);
|
||||
std::string key = in_key;
|
||||
Trim(&key);
|
||||
Trim(&v);
|
||||
m_Contents[key] = v;
|
||||
return;
|
||||
}
|
||||
|
||||
// Construct a Config, getting keys and values from the given file.
// Throws File_not_found when the file cannot be opened.
// `inline` because this definition lives in a header: without it every
// translation unit including the header emits its own copy and linking fails.
inline Config::Config(std::string filename,
                      std::string delimiter,
                      std::string comment)
    : m_Delimiter(delimiter), m_Comment(comment) {
    std::ifstream in(filename.c_str());

    if (!in) throw File_not_found(filename);

    in >> (*this);
}
|
||||
|
||||
|
||||
// Construct an empty Config that uses "=" as delimiter and "#" as comment
// marker. `inline` because this definition lives in a header.
inline Config::Config()
    : m_Delimiter(std::string(1, '=')), m_Comment(std::string(1, '#')) {}
|
||||
|
||||
|
||||
bool Config::KeyExists(const string& key) const {
|
||||
// Indicate whether key is found
|
||||
mapci p = m_Contents.find(key);
|
||||
return (p != m_Contents.end());
|
||||
}
|
||||
|
||||
|
||||
/* static */
|
||||
/* static */
// Remove leading and trailing whitespace from *inout_s in place.
// `inline` because this definition lives in a header.
inline void Config::Trim(std::string* inout_s) {
    static const char whitespace[] = " \n\t\v\r\f";
    // For an all-whitespace string find_first_not_of() returns npos, so the
    // first erase clears the whole string.
    inout_s->erase(0, inout_s->find_first_not_of(whitespace));
    // npos + 1 wraps to 0 here, which makes the second erase a no-op on the
    // then-empty string.
    inout_s->erase(inout_s->find_last_not_of(whitespace) + 1U);
}
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const Config& cf) {
|
||||
// Save a Config to os
|
||||
for (Config::mapci p = cf.m_Contents.begin(); p != cf.m_Contents.end();
|
||||
++p) {
|
||||
os << p->first << " " << cf.m_Delimiter << " ";
|
||||
os << p->second << std::endl;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
// Remove key and its value; removing a key that is not present is a no-op.
// `inline` because this definition lives in a header.
inline void Config::Remove(const std::string& key) {
    // Erase by key: erase(find(key)) would pass end() to erase() for a
    // missing key, which is undefined behaviour.
    m_Contents.erase(key);
}
|
||||
|
||||
std::istream& operator>>(std::istream& is, Config& cf) {
|
||||
// Load a Config from is
|
||||
// Read in keys and values, keeping internal whitespace
|
||||
typedef string::size_type pos;
|
||||
const string& delim = cf.m_Delimiter; // separator
|
||||
const string& comm = cf.m_Comment; // comment
|
||||
const pos skip = delim.length(); // length of separator
|
||||
|
||||
string nextline = ""; // might need to read ahead to see where value ends
|
||||
|
||||
while (is || nextline.length() > 0) {
|
||||
// Read an entire line at a time
|
||||
string line;
|
||||
if (nextline.length() > 0) {
|
||||
line = nextline; // we read ahead; use it now
|
||||
nextline = "";
|
||||
} else {
|
||||
std::getline(is, line);
|
||||
}
|
||||
|
||||
// Ignore comments
|
||||
line = line.substr(0, line.find(comm));
|
||||
|
||||
// Parse the line if it contains a delimiter
|
||||
pos delimPos = line.find(delim);
|
||||
if (delimPos < string::npos) {
|
||||
// Extract the key
|
||||
string key = line.substr(0, delimPos);
|
||||
line.replace(0, delimPos + skip, "");
|
||||
|
||||
// See if value continues on the next line
|
||||
// Stop at blank line, next line with a key, end of stream,
|
||||
// or end of file sentry
|
||||
bool terminate = false;
|
||||
while (!terminate && is) {
|
||||
std::getline(is, nextline);
|
||||
terminate = true;
|
||||
|
||||
string nlcopy = nextline;
|
||||
Config::Trim(&nlcopy);
|
||||
if (nlcopy == "") continue;
|
||||
|
||||
nextline = nextline.substr(0, nextline.find(comm));
|
||||
if (nextline.find(delim) != string::npos) continue;
|
||||
|
||||
nlcopy = nextline;
|
||||
Config::Trim(&nlcopy);
|
||||
if (nlcopy != "") line += "\n";
|
||||
line += nextline;
|
||||
terminate = false;
|
||||
}
|
||||
|
||||
// Store key and value
|
||||
Config::Trim(&key);
|
||||
Config::Trim(&line);
|
||||
cf.m_Contents[key] = line; // overwrites if key is repeated
|
||||
}
|
||||
}
|
||||
|
||||
return is;
|
||||
}
|
||||
// Report whether `filename` can be opened for reading.
// `inline` because this definition lives in a header.
inline bool Config::FileExist(std::string filename) {
    std::ifstream in(filename.c_str());
    return static_cast<bool>(in);
}
|
||||
|
||||
void Config::ReadFile(string filename, string delimiter, string comment) {
|
||||
m_Delimiter = delimiter;
|
||||
m_Comment = comment;
|
||||
std::ifstream in(filename.c_str());
|
||||
|
||||
if (!in) throw File_not_found(filename);
|
||||
|
||||
in >> (*this);
|
||||
}
|
||||
|
||||
#pragma endregion ParseIniFIle
|
@ -0,0 +1,83 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "utils/audio_process.h"
|
||||
|
||||
namespace ppspeech{
|
||||
|
||||
// Divides every sample of *waveform by 32768 in place. Always returns 0.
int WaveformFloatNormal(std::vector<float>* waveform) {
    for (float& sample : *waveform) {
        sample = sample / 32768.0;
    }
    return 0;
}
|
||||
|
||||
// Normalises *waveform in place.
//
// @param waveform             samples to normalise
// @param wav_normal           when false the samples are left untouched
// @param wav_normal_type      "linear": scale by 1 / (max |sample| + 1e-8);
//                             "gaussian": subtract the mean and divide by the
//                             sample standard deviation (floored at 1e-8)
// @param wav_norm_mul_factor  gain applied to the normalised samples
// @return 0 on success, -1 for an unsupported wav_normal_type
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor) {
    if (!wav_normal) {
        return 0;
    }
    if (wav_normal_type == "linear") {
        // Absolute values are never negative, so 0 is the natural seed.
        float amax = 0.0f;
        for (const float sample : *waveform) {
            amax = std::max(amax, std::abs(sample));
        }
        const float factor = 1.0 / (amax + 1e-8);
        for (float& sample : *waveform) {
            sample = sample * factor * wav_norm_mul_factor;
        }
    } else if (wav_normal_type == "gaussian") {
        const auto num_samples = waveform->size();
        if (num_samples == 0) {
            return 0;  // nothing to normalise
        }
        const double sum =
            std::accumulate(waveform->begin(), waveform->end(), 0.0);
        const double mean = sum / num_samples;  // mean

        double accum = 0.0;
        for (const double d : *waveform) {
            accum += (d - mean) * (d - mean);
        }

        // Sample standard deviation (n - 1 in the denominator). A single
        // sample has no spread; dividing by n - 1 == 0 would produce NaN.
        double stdev =
            num_samples > 1 ? sqrt(accum / (num_samples - 1)) : 0.0;
        stdev = std::max(stdev, 1e-8);

        for (float& sample : *waveform) {
            sample = wav_norm_mul_factor * (sample - mean) / stdev;
        }
    } else {
        printf("don't support\n");
        return -1;
    }
    return 0;
}
|
||||
|
||||
// Converts a power value to decibels relative to ref_value:
//   10 * log10(max(amin, in)) - 10 * log10(max(ref_value, amin))
// Both amin and ref_value must be strictly positive; otherwise a message is
// printed and -1 is returned. top_db is accepted but not used.
float PowerTodb(float in, float ref_value, float amin, float top_db) {
    if (!(amin > 0)) {
        printf("amin must be strictly positive\n");
        return -1;
    }
    if (!(ref_value > 0)) {
        printf("ref_value must be strictly positive\n");
        return -1;
    }

    const float floored_power = std::max(amin, in);
    const float floored_ref = std::max(ref_value, amin);

    float db = 10.0 * log10(floored_power);
    db -= 10.0 * log10(floored_ref);
    return db;
}
|
||||
|
||||
} // namespace ppspeech
|
@ -0,0 +1,32 @@
|
||||
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <iomanip>
|
||||
#include <math.h>
|
||||
|
||||
// Include guard: PowerTodb() declares default arguments, so including this
// header twice in one translation unit would otherwise be a redefinition.
#ifndef PPSPEECH_COMMON_UTILS_AUDIO_PROCESS_H_
#define PPSPEECH_COMMON_UTILS_AUDIO_PROCESS_H_

namespace ppspeech {

// Divides every sample of *waveform by 32768 in place. Returns 0.
int WaveformFloatNormal(std::vector<float>* waveform);

// Normalises *waveform in place when wav_normal is true, using
// wav_normal_type "linear" (peak) or "gaussian" (mean / standard deviation),
// then scales by wav_norm_mul_factor.
// Returns 0 on success, -1 for an unsupported wav_normal_type.
int WaveformNormal(std::vector<float>* waveform,
                   bool wav_normal,
                   const std::string& wav_normal_type,
                   float wav_norm_mul_factor);

// Converts a power value to decibels relative to ref_value, with the input
// floored at amin. Returns -1 when amin or ref_value is not strictly
// positive.
float PowerTodb(float in,
                float ref_value = 1.0,
                float amin = 1e-10,
                float top_db = 80.0);

}  // namespace ppspeech

#endif  // PPSPEECH_COMMON_UTILS_AUDIO_PROCESS_H_
|
Loading…
Reference in new issue