add nnet module

pull/1400/head
SmileGoat 2 years ago
parent 42c8d0dd97
commit e57efcb314

@@ -39,6 +39,7 @@ FetchContent_Declare(
GIT_TAG "20210324.1"
)
FetchContent_MakeAvailable(absl)
include_directories(${absl_SOURCE_DIR}/absl)
# libsndfile
include(FetchContent)

@@ -1,15 +1,16 @@
#pragma once
#include ""
#include "base/basic_types.h"
#include "kaldi/base/kaldi-types.h"
namespace ppspeech {
class NnetForwardInterface {
class NnetInterface {
public:
virtual ~NnetForwardInterface() {}
virtual ~NnetInterface() {}
virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
kaldi::Vector<kaldi::BaseFloat>* inference) const = 0;
kaldi::Matrix<kaldi::BaseFloat>* inferences) const = 0;
};
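// Example (not part of this commit): a concrete backend implements the
// renamed interface by overriding FeedForward; DummyNnet is a hypothetical
// pass-through stub:
//   class DummyNnet : public ppspeech::NnetInterface {
//     public:
//       virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
//                                kaldi::Matrix<kaldi::BaseFloat>* inferences) const {
//           inferences->Resize(features.NumRows(), features.NumCols());
//           inferences->CopyFromMat(features);  // echo input back as the "inference"
//       }
//   };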

@@ -0,0 +1,179 @@
#include "nnet/paddle_nnet.h"
#include "absl/strings/str_split.h"
namespace ppspeech {
void PaddleNnet::init_cache_encouts(const ModelOptions& opts) {
std::vector<std::string> cache_names;
cache_names = absl::StrSplit(opts.cache_names, ", ");
std::vector<std::string> cache_shapes;
cache_shapes = absl::StrSplit(opts.cache_shape, ", ");
assert(cache_shapes.size() == cache_names.size());
for (size_t i = 0; i < cache_shapes.size(); i++) {
std::vector<std::string> tmp_shape;
tmp_shape = absl::StrSplit(cache_shapes[i], "-");
std::vector<int> cur_shape;
std::transform(tmp_shape.begin(), tmp_shape.end(),
std::back_inserter(cur_shape),
[](const std::string& s) {
return atoi(s.c_str());
});
cache_names_idx_[cache_names[i]] = i;
std::shared_ptr<Tensor<BaseFloat>> cache_eout = std::make_shared<Tensor<BaseFloat>>(cur_shape);
cache_encouts_.push_back(cache_eout);
}
}
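// Example (not part of this file): the parsing above splits cache_shape
// first on ", " into per-cache shapes, then on "-" into integer dims, so
// the default "1-1-1" from paddle_nnet.h becomes {1, 1, 1}:
//   std::vector<std::string> dims = absl::StrSplit("1-1-1", "-");
//   std::vector<int> shape;
//   for (const std::string& d : dims) shape.push_back(atoi(d.c_str()));
//   // shape == {1, 1, 1}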
PaddleNnet::PaddleNnet(const ModelOptions& opts) {
paddle_infer::Config config;
config.SetModel(opts.model_path, opts.params_path);
if (opts.use_gpu) {
config.EnableUseGpu(500, 0);
}
config.SwitchIrOptim(opts.switch_ir_optim);
if (!opts.enable_fc_padding) {
config.DisableFCPadding();
}
if (opts.enable_profile) {
config.EnableProfile();
}
pool.reset(new paddle_infer::services::PredictorPool(config, opts.thread_num));
if (pool == nullptr) {
LOG(ERROR) << "create the predictor pool failed";
}
pool_usages.resize(opts.thread_num);
std::fill(pool_usages.begin(), pool_usages.end(), false);
LOG(INFO) << "load paddle model success";
LOG(INFO) << "start to check the predictor input and output names";
LOG(INFO) << "input names: " << opts.input_names;
LOG(INFO) << "output names: " << opts.output_names;
std::vector<std::string> input_names_vec = absl::StrSplit(opts.input_names, ", ");
std::vector<std::string> output_names_vec = absl::StrSplit(opts.output_names, ", ");
paddle_infer::Predictor* predictor = get_predictor();
std::vector<std::string> model_input_names = predictor->GetInputNames();
assert(input_names_vec.size() == model_input_names.size());
for (size_t i = 0; i < model_input_names.size(); i++) {
assert(input_names_vec[i] == model_input_names[i]);
}
std::vector<std::string> model_output_names = predictor->GetOutputNames();
assert(output_names_vec.size() == model_output_names.size());
for (size_t i = 0;i < output_names_vec.size(); i++) {
assert(output_names_vec[i] == model_output_names[i]);
}
ReleasePredictor(predictor);
init_cache_encouts(opts);
}
paddle_infer::Predictor* PaddleNnet::get_predictor() {
LOG(INFO) << "attempt to get a new predictor instance " << std::endl;
paddle_infer::Predictor* predictor = nullptr;
std::lock_guard<std::mutex> guard(pool_mutex);
int pred_id = 0;
while (pred_id < pool_usages.size()) {
if (pool_usages[pred_id] == false) {
predictor = pool->Retrive(pred_id);
break;
}
++pred_id;
}
if (predictor) {
pool_usages[pred_id] = true;
predictor_to_thread_id[predictor] = pred_id;
LOG(INFO) << "predictor " << pred_id << " retrieved from the pool";
} else {
LOG(ERROR) << "failed to get a predictor from the pool";
}
return predictor;
}
int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
LOG(INFO) << "attempt to releae a predictor";
std::lock_guard<std::mutex> guard(pool_mutex);
auto iter = predictor_to_thread_id.find(predictor);
if (iter == predictor_to_thread_id.end()) {
LOG(INFO) << "there is no such predictor";
return 0;
}
LOG(INFO) << "predictor " << iter->second << " will be released";
pool_usages[iter->second] = false;
predictor_to_thread_id.erase(predictor);
LOG(INFO) << "release success";
return 0;
}
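// Usage sketch (illustrative, assumes a constructed PaddleNnet "nnet"):
// get_predictor/ReleasePredictor implement a simple checkout model over the
// fixed-size pool; the pool retains ownership of the predictor pointers.
//   paddle_infer::Predictor* p = nnet.get_predictor();
//   if (p != nullptr) {
//       /* feed inputs, call p->Run(), fetch outputs */
//       nnet.ReleasePredictor(p);  // mark the pool slot free again
//   }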
std::shared_ptr<Tensor<BaseFloat>> PaddleNnet::GetCacheEncoder(const std::string& name) {
auto iter = cache_names_idx_.find(name);
if (iter == cache_names_idx_.end()) {
return nullptr;
}
assert(iter->second < cache_encouts_.size());
return cache_encouts_[iter->second];
}
void PaddleNnet::FeedForward(const kaldi::Matrix<BaseFloat>& features, kaldi::Matrix<kaldi::BaseFloat>* inferences) const {
// checkout a predictor from the pool (const_cast because the interface declares FeedForward const)
paddle_infer::Predictor* predictor = const_cast<PaddleNnet*>(this)->get_predictor();
// 1. get the names of all input tensors
int row = features.NumRows();
int col = features.NumCols();
std::vector<std::string> input_names = predictor->GetInputNames();
std::vector<std::string> output_names = predictor->GetOutputNames();
LOG(INFO) << "feat info: row=" << row << ", col=" << col;
std::unique_ptr<paddle_infer::Tensor> input_tensor = predictor->GetInputHandle(input_names[0]);
std::vector<int> INPUT_SHAPE = {1, row, col};
input_tensor->Reshape(INPUT_SHAPE);
input_tensor->CopyFromCpu(features.Data());
// 3. feed the number of audio frames
std::unique_ptr<paddle_infer::Tensor> input_len = predictor->GetInputHandle(input_names[1]);
std::vector<int> input_len_size = {1};
input_len->Reshape(input_len_size);
std::vector<int64_t> audio_len;
audio_len.push_back(row);
input_len->CopyFromCpu(audio_len.data());
// feed the streaming cache data
std::unique_ptr<paddle_infer::Tensor> h_box = predictor->GetInputHandle(input_names[2]);
std::shared_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]);
h_box->Reshape(h_cache->get_shape());
h_box->CopyFromCpu(h_cache->get_data().data());
std::unique_ptr<paddle_infer::Tensor> c_box = predictor->GetInputHandle(input_names[3]);
std::shared_ptr<Tensor<BaseFloat>> c_cache = GetCacheEncoder(input_names[3]);
c_box->Reshape(c_cache->get_shape());
c_box->CopyFromCpu(c_cache->get_data().data());
std::thread::id this_id = std::this_thread::get_id();
LOG(INFO) << this_id << " start to compute the probability";
bool success = predictor->Run();
if (!success) {
LOG(ERROR) << "predictor run failed";
}
LOG(INFO) << "predictor run finished";
std::unique_ptr<paddle_infer::Tensor> h_out = predictor->GetOutputHandle(output_names[2]);
assert(h_cache->get_shape() == h_out->shape());
h_out->CopyToCpu(h_cache->get_data().data());
std::unique_ptr<paddle_infer::Tensor> c_out = predictor->GetOutputHandle(output_names[3]);
assert(c_cache->get_shape() == c_out->shape());
c_out->CopyToCpu(c_cache->get_data().data());
// 5. get the final output
std::unique_ptr<paddle_infer::Tensor> output_tensor =
predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_tensor->shape();
row = output_shape[1];
col = output_shape[2];
inferences->Resize(row, col);
output_tensor->CopyToCpu(inferences->Data());
// return the predictor to the pool
const_cast<PaddleNnet*>(this)->ReleasePredictor(predictor);
}
} // namespace ppspeech
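
A hedged end-to-end sketch of driving the new module; the option values and feature sizes below are placeholders, not values this commit prescribes:

#include "nnet/paddle_nnet.h"

int main() {
    ppspeech::ModelOptions opts;                    // defaults come from paddle_nnet.h
    opts.thread_num = 1;
    ppspeech::PaddleNnet nnet(opts);
    kaldi::Matrix<kaldi::BaseFloat> feats(10, 80);  // 10 frames x 80-dim features (placeholder)
    kaldi::Matrix<kaldi::BaseFloat> probs;
    nnet.FeedForward(feats, &probs);                // probs is resized to the model output
    return 0;
}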

@@ -0,0 +1,110 @@
#pragma once
#include "nnet/nnet_interface.h"
#include "base/common.h"
#include "paddle/paddle_inference_api.h"
namespace ppspeech {
struct ModelOptions {
std::string model_path;
std::string params_path;
int thread_num;
bool use_gpu;
bool switch_ir_optim;
std::string input_names;
std::string output_names;
std::string cache_names;
std::string cache_shape;
bool enable_fc_padding;
bool enable_profile;
ModelOptions() :
model_path("model/final.zip"),
params_path("model/avg_1.jit.pdmodel"),
thread_num(2),
use_gpu(false),
switch_ir_optim(false),
input_names("audio"),
output_names("probs"),
cache_names("enouts"),
cache_shape("1-1-1"),
enable_fc_padding(false),
enable_profile(false) {
}
void Register(kaldi::OptionsItf* opts) {
opts->Register("model-path", &model_path, "model file path");
opts->Register("model-params", &params_path, "params model file path");
opts->Register("thread-num", &thread_num, "thread num");
opts->Register("use-gpu", &use_gpu, "if use gpu");
opts->Register("input-names", &input_names, "paddle input names");
opts->Register("output-names", &output_names, "paddle output names");
opts->Register("cache-names", &cache_names, "cache names");
opts->Register("cache-shape", &cache_shape, "cache shape");
opts->Register("switch-ir-optiom", &switch_ir_optim, "paddle SwitchIrOptim option");
opts->Register("enable-fc-padding", &enable_fc_padding, "paddle EnableFCPadding option");
opts->Register("enable-profile", &enable_profile, "paddle EnableProfile option");
}
};
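// Usage sketch (not part of this header): ModelOptions follows the usual
// Kaldi options pattern, so it can be wired to a command line via
// kaldi::ParseOptions from util/parse-options.h; the binary name is hypothetical:
//   kaldi::ParseOptions po("usage: nnet_forward_main [options]");
//   ppspeech::ModelOptions model_opts;
//   model_opts.Register(&po);
//   po.Read(argc, argv);  // accepts e.g. --model-path=... --thread-num=4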
void Register(kaldi::OptionsItf* opts) {
_model_opts.Register(opts);
opts->Register("subsampling-rate", &subsampling_rate,
"subsampling rate for deepspeech model");
opts->Register("receptive-field-length", &receptive_field_length,
"receptive field length for deepspeech model");
}
};
template<typename T>
class Tensor {
public:
Tensor() {
}
Tensor(const std::vector<int>& shape) :
_shape(shape) {
int data_size = std::accumulate(_shape.begin(), _shape.end(),
1, std::multiplies<int>());
LOG(INFO) << "data size: " << data_size;
_data.resize(data_size, 0);
}
void reshape(const std::vector<int>& shape) {
_shape = shape;
int data_size = std::accumulate(_shape.begin(), _shape.end(),
1, std::multiplies<int>());
_data.resize(data_size, 0);
}
const std::vector<int>& get_shape() const {
return _shape;
}
std::vector<T>& get_data() {
return _data;
}
private:
std::vector<int> _shape;
std::vector<T> _data;
};
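// Usage sketch (illustrative): both the shape constructor and reshape allocate
// product-of-dims elements, zero-initialized:
//   Tensor<float> cache({2, 3});   // get_data().size() == 6, all zeros
//   cache.reshape({3, 4});         // now 12 elements
//   const std::vector<int>& s = cache.get_shape();  // {3, 4}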
class PaddleNnet : public NnetInterface {
public:
PaddleNnet(const ModelOptions& opts);
virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
kaldi::Matrix<kaldi::BaseFloat>* inferences) const;
std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(const std::string& name);
void init_cache_encouts(const ModelOptions& opts);
paddle_infer::Predictor* get_predictor();
int ReleasePredictor(paddle_infer::Predictor* predictor);
private:
std::unique_ptr<paddle_infer::services::PredictorPool> pool;
std::vector<bool> pool_usages;
std::map<paddle_infer::Predictor*, int> predictor_to_thread_id;
std::mutex pool_mutex;
std::map<std::string, int> cache_names_idx_;
std::vector<std::shared_ptr<Tensor<kaldi::BaseFloat>>> cache_encouts_;
public:
DISALLOW_COPY_AND_ASSIGN(PaddleNnet);
};
} // namespace ppspeech