parent
42c8d0dd97
commit
e57efcb314
@ -1,15 +1,16 @@
|
|||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include ""
|
#include "base/basic_types.h"
|
||||||
|
#include "kaldi/base/kaldi-types.h"
|
||||||
|
|
||||||
namespace ppspeech {
|
namespace ppspeech {
|
||||||
|
|
||||||
class NnetForwardInterface {
|
class NnetInterface {
|
||||||
public:
|
public:
|
||||||
virtual ~NnetForwardInterface() {}
|
virtual ~NnetForwardInterface() {}
|
||||||
virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
|
virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
|
||||||
kaldi::Vector<kaldi::BaseFloat>* inference) const = 0;
|
kaldi::Matrix<kaldi::BaseFloat>* inferences) const = 0;
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -0,0 +1,179 @@
|
|||||||
|
#include "nnet/paddle_nnet.h"
|
||||||
|
#include "absl/strings/str_split.h"
|
||||||
|
|
||||||
|
namespace ppspeech {
|
||||||
|
|
||||||
|
void PaddleNnet::init_cache_encouts(const ModelOptions& opts) {
|
||||||
|
std::vector<std::string> cache_names;
|
||||||
|
cache_names = absl::StrSplit(opts.cache_names, ", ");
|
||||||
|
std::vector<std::string> cache_shapes;
|
||||||
|
cache_shapes = absl::StrSplit(opts.cache_shape, ", ");
|
||||||
|
assert(cache_shapes.size() == cache_names.size());
|
||||||
|
|
||||||
|
for (size_t i = 0; i < cache_shapes.size(); i++) {
|
||||||
|
std::vector<std::string> tmp_shape;
|
||||||
|
tmp_shape = absl::StrSplit(cache_shapes[i], "- ");
|
||||||
|
std::vector<int> cur_shape;
|
||||||
|
std::transform(tmp_shape.begin(), tmp_shape.end(),
|
||||||
|
std::back_inserter(cur_shape),
|
||||||
|
[](const std::string& s) {
|
||||||
|
return atoi(s.c_str());
|
||||||
|
});
|
||||||
|
cache_names_idx_[cache_names[i]] = i;
|
||||||
|
std::shared_ptr<Tensor<BaseFloat>> cache_eout = std::make_shared<Tensor<BaseFloat>>(cur_shape);
|
||||||
|
cache_encouts_.push_back(cache_eout);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PaddleNet::PaddleNnet(const ModelOptions& opts) {
|
||||||
|
paddle_infer::Config config;
|
||||||
|
config.SetModel(opts.model_path, opts.params_path);
|
||||||
|
if (opts.use_gpu) {
|
||||||
|
config.EnableUseGpu(500, 0);
|
||||||
|
}
|
||||||
|
config.SwitchIrOptim(opts.switch_ir_optim);
|
||||||
|
if (opts.enbale_fc_padding) {
|
||||||
|
config.DisableFCPadding();
|
||||||
|
}
|
||||||
|
if (opts.enable_profile) {
|
||||||
|
config.EnableProfile();
|
||||||
|
}
|
||||||
|
pool.reset(new paddle_infer::services::PredictorPool(config, opts.thread_num));
|
||||||
|
if (pool == nullptr) {
|
||||||
|
LOG(ERROR) << "create the predictor pool failed";
|
||||||
|
}
|
||||||
|
pool_usages.resize(num_thread);
|
||||||
|
std::fill(pool_usages.begin(), pool_usages.end(), false);
|
||||||
|
LOG(INFO) << "load paddle model success";
|
||||||
|
|
||||||
|
LOG(INFO) << "start to check the predictor input and output names";
|
||||||
|
LOG(INFO) << "input names: " << opts.input_names;
|
||||||
|
LOG(INFO) << "output names: " << opts.output_names;
|
||||||
|
vector<string> input_names_vec = absl::StrSplit(opts.input_names, ", ");
|
||||||
|
vector<string> output_names_vec = absl::StrSplit(opts.output_names, ", ");
|
||||||
|
paddle_infer::Predictor* predictor = get_predictor();
|
||||||
|
|
||||||
|
std::vector<std::string> model_input_names = predictor->GetInputNames();
|
||||||
|
assert(input_names_vec.size() == model_input_names.size());
|
||||||
|
for (size_t i = 0; i < model_input_names.size(); i++) {
|
||||||
|
assert(input_names_vec[i] == model_input_names[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::string> model_output_names = predictor->GetOutputNames();
|
||||||
|
assert(output_names_vec.size() == model_output_names.size());
|
||||||
|
for (size_t i = 0;i < output_names_vec.size(); i++) {
|
||||||
|
assert(output_names_vec[i] == model_output_names[i]);
|
||||||
|
}
|
||||||
|
release_predictor(predictor);
|
||||||
|
|
||||||
|
init_cache_encouts(opts);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Borrows a predictor from the pool.
//
// Under pool_mutex, looks for the first slot whose usage flag is false and
// retrieves the predictor stored at that index. On success the slot is
// flagged as used and the predictor -> slot mapping is recorded so that
// ReleasePredictor() can free it again.
//
// Returns the predictor, or nullptr when every slot is in use (or the pool
// returned no predictor for the free slot).
paddle_infer::Predictor* PaddleNnet::get_predictor() {
    LOG(INFO) << "attempt to get a new predictor instance " << std::endl;
    std::lock_guard<std::mutex> lock(pool_mutex);

    // Index of the first free slot; equals pool_usages.size() if none.
    int slot = 0;
    for (; slot < pool_usages.size(); ++slot) {
        if (!pool_usages[slot]) {
            break;
        }
    }

    paddle_infer::Predictor* found = nullptr;
    if (slot < pool_usages.size()) {
        found = pool->Retrive(slot);
    }

    if (found == nullptr) {
        LOG(INFO) << "Failed to get predictor from pool !!!";
        return nullptr;
    }

    pool_usages[slot] = true;
    predictor_to_thread_id[found] = slot;
    LOG(INFO) << slot << " predictor create success";
    return found;
}
|
||||||
|
|
||||||
|
// Hands a predictor obtained from get_predictor() back to the pool.
//
// Under pool_mutex, looks the predictor up in predictor_to_thread_id; when
// found, its slot is flagged as free and the mapping entry is removed.
// An unknown predictor is only logged.
//
// Returns 0 in every case.
int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
    LOG(INFO) << "attempt to releae a predictor";
    std::lock_guard<std::mutex> lock(pool_mutex);

    const auto entry = predictor_to_thread_id.find(predictor);
    if (entry != predictor_to_thread_id.end()) {
        LOG(INFO) << entry->second << " predictor will be release";
        pool_usages[entry->second] = false;
        predictor_to_thread_id.erase(entry);
        LOG(INFO) << "release success";
    } else {
        LOG(INFO) << "there is no such predictor";
    }
    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
shared_ptr<Tensor<BaseFloat>> PaddleNnet::GetCacheEncoder(const string& name) {
|
||||||
|
auto iter = cache_names_idx_.find(name);
|
||||||
|
if (iter == cache_names_idx_.end()) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
assert(iter->second < cache_encouts_.size());
|
||||||
|
return cache_encouts_[iter->second].get();
|
||||||
|
}
|
||||||
|
|
||||||
|
void PaddleNet::FeedForward(const Matrix<BaseFloat>& features, Matrix<BaseFloat>* inferences) const {
|
||||||
|
|
||||||
|
// 1. 得到所有的 input tensor 的名称
|
||||||
|
int row = features.NumRows();
|
||||||
|
int col = features.NumCols();
|
||||||
|
std::vector<std::string> input_names = predictor->GetInputNames();
|
||||||
|
std::vector<std::string> output_names = predictor->GetOutputNames();
|
||||||
|
LOG(INFO) << "feat info: row=" << row << ", col=" << col;
|
||||||
|
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> input_tensor = predictor->GetInputHandle(input_names[0]);
|
||||||
|
std::vector<int> INPUT_SHAPE = {1, row, col};
|
||||||
|
input_tensor->Reshape(INPUT_SHAPE);
|
||||||
|
input_tensor->CopyFromCpu(features.Data());
|
||||||
|
// 3. 输入每个音频帧数
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> input_len = predictor->GetInputHandle(input_names[1]);
|
||||||
|
std::vector<int> input_len_size = {1};
|
||||||
|
input_len->Reshape(input_len_size);
|
||||||
|
std::vector<int64_t> audio_len;
|
||||||
|
audio_len.push_back(row);
|
||||||
|
input_len->CopyFromCpu(audio_len.data());
|
||||||
|
// 输入流式的缓存数据
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> h_box = predictor->GetInputHandle(input_names[2]);
|
||||||
|
share_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]));
|
||||||
|
h_box->Reshape(h_cache->get_shape());
|
||||||
|
h_box->CopyFromCpu(h_cache->get_data().data());
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> c_box = predictor->GetInputHandle(input_names[3]);
|
||||||
|
share_ptr<Tensor<float>> c_cache = GetCacheEncoder(input_names[3]);
|
||||||
|
c_box->Reshape(c_cache->get_shape());
|
||||||
|
c_box->CopyFromCpu(c_cache->get_data().data());
|
||||||
|
std::thread::id this_id = std::this_thread::get_id();
|
||||||
|
LOG(INFO) << this_id << " start to compute the probability";
|
||||||
|
bool success = predictor->Run();
|
||||||
|
|
||||||
|
if (success == false) {
|
||||||
|
LOG(INFO) << "predictor run occurs error";
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG(INFO) << "get the model success";
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> h_out = predictor->GetOutputHandle(output_names[2]);
|
||||||
|
assert(h_cache->get_shape() == h_out->shape());
|
||||||
|
h_out->CopyToCpu(h_cache->get_data().data());
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> c_out = predictor->GetOutputHandle(output_names[3]);
|
||||||
|
assert(c_cache->get_shape() == c_out->shape());
|
||||||
|
c_out->CopyToCpu(c_cache->get_data().data());
|
||||||
|
// 5. 得到最后的输出结果
|
||||||
|
std::unique_ptr<paddle_infer::Tensor> output_tensor =
|
||||||
|
predictor->GetOutputHandle(output_names[0]);
|
||||||
|
std::vector<int> output_shape = output_tensor->shape();
|
||||||
|
row = output_shape[1];
|
||||||
|
col = output_shape[2];
|
||||||
|
inference.Resize(row, col);
|
||||||
|
output_tensor->CopyToCpu(inference.Data());
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
@ -0,0 +1,110 @@
|
|||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "nnet/nnet_interface.h"
|
||||||
|
#include "base/common.h"
|
||||||
|
#include "paddle/paddle_inference_api.h"
|
||||||
|
|
||||||
|
|
||||||
|
namespace ppspeech {
|
||||||
|
|
||||||
|
struct ModelOptions {
|
||||||
|
std::string model_path;
|
||||||
|
std::string params_path;
|
||||||
|
int thread_num;
|
||||||
|
bool use_gpu;
|
||||||
|
bool switch_ir_optim;
|
||||||
|
std::string input_names;
|
||||||
|
std::string output_names;
|
||||||
|
std::string cache_names;
|
||||||
|
std::string cache_shape;
|
||||||
|
bool enable_fc_padding;
|
||||||
|
bool enable_profile;
|
||||||
|
ModelDecoderOptions() :
|
||||||
|
model_path("model/final.zip"),
|
||||||
|
params_path("model/avg_1.jit.pdmodel"),
|
||||||
|
thread_num(2),
|
||||||
|
use_gpu(false),
|
||||||
|
input_names("audio"),
|
||||||
|
output_names("probs"),
|
||||||
|
cache_names("enouts"),
|
||||||
|
cache_shape("1-1-1"),
|
||||||
|
switch_ir_optim(false),
|
||||||
|
enable_fc_padding(false),
|
||||||
|
enable_profile(false) {
|
||||||
|
}
|
||||||
|
|
||||||
|
void Register(kaldi::OptionsItf* opts) {
|
||||||
|
opts->Register("model-path", &model_path, "model file path");
|
||||||
|
opts->Register("model-params", ¶ms_path, "params model file path");
|
||||||
|
opts->Register("thread-num", &thread_num, "thread num");
|
||||||
|
opts->Register("use-gpu", &use_gpu, "if use gpu");
|
||||||
|
opts->Register("input-names", &input_names, "paddle input names");
|
||||||
|
opts->Register("output-names", &output_names, "paddle output names");
|
||||||
|
opts->Register("cache-names", &cache_names, "cache names");
|
||||||
|
opts->Register("cache-shape", &cache_shape, "cache shape");
|
||||||
|
opts->Register("switch-ir-optiom", &switch_ir_optim, "paddle SwitchIrOptim option");
|
||||||
|
opts->Register("enable-fc-padding", &enable_fc_padding, "paddle EnableFCPadding option");
|
||||||
|
opts->Register("enable-profile", &enable_profile, "paddle EnableProfile option");
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// Registers the nested model options first, then the two options owned by
// this object.
// NOTE(review): the struct that declares this method and its members
// (_model_opts, subsampling_rate, receptive_field_length) is not visible in
// this part of the file - confirm it exists.
void Register(kaldi::OptionsItf* opts) {
    _model_opts.Register(opts);

    opts->Register(
        "subsampling-rate",
        &subsampling_rate,
        "subsampling rate for deepspeech model");

    opts->Register(
        "receptive-field-length",
        &receptive_field_length,
        "receptive field length for deepspeech model");
}
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
class Tensor {
|
||||||
|
public:
|
||||||
|
Tensor() {
|
||||||
|
}
|
||||||
|
Tensor(const std::vector<int>& shape) :
|
||||||
|
_shape(shape) {
|
||||||
|
int data_size = std::accumulate(_shape.begin(), _shape.end(),
|
||||||
|
1, std::multiplies<int>());
|
||||||
|
LOG(INFO) << "data size: " << data_size;
|
||||||
|
_data.resize(data_size, 0);
|
||||||
|
}
|
||||||
|
void reshape(const std::vector<int>& shape) {
|
||||||
|
_shape = shape;
|
||||||
|
int data_size = std::accumulate(_shape.begin(), _shape.end(),
|
||||||
|
1, std::multiplies<int>());
|
||||||
|
_data.resize(data_size, 0);
|
||||||
|
}
|
||||||
|
const std::vector<int>& get_shape() const {
|
||||||
|
return _shape;
|
||||||
|
}
|
||||||
|
std::vector<T>& get_data() {
|
||||||
|
return _data;
|
||||||
|
}
|
||||||
|
private:
|
||||||
|
std::vector<int> _shape;
|
||||||
|
std::vector<T> _data;
|
||||||
|
};
|
||||||
|
|
||||||
|
class PaddleNnet : public NnetInterface {
|
||||||
|
public:
|
||||||
|
PaddleNnet(const ModelOptions& opts);
|
||||||
|
virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features,
|
||||||
|
kaldi::Matrix<kaldi::BaseFloat>* inferences) const;
|
||||||
|
std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(const std::string& name);
|
||||||
|
void init_cache_encouts(const ModelOptions& opts);
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::unique_ptr<paddle_infer::services::PredictorPool> pool;
|
||||||
|
std::vector<bool> pool_usages;
|
||||||
|
std::mutex pool_mutex;
|
||||||
|
std::map<std::string, int> cache_names_idx_;
|
||||||
|
std::vector<std::shared_ptr<Tensor<kaldi::BaseFloat>>> cache_encouts_;
|
||||||
|
|
||||||
|
public:
|
||||||
|
DISALLOW_COPY_AND_ASSIGN(PaddleNnet);
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace ppspeech
|
Loading…
Reference in new issue