add nnet module

4 years ago · e57efcb314
parent 42c8d0dd97
commit e57efcb314
4 changed files with 294 additions and 3 deletions
--- a/speechx/CMakeLists.txt
+++ b/speechx/CMakeLists.txt
@ -39,6 +39,7 @@ FetchContent_Declare(
  GIT_TAG "20210324.1"
 )
 FetchContent_MakeAvailable(absl)
 include_directories(${absl_SOURCE_DIR}/absl)
 # libsndfile
 include(FetchContent)
--- a/speechx/speechx/nnet/nnet_interface.h
+++ b/speechx/speechx/nnet/nnet_interface.h
@ -1,15 +1,16 @@
 #pragma once
-#include ""
+#include "base/basic_types.h"
 #include "kaldi/base/kaldi-types.h"
 namespace ppspeech {
-class NnetForwardInterface {
+class NnetInterface {
  public:
    // Virtual destructor so implementations can be destroyed through a base
    // pointer. It must carry the class's current name: after the rename to
    // NnetInterface, `~NnetForwardInterface` is no longer a valid destructor.
    virtual ~NnetInterface() {}
    // Pure-virtual forward pass: reads `features` and writes the result
    // into `*inferences`.
    virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features, 
-                             kaldi::Vector<kaldi::BaseFloat>* inference) const = 0;
+                             kaldi::Matrix<kaldi::BaseFloat>* inferences) const = 0;
 };
--- a/speechx/speechx/nnet/paddle_nnet.cc
+++ b/speechx/speechx/nnet/paddle_nnet.cc
@ -0,0 +1,179 @@
 #include "nnet/paddle_nnet.h"
 #include "absl/strings/str_split.h"
 namespace ppspeech {
 void PaddleNnet::init_cache_encouts(const ModelOptions& opts) {
  std::vector<std::string> cache_names;
  cache_names = absl::StrSplit(opts.cache_names, ", ");
  std::vector<std::string> cache_shapes;
  cache_shapes = absl::StrSplit(opts.cache_shape, ", ");
  assert(cache_shapes.size() == cache_names.size());
  for (size_t i = 0; i < cache_shapes.size(); i++) {
    std::vector<std::string> tmp_shape;
    tmp_shape = absl::StrSplit(cache_shapes[i], "- ");
    std::vector<int> cur_shape;
    std::transform(tmp_shape.begin(), tmp_shape.end(),
                    std::back_inserter(cur_shape),
                    [](const std::string& s) {
                        return atoi(s.c_str());
                    });
    cache_names_idx_[cache_names[i]] = i;
    std::shared_ptr<Tensor<BaseFloat>> cache_eout = std::make_shared<Tensor<BaseFloat>>(cur_shape);
    cache_encouts_.push_back(cache_eout);
  }
 }
 PaddleNet::PaddleNnet(const ModelOptions& opts) {
    paddle_infer::Config config;
    config.SetModel(opts.model_path, opts.params_path);
    if (opts.use_gpu) {
      config.EnableUseGpu(500, 0);
    }
    config.SwitchIrOptim(opts.switch_ir_optim);
    if (opts.enbale_fc_padding) {
      config.DisableFCPadding();
    }
    if (opts.enable_profile) {
      config.EnableProfile();
    }
    pool.reset(new paddle_infer::services::PredictorPool(config, opts.thread_num));
    if (pool == nullptr) {
        LOG(ERROR) << "create the predictor pool failed";
    }
    pool_usages.resize(num_thread);
    std::fill(pool_usages.begin(), pool_usages.end(), false);
    LOG(INFO) << "load paddle model success";
    LOG(INFO) << "start to check the predictor input and output names";
    LOG(INFO) << "input names: " << opts.input_names;
    LOG(INFO) << "output names: " << opts.output_names;
    vector<string> input_names_vec = absl::StrSplit(opts.input_names, ", ");
    vector<string> output_names_vec = absl::StrSplit(opts.output_names, ", ");
    paddle_infer::Predictor* predictor = get_predictor();
    std::vector<std::string> model_input_names = predictor->GetInputNames();
    assert(input_names_vec.size() == model_input_names.size());
    for (size_t i = 0; i < model_input_names.size(); i++) {
        assert(input_names_vec[i] == model_input_names[i]);
    }
    std::vector<std::string> model_output_names = predictor->GetOutputNames();
    assert(output_names_vec.size() == model_output_names.size());
    for (size_t i = 0;i < output_names_vec.size(); i++) {
        assert(output_names_vec[i] == model_output_names[i]);
    }
    release_predictor(predictor);
    init_cache_encouts(opts);
 }
 // Borrows the first predictor in the pool whose slot is not marked in use.
 //
 // Under pool_mutex: scans pool_usages for the first free slot, retrieves
 // that slot's predictor from the pool, marks the slot used and records the
 // predictor -> slot mapping in predictor_to_thread_id.
 // Returns nullptr when no slot is free or the pool yields no predictor for
 // the free slot. A non-null result should be returned to the pool with
 // ReleasePredictor(), which clears the slot's in-use flag.
 paddle_infer::Predictor* PaddleNnet::get_predictor() {
     // glog terminates each message itself, so no std::endl is streamed.
     LOG(INFO) << "attempt to get a new predictor instance ";
     paddle_infer::Predictor* predictor = nullptr;
     std::lock_guard<std::mutex> guard(pool_mutex);
     // size_t index: avoids a signed/unsigned comparison against size().
     size_t pred_id = 0;
     for (; pred_id < pool_usages.size(); ++pred_id) {
         if (!pool_usages[pred_id]) {
             // "Retrive" is the spelling used by the PredictorPool API.
             predictor = pool->Retrive(pred_id);
             break;
         }
     }
     if (predictor == nullptr) {
         LOG(INFO) << "Failed to get predictor from pool !!!";
         return nullptr;
     }
     pool_usages[pred_id] = true;
     predictor_to_thread_id[predictor] = static_cast<int>(pred_id);
     LOG(INFO) << pred_id << " predictor create success";
     return predictor;
 }
 // Marks the pool slot recorded for `predictor` as free again.
 //
 // Under pool_mutex: looks the predictor up in predictor_to_thread_id; if it
 // is known, clears the slot's in-use flag and forgets the mapping.
 // Always returns 0, including when the predictor is not known.
 int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
     LOG(INFO) << "attempt to releae a predictor";
     std::lock_guard<std::mutex> guard(pool_mutex);
     auto entry = predictor_to_thread_id.find(predictor);
     if (entry != predictor_to_thread_id.end()) {
         LOG(INFO) << entry->second << " predictor will be release";
         pool_usages[entry->second] = false;
         predictor_to_thread_id.erase(predictor);
         LOG(INFO) << "release success";
     } else {
         LOG(INFO) << "there is no such predictor";
     }
     return 0;
 }
 shared_ptr<Tensor<BaseFloat>> PaddleNnet::GetCacheEncoder(const string& name) {
  auto iter = cache_names_idx_.find(name);
  if (iter == cache_names_idx_.end()) {
    return nullptr;
  }
  assert(iter->second < cache_encouts_.size());
  return cache_encouts_[iter->second].get(); 
 }
 void PaddleNet::FeedForward(const Matrix<BaseFloat>& features, Matrix<BaseFloat>* inferences) const {
    // 1. 得到所有的 input tensor 的名称
    int row = features.NumRows();
    int col = features.NumCols();
    std::vector<std::string> input_names = predictor->GetInputNames();
    std::vector<std::string> output_names = predictor->GetOutputNames();
    LOG(INFO) << "feat info: row=" << row << ", col=" << col;
    std::unique_ptr<paddle_infer::Tensor> input_tensor = predictor->GetInputHandle(input_names[0]);
    std::vector<int> INPUT_SHAPE = {1, row, col};
    input_tensor->Reshape(INPUT_SHAPE);
    input_tensor->CopyFromCpu(features.Data());
    // 3. 输入每个音频帧数
    std::unique_ptr<paddle_infer::Tensor> input_len = predictor->GetInputHandle(input_names[1]);
    std::vector<int> input_len_size = {1};
    input_len->Reshape(input_len_size);
    std::vector<int64_t> audio_len;
    audio_len.push_back(row);
    input_len->CopyFromCpu(audio_len.data());
    // 输入流式的缓存数据
    std::unique_ptr<paddle_infer::Tensor> h_box = predictor->GetInputHandle(input_names[2]);
    share_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]));
    h_box->Reshape(h_cache->get_shape());
    h_box->CopyFromCpu(h_cache->get_data().data());
    std::unique_ptr<paddle_infer::Tensor> c_box = predictor->GetInputHandle(input_names[3]);
    share_ptr<Tensor<float>> c_cache = GetCacheEncoder(input_names[3]);
    c_box->Reshape(c_cache->get_shape());
    c_box->CopyFromCpu(c_cache->get_data().data());
    std::thread::id this_id = std::this_thread::get_id();
    LOG(INFO) << this_id << " start to compute the probability";
    bool success = predictor->Run();
    if (success == false) {
        LOG(INFO) << "predictor run occurs error";
    }
    LOG(INFO) << "get the model success";
    std::unique_ptr<paddle_infer::Tensor> h_out = predictor->GetOutputHandle(output_names[2]);
    assert(h_cache->get_shape() == h_out->shape());
    h_out->CopyToCpu(h_cache->get_data().data());
    std::unique_ptr<paddle_infer::Tensor> c_out = predictor->GetOutputHandle(output_names[3]);
    assert(c_cache->get_shape() == c_out->shape());
    c_out->CopyToCpu(c_cache->get_data().data());
    // 5. 得到最后的输出结果
    std::unique_ptr<paddle_infer::Tensor> output_tensor =
        predictor->GetOutputHandle(output_names[0]);
    std::vector<int> output_shape = output_tensor->shape();
    row = output_shape[1];
    col = output_shape[2];
    inference.Resize(row, col);
    output_tensor->CopyToCpu(inference.Data());
 }
 } // namespace ppspeech           
--- a/speechx/speechx/nnet/paddle_nnet.h
+++ b/speechx/speechx/nnet/paddle_nnet.h
@ -0,0 +1,110 @@
 #pragma once
 #include "nnet/nnet_interface.h"
 #include "base/common.h"
 #include "paddle/paddle_inference_api.h"
 namespace ppspeech {
 // Options describing which Paddle model to load and how to run it.
 // Each field is exposed as a command-line option through Register().
 struct ModelOptions {
     std::string model_path;     // "model-path": model file path
     std::string params_path;    // "model-params": params model file path
     int thread_num;             // "thread-num": thread num
     bool use_gpu;               // "use-gpu": if use gpu
     bool switch_ir_optim;       // "switch-ir-optiom": paddle SwitchIrOptim option
     std::string input_names;    // "input-names": paddle input names
     std::string output_names;   // "output-names": paddle output names
     std::string cache_names;    // "cache-names": cache names
     std::string cache_shape;    // "cache-shape": cache shape
     bool enable_fc_padding;     // "enable-fc-padding": paddle EnableFCPadding option
     bool enable_profile;        // "enable-profile": paddle EnableProfile option
     // Sets the defaults. The constructor must be named after the struct
     // (it was previously spelled ModelDecoderOptions, which is not a valid
     // constructor of ModelOptions). Initializers follow declaration order,
     // which is the order in which members are actually initialized.
     // NOTE(review): the params_path default ends in ".pdmodel" and the
     // model_path default in ".zip" - confirm these are the intended files.
     ModelOptions() :
         model_path("model/final.zip"),
         params_path("model/avg_1.jit.pdmodel"),
         thread_num(2),
         use_gpu(false),
         switch_ir_optim(false),
         input_names("audio"),
         output_names("probs"),
         cache_names("enouts"),
         cache_shape("1-1-1"),
         enable_fc_padding(false),
         enable_profile(false) {
     }
     // Registers every field on `opts` under its command-line name.
     // NOTE(review): "switch-ir-optiom" looks like a misspelling of
     // "switch-ir-optim"; it is kept because renaming the flag would break
     // existing command lines.
     void Register(kaldi::OptionsItf* opts) {
         opts->Register("model-path", &model_path, "model file path");
         opts->Register("model-params", &params_path, "params model file path");
         opts->Register("thread-num", &thread_num, "thread num");
         opts->Register("use-gpu", &use_gpu, "if use gpu");
         opts->Register("input-names", &input_names, "paddle input names");
         opts->Register("output-names", &output_names, "paddle output names");
         opts->Register("cache-names", &cache_names, "cache names");
         opts->Register("cache-shape", &cache_shape, "cache shape");
         opts->Register("switch-ir-optiom", &switch_ir_optim, "paddle SwitchIrOptim option");
         opts->Register("enable-fc-padding", &enable_fc_padding, "paddle EnableFCPadding option");
         opts->Register("enable-profile", &enable_profile, "paddle EnableProfile option");
     }
 };
    // Registers options on `opts`: first forwards to _model_opts.Register(),
    // then registers "subsampling-rate" and "receptive-field-length".
    // NOTE(review): the enclosing struct's header and the members used here
    // (_model_opts, subsampling_rate, receptive_field_length) are not
    // visible in this hunk - confirm this fragment still belongs to a type.
    void Register(kaldi::OptionsItf* opts) {
        _model_opts.Register(opts);
        opts->Register("subsampling-rate", &subsampling_rate, 
                       "subsampling rate for deepspeech model");
        opts->Register("receptive-field-length", &receptive_field_length, 
                       "receptive field length for deepspeech model");
    }
 };
 // A shape plus a flat std::vector<T> buffer sized to the product of the
 // shape's dimensions (an empty shape yields a product of 1).
 template<typename T>
 class Tensor {
  public:
     // Creates a tensor with an empty shape and no data.
     Tensor() {
     }
     // Creates a zero-filled tensor of the given shape and logs its size.
     Tensor(const std::vector<int>& shape) :
         _shape(shape) {
         const int total = element_count();
         LOG(INFO) << "data size: " << total;
         _data.resize(total, 0);
     }
     // Adopts a new shape and resizes the buffer to match; elements added
     // by the resize are zero, existing leading elements are kept.
     void reshape(const std::vector<int>& shape) {
         _shape = shape;
         _data.resize(element_count(), 0);
     }
     // Returns the current shape.
     const std::vector<int>& get_shape() const {
         return _shape;
     }
     // Returns the mutable flat buffer.
     std::vector<T>& get_data() {
         return _data;
     }
  private:
     // Product of all dimensions in _shape, starting from 1.
     int element_count() const {
         int product = 1;
         for (int dim : _shape) {
             product = product * dim;
         }
         return product;
     }
     std::vector<int> _shape;
     std::vector<T> _data;
 };
 class PaddleNnet : public NnetInterface {
  public:
    PaddleNnet(const ModelOptions& opts);
    virtual void FeedForward(const kaldi::Matrix<BaseFloat>& features, 
                             kaldi::Matrix<kaldi::BaseFloat>* inferences) const;
    std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(const std::string& name);
    void init_cache_encouts(const ModelOptions& opts); 
  private:
    std::unique_ptr<paddle_infer::services::PredictorPool> pool;
    std::vector<bool> pool_usages;
    std::mutex pool_mutex;
    std::map<std::string, int> cache_names_idx_;
    std::vector<std::shared_ptr<Tensor<kaldi::BaseFloat>>> cache_encouts_;
  public:
    DISALLOW_COPY_AND_ASSIGN(PaddleNnet);
 };
 } // namespace ppspeech