diff --git a/speechx/CMakeLists.txt b/speechx/CMakeLists.txt
index 1876a4fa..ac3c683d 100644
--- a/speechx/CMakeLists.txt
+++ b/speechx/CMakeLists.txt
@@ -39,6 +39,7 @@ FetchContent_Declare(
   GIT_TAG "20210324.1"
 )
 FetchContent_MakeAvailable(absl)
+include_directories(${absl_SOURCE_DIR})
 
 # libsndfile
 include(FetchContent)
diff --git a/speechx/speechx/nnet/nnet_interface.h b/speechx/speechx/nnet/nnet_interface.h
index e999b8f0..c32774fc 100644
--- a/speechx/speechx/nnet/nnet_interface.h
+++ b/speechx/speechx/nnet/nnet_interface.h
@@ -1,15 +1,16 @@
 #pragma once
 
-#include ""
+#include "base/basic_types.h"
+#include "kaldi/base/kaldi-types.h"
 
 namespace ppspeech {
 
-class NnetForwardInterface {
+class NnetInterface {
   public:
-    virtual ~NnetForwardInterface() {}
+    virtual ~NnetInterface() {}
     virtual void FeedForward(const kaldi::Matrix<kaldi::BaseFloat>& features,
-                             kaldi::Vector<kaldi::BaseFloat>* inference) const = 0;
+                             kaldi::Matrix<kaldi::BaseFloat>* inferences) = 0;
 };
 
diff --git a/speechx/speechx/nnet/paddle_nnet.cc b/speechx/speechx/nnet/paddle_nnet.cc
new file mode 100644
index 00000000..d6f82619
--- /dev/null
+++ b/speechx/speechx/nnet/paddle_nnet.cc
@@ -0,0 +1,179 @@
+#include "nnet/paddle_nnet.h"
+
+#include <algorithm>
+#include <thread>
+
+#include "absl/strings/str_split.h"
+
+namespace ppspeech {
+
+using kaldi::BaseFloat;
+using kaldi::Matrix;
+using std::shared_ptr;
+using std::string;
+using std::vector;
+
+void PaddleNnet::init_cache_encouts(const ModelOptions& opts) {
+    std::vector<std::string> cache_names = absl::StrSplit(opts.cache_names, ",");
+    std::vector<std::string> cache_shapes = absl::StrSplit(opts.cache_shape, ",");
+    assert(cache_shapes.size() == cache_names.size());
+
+    for (size_t i = 0; i < cache_shapes.size(); i++) {
+        // Each cache shape is a '-'-separated list of dims, e.g. "3-1-1024".
+        std::vector<std::string> tmp_shape = absl::StrSplit(cache_shapes[i], "-");
+        std::vector<int> cur_shape;
+        std::transform(tmp_shape.begin(), tmp_shape.end(),
+                       std::back_inserter(cur_shape),
+                       [](const std::string& s) { return atoi(s.c_str()); });
+        cache_names_idx_[cache_names[i]] = i;
+        std::shared_ptr<Tensor<BaseFloat>> cache_eout =
+            std::make_shared<Tensor<BaseFloat>>(cur_shape);
+        cache_encouts_.push_back(cache_eout);
+    }
+}
+
+PaddleNnet::PaddleNnet(const ModelOptions& opts) {
+    paddle_infer::Config config;
+    config.SetModel(opts.model_path, opts.params_path);
+    if (opts.use_gpu) {
+        config.EnableUseGpu(500, 0);
+    }
+    config.SwitchIrOptim(opts.switch_ir_optim);
+    if (!opts.enable_fc_padding) {
+        config.DisableFCPadding();
+    }
+    if (opts.enable_profile) {
+        config.EnableProfile();
+    }
+    pool.reset(
+        new paddle_infer::services::PredictorPool(config, opts.thread_num));
+    if (pool == nullptr) {
+        LOG(ERROR) << "create the predictor pool failed";
+    }
+    pool_usages.resize(opts.thread_num);
+    std::fill(pool_usages.begin(), pool_usages.end(), false);
+    LOG(INFO) << "load paddle model success";
+
+    LOG(INFO) << "start to check the predictor input and output names";
+    LOG(INFO) << "input names: " << opts.input_names;
+    LOG(INFO) << "output names: " << opts.output_names;
+    vector<string> input_names_vec = absl::StrSplit(opts.input_names, ",");
+    vector<string> output_names_vec = absl::StrSplit(opts.output_names, ",");
+    paddle_infer::Predictor* predictor = get_predictor();
+
+    std::vector<std::string> model_input_names = predictor->GetInputNames();
+    assert(input_names_vec.size() == model_input_names.size());
+    for (size_t i = 0; i < model_input_names.size(); i++) {
+        assert(input_names_vec[i] == model_input_names[i]);
+    }
+
+    std::vector<std::string> model_output_names = predictor->GetOutputNames();
+    assert(output_names_vec.size() == model_output_names.size());
+    for (size_t i = 0; i < output_names_vec.size(); i++) {
+        assert(output_names_vec[i] == model_output_names[i]);
+    }
+    ReleasePredictor(predictor);
+
+    init_cache_encouts(opts);
+}
+
+paddle_infer::Predictor* PaddleNnet::get_predictor() {
+    LOG(INFO) << "attempt to get a new predictor instance";
+    paddle_infer::Predictor* predictor = nullptr;
+    std::lock_guard<std::mutex> guard(pool_mutex);
+    int pred_id = 0;
+
+    // Scan for the first unused slot in the pool.
+    while (pred_id < static_cast<int>(pool_usages.size())) {
+        if (pool_usages[pred_id] == false) {
+            predictor = pool->Retrive(pred_id);
+            break;
+        }
+        ++pred_id;
+    }
+
+    if (predictor) {
+        pool_usages[pred_id] = true;
+        predictor_to_thread_id[predictor] = pred_id;
+        LOG(INFO) << "predictor " << pred_id << " retrieved from the pool";
+    } else {
+        LOG(ERROR) << "Failed to get predictor from pool !!!";
+    }
+
+    return predictor;
+}
+
+int PaddleNnet::ReleasePredictor(paddle_infer::Predictor* predictor) {
+    LOG(INFO) << "attempt to release a predictor";
+    std::lock_guard<std::mutex> guard(pool_mutex);
+    auto iter = predictor_to_thread_id.find(predictor);
+
+    if (iter == predictor_to_thread_id.end()) {
+        LOG(INFO) << "there is no such predictor";
+        return 0;
+    }
+
+    LOG(INFO) << "predictor " << iter->second << " will be released";
+    pool_usages[iter->second] = false;
+    predictor_to_thread_id.erase(predictor);
+    LOG(INFO) << "release success";
+    return 0;
+}
+
+shared_ptr<Tensor<BaseFloat>> PaddleNnet::GetCacheEncoder(const string& name) {
+    auto iter = cache_names_idx_.find(name);
+    if (iter == cache_names_idx_.end()) {
+        return nullptr;
+    }
+    assert(static_cast<size_t>(iter->second) < cache_encouts_.size());
+    return cache_encouts_[iter->second];
+}
+
+void PaddleNnet::FeedForward(const Matrix<BaseFloat>& features,
+                             Matrix<BaseFloat>* inferences) {
+    paddle_infer::Predictor* predictor = get_predictor();
+
+    // 1. Get the names of all input and output tensors.
+    int row = features.NumRows();
+    int col = features.NumCols();
+    std::vector<std::string> input_names = predictor->GetInputNames();
+    std::vector<std::string> output_names = predictor->GetOutputNames();
+    LOG(INFO) << "feat info: row=" << row << ", col=" << col;
+
+    // 2. Feed the feature matrix.
+    std::unique_ptr<paddle_infer::Tensor> input_tensor =
+        predictor->GetInputHandle(input_names[0]);
+    std::vector<int> INPUT_SHAPE = {1, row, col};
+    input_tensor->Reshape(INPUT_SHAPE);
+    input_tensor->CopyFromCpu(features.Data());
+
+    // 3. Feed the number of frames in this utterance.
+    std::unique_ptr<paddle_infer::Tensor> input_len =
+        predictor->GetInputHandle(input_names[1]);
+    std::vector<int> input_len_size = {1};
+    input_len->Reshape(input_len_size);
+    std::vector<int64_t> audio_len;
+    audio_len.push_back(row);
+    input_len->CopyFromCpu(audio_len.data());
+
+    // 4. Feed the streaming caches (the encoder h and c states).
+    std::unique_ptr<paddle_infer::Tensor> h_box =
+        predictor->GetInputHandle(input_names[2]);
+    shared_ptr<Tensor<BaseFloat>> h_cache = GetCacheEncoder(input_names[2]);
+    h_box->Reshape(h_cache->get_shape());
+    h_box->CopyFromCpu(h_cache->get_data().data());
+    std::unique_ptr<paddle_infer::Tensor> c_box =
+        predictor->GetInputHandle(input_names[3]);
+    shared_ptr<Tensor<BaseFloat>> c_cache = GetCacheEncoder(input_names[3]);
+    c_box->Reshape(c_cache->get_shape());
+    c_box->CopyFromCpu(c_cache->get_data().data());
+
+    std::thread::id this_id = std::this_thread::get_id();
+    LOG(INFO) << this_id << " start to compute the probability";
+    bool success = predictor->Run();
+
+    if (success == false) {
+        LOG(ERROR) << "predictor run occurs error";
+    }
+
+    LOG(INFO) << "get the model success";
+    // Copy the updated h and c states back into the caches for the next chunk.
+    std::unique_ptr<paddle_infer::Tensor> h_out =
+        predictor->GetOutputHandle(output_names[2]);
+    assert(h_cache->get_shape() == h_out->shape());
+    h_out->CopyToCpu(h_cache->get_data().data());
+    std::unique_ptr<paddle_infer::Tensor> c_out =
+        predictor->GetOutputHandle(output_names[3]);
+    assert(c_cache->get_shape() == c_out->shape());
+    c_out->CopyToCpu(c_cache->get_data().data());
+
+    // 5. Get the final output (the probability matrix).
+    std::unique_ptr<paddle_infer::Tensor> output_tensor =
+        predictor->GetOutputHandle(output_names[0]);
+    std::vector<int> output_shape = output_tensor->shape();
+    row = output_shape[1];
+    col = output_shape[2];
+    inferences->Resize(row, col);
+    output_tensor->CopyToCpu(inferences->Data());
+
+    ReleasePredictor(predictor);
+}
+
+}  // namespace ppspeech
diff --git a/speechx/speechx/nnet/paddle_nnet.h b/speechx/speechx/nnet/paddle_nnet.h
new file mode 100644
index 00000000..1b3cad97
--- /dev/null
+++ b/speechx/speechx/nnet/paddle_nnet.h
@@ -0,0 +1,110 @@
+#pragma once
+
+#include <map>
+#include <memory>
+#include <mutex>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#include "base/common.h"
+#include "nnet/nnet_interface.h"
+#include "paddle/paddle_inference_api.h"
+
+namespace ppspeech {
+
+struct ModelOptions {
+    std::string model_path;
+    std::string params_path;
+    int thread_num;
+    bool use_gpu;
+    bool switch_ir_optim;
+    std::string input_names;
+    std::string output_names;
+    std::string cache_names;
+    std::string cache_shape;
+    bool enable_fc_padding;
+    bool enable_profile;
+    ModelOptions()
+        : model_path("model/final.zip"),
+          params_path("model/avg_1.jit.pdmodel"),
+          thread_num(2),
+          use_gpu(false),
+          input_names("audio"),
+          output_names("probs"),
+          cache_names("enouts"),
+          cache_shape("1-1-1"),
+          switch_ir_optim(false),
+          enable_fc_padding(false),
+          enable_profile(false) {}
+
+    void Register(kaldi::OptionsItf* opts) {
+        opts->Register("model-path", &model_path, "model file path");
+        opts->Register("model-params", &params_path, "params model file path");
+        opts->Register("thread-num", &thread_num, "thread num");
+        opts->Register("use-gpu", &use_gpu, "if use gpu");
+        opts->Register("input-names", &input_names, "paddle input names");
+        opts->Register("output-names", &output_names, "paddle output names");
+        opts->Register("cache-names", &cache_names, "cache names");
+        opts->Register("cache-shape", &cache_shape, "cache shape");
+        opts->Register("switch-ir-optim", &switch_ir_optim,
+                       "paddle SwitchIrOptim option");
+        opts->Register("enable-fc-padding", &enable_fc_padding,
+                       "paddle EnableFCPadding option");
+        opts->Register("enable-profile", &enable_profile,
+                       "paddle EnableProfile option");
+    }
+};
+
+struct NnetForwardOptions {
+    ModelOptions _model_opts;
+    int subsampling_rate;
+    int receptive_field_length;
+    NnetForwardOptions() : subsampling_rate(1), receptive_field_length(1) {}
+
+    void Register(kaldi::OptionsItf* opts) {
+        _model_opts.Register(opts);
+        opts->Register("subsampling-rate", &subsampling_rate,
+                       "subsampling rate for deepspeech model");
+        opts->Register("receptive-field-length", &receptive_field_length,
+                       "receptive field length for deepspeech model");
+    }
+};
+
+template <typename T>
+class Tensor {
+  public:
+    Tensor() {}
+    explicit Tensor(const std::vector<int>& shape) : _shape(shape) {
+        int data_size = std::accumulate(_shape.begin(), _shape.end(), 1,
+                                        std::multiplies<int>());
+        LOG(INFO) << "data size: " << data_size;
+        _data.resize(data_size, 0);
+    }
+    void reshape(const std::vector<int>& shape) {
+        _shape = shape;
+        int data_size = std::accumulate(_shape.begin(), _shape.end(), 1,
+                                        std::multiplies<int>());
+        _data.resize(data_size, 0);
+    }
+    const std::vector<int>& get_shape() const { return _shape; }
+    std::vector<T>& get_data() { return _data; }
+
+  private:
+    std::vector<int> _shape;
+    std::vector<T> _data;
+};
+
+class PaddleNnet : public NnetInterface {
+  public:
+    explicit PaddleNnet(const ModelOptions& opts);
+    virtual void FeedForward(const kaldi::Matrix<kaldi::BaseFloat>& features,
+                             kaldi::Matrix<kaldi::BaseFloat>* inferences);
+    std::shared_ptr<Tensor<kaldi::BaseFloat>> GetCacheEncoder(
+        const std::string& name);
+    void init_cache_encouts(const ModelOptions& opts);
+    paddle_infer::Predictor* get_predictor();
+    int ReleasePredictor(paddle_infer::Predictor* predictor);
+
+  private:
+    std::unique_ptr<paddle_infer::services::PredictorPool> pool;
+    std::vector<bool> pool_usages;
+    std::mutex pool_mutex;
+    std::map<paddle_infer::Predictor*, int> predictor_to_thread_id;
+    std::map<std::string, int> cache_names_idx_;
+    std::vector<std::shared_ptr<Tensor<kaldi::BaseFloat>>> cache_encouts_;
+
+  public:
+    DISALLOW_COPY_AND_ASSIGN(PaddleNnet);
+};
+
+}  // namespace ppspeech
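
Usage note: below is a minimal sketch of how `PaddleNnet` would be driven once this patch lands. It is illustrative only — the model/params paths, tensor names, and cache shapes are hypothetical placeholders that must match the actual exported streaming model (the constructor asserts that `input_names`/`output_names` agree with the predictor's), and it assumes the input order `audio, audio_len, h_box, c_box` with the h/c states at output indices 2 and 3, which is what `FeedForward` indexes.

```cpp
// Hypothetical driver for PaddleNnet; paths, tensor names, and shapes are
// placeholders and must match the actual exported streaming model.
#include "kaldi/matrix/kaldi-matrix.h"
#include "nnet/paddle_nnet.h"

int main() {
    ppspeech::ModelOptions opts;
    opts.model_path = "avg_1.jit.pdmodel";     // hypothetical model file
    opts.params_path = "avg_1.jit.pdiparams";  // hypothetical params file
    opts.input_names = "audio,audio_len,h_box,c_box";
    opts.output_names = "probs,ids,h_out,c_out";  // h/c states at indices 2, 3
    opts.cache_names = "h_box,c_box";             // one cache per recurrent state
    opts.cache_shape = "3-1-1024,3-1-1024";       // dims '-'-separated per cache

    ppspeech::PaddleNnet nnet(opts);

    // One 35-frame chunk of 161-dim features (zeros here; fbank in practice).
    kaldi::Matrix<kaldi::BaseFloat> feats(35, 161);
    kaldi::Matrix<kaldi::BaseFloat> probs;
    nnet.FeedForward(feats, &probs);  // h/c caches are updated in place
    return 0;
}
```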