diff --git a/speechx/CMakeLists.txt b/speechx/CMakeLists.txt index 083e180da..f1330d1da 100644 --- a/speechx/CMakeLists.txt +++ b/speechx/CMakeLists.txt @@ -117,6 +117,7 @@ set(MKLDNN_PATH "${PADDLE_LIB_THIRD_PARTY_PATH}mkldnn") include_directories("${MKLDNN_PATH}/include") set(MKLDNN_LIB ${MKLDNN_PATH}/lib/libmkldnn.so.0) set(EXTERNAL_LIB "-lrt -ldl -lpthread") + set(DEPS ${PADDLE_LIB}/paddle/lib/libpaddle_inference${CMAKE_SHARED_LIBRARY_SUFFIX}) set(DEPS ${DEPS} ${MATH_LIB} ${MKLDNN_LIB} @@ -137,4 +138,7 @@ set(DEPS ${DEPS} #target_link_libraries(lib_name item0 item1) #add_dependencies(lib_name depend-target) -add_subdirectory(speechx) \ No newline at end of file +set(SPEECHX_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/speechx) + +add_subdirectory(speechx) +add_subdirectory(examples) diff --git a/speechx/build.sh b/speechx/build.sh index b34063871..3e9600d53 100755 --- a/speechx/build.sh +++ b/speechx/build.sh @@ -16,7 +16,7 @@ if [ ! -d ${boost_SOURCE_DIR} ]; then wget -c https://boostorg.jfrog.io/artifact echo -e "\n" fi -rm -rf build +#rm -rf build mkdir -p build cd build diff --git a/speechx/cmake/external/openblas.cmake b/speechx/cmake/external/openblas.cmake index 14e171952..3c202f7f6 100644 --- a/speechx/cmake/external/openblas.cmake +++ b/speechx/cmake/external/openblas.cmake @@ -18,6 +18,8 @@ ExternalProject_Add( SOURCE_DIR ${OpenBLAS_SOURCE_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= CMAKE_GENERATOR "Unix Makefiles") + + # https://cmake.org/cmake/help/latest/module/ExternalProject.html?highlight=externalproject_get_property#external-project-definition ExternalProject_Get_Property(OPENBLAS INSTALL_DIR) set(OpenBLAS_INSTALL_PREFIX ${INSTALL_DIR}) diff --git a/speechx/examples/.gitkeep b/speechx/examples/.gitkeep deleted file mode 100644 index e69de29bb..000000000 diff --git a/speechx/examples/CMakeLists.txt b/speechx/examples/CMakeLists.txt new file mode 100644 index 000000000..ef0a72b88 --- /dev/null +++ b/speechx/examples/CMakeLists.txt @@ 
-0,0 +1,5 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +add_subdirectory(feat) +add_subdirectory(nnet) +add_subdirectory(decoder) diff --git a/speechx/examples/README.md b/speechx/examples/README.md new file mode 100644 index 000000000..7e26da626 --- /dev/null +++ b/speechx/examples/README.md @@ -0,0 +1,6 @@ +# Examples + +* decoder - offline decoder +* feat - mfcc, linear +* nnet - ds2 nn + diff --git a/speechx/examples/decoder/CMakeLists.txt b/speechx/examples/decoder/CMakeLists.txt new file mode 100644 index 000000000..cf90d0949 --- /dev/null +++ b/speechx/examples/decoder/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +add_executable(offline-decoder-main ${CMAKE_CURRENT_SOURCE_DIR}/offline-decoder-main.cc) +target_include_directories(offline-decoder-main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) +target_link_libraries(offline-decoder-main PRIVATE nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util ${DEPS}) diff --git a/speechx/speechx/codelab/decoder_test/offline_decoder_main.cc b/speechx/examples/decoder/offline-decoder-main.cc similarity index 100% rename from speechx/speechx/codelab/decoder_test/offline_decoder_main.cc rename to speechx/examples/decoder/offline-decoder-main.cc diff --git a/speechx/examples/feat/CMakeLists.txt b/speechx/examples/feat/CMakeLists.txt new file mode 100644 index 000000000..44738e60b --- /dev/null +++ b/speechx/examples/feat/CMakeLists.txt @@ -0,0 +1,10 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + + +add_executable(mfcc-test ${CMAKE_CURRENT_SOURCE_DIR}/feature-mfcc-test.cc) +target_include_directories(mfcc-test PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) +target_link_libraries(mfcc-test PRIVATE kaldi-mfcc) + +add_executable(linear-spectrogram-main ${CMAKE_CURRENT_SOURCE_DIR}/linear-spectrogram-main.cc) +target_include_directories(linear-spectrogram-main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) 
+target_link_libraries(linear-spectrogram-main PRIVATE frontend kaldi-util kaldi-feat-common gflags glog) diff --git a/speechx/speechx/codelab/feat_test/feature-mfcc-test.cc b/speechx/examples/feat/feature-mfcc-test.cc similarity index 100% rename from speechx/speechx/codelab/feat_test/feature-mfcc-test.cc rename to speechx/examples/feat/feature-mfcc-test.cc diff --git a/speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc b/speechx/examples/feat/linear-spectrogram-main.cc similarity index 100% rename from speechx/speechx/codelab/feat_test/linear_spectrogram_main.cc rename to speechx/examples/feat/linear-spectrogram-main.cc diff --git a/speechx/examples/nnet/CMakeLists.txt b/speechx/examples/nnet/CMakeLists.txt new file mode 100644 index 000000000..20f4008ce --- /dev/null +++ b/speechx/examples/nnet/CMakeLists.txt @@ -0,0 +1,5 @@ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +add_executable(pp-model-test ${CMAKE_CURRENT_SOURCE_DIR}/pp-model-test.cc) +target_include_directories(pp-model-test PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) +target_link_libraries(pp-model-test PRIVATE nnet gflags ${DEPS}) diff --git a/speechx/speechx/codelab/nnet_test/model_test.cc b/speechx/examples/nnet/pp-model-test.cc similarity index 81% rename from speechx/speechx/codelab/nnet_test/model_test.cc rename to speechx/examples/nnet/pp-model-test.cc index ce1e7fffd..b0e777215 100644 --- a/speechx/speechx/codelab/nnet_test/model_test.cc +++ b/speechx/examples/nnet/pp-model-test.cc @@ -8,13 +8,30 @@ #include #include +using std::cout; +using std::endl; + +DEFINE_string(model_path, "avg_1.jit.pdmodel", "xxx.pdmodel"); +DEFINE_string(param_path, "avg_1.jit.pdiparams", "xxx.pdiparams"); + + void produce_data(std::vector>* data); void model_forward_test(); -int main(int argc, char* argv[]) { - gflags::ParseCommandLineFlags(&argc, &argv, true); - model_forward_test(); - return 0; +void produce_data(std::vector>* data) { + int 
chunk_size = 35; // chunk_size in frame + int col_size = 161; // feat dim + cout << "chunk size: " << chunk_size << endl; + cout << "feat dim: " << col_size << endl; + + data->reserve(chunk_size); + for (int row = 0; row < chunk_size; ++row) { + data->push_back(std::vector()); + data->back().reserve(col_size); // reserve only after the row exists: back() on an empty vector is undefined + for (int col_idx = 0; col_idx < col_size; ++col_idx) { + data->back().push_back(0.201); + } + } } void model_forward_test() { @@ -23,18 +40,23 @@ void model_forward_test() { produce_data(&feats); std::cout << "2. load the model" << std::endl;; - std::string model_graph = "../../../../model/paddle_online_deepspeech/model/avg_1.jit.pdmodel"; - std::string model_params = "../../../../model/paddle_online_deepspeech/model/avg_1.jit.pdiparams"; + std::string model_graph = FLAGS_model_path; + std::string model_params = FLAGS_param_path; + cout << "model path: " << model_graph << endl; + cout << "model param path : " << model_params << endl; + paddle_infer::Config config; config.SetModel(model_graph, model_params); config.SwitchIrOptim(false); + cout << "SwitchIrOptim: " << false << endl; config.DisableFCPadding(); + cout << "DisableFCPadding: " << endl; auto predictor = paddle_infer::CreatePredictor(config); std::cout << "3. feat shape, row=" << feats.size() << ",col=" << feats[0].size() << std::endl; - std::vector paddle_input_feature_matrix; + std::vector pp_input_mat; for(const auto& item : feats) { - paddle_input_feature_matrix.insert(paddle_input_feature_matrix.end(), item.begin(), item.end()); + pp_input_mat.insert(pp_input_mat.end(), item.begin(), item.end()); } std::cout << "4. 
fead the data to model" << std::endl; @@ -42,13 +64,21 @@ void model_forward_test() { int col = feats[0].size(); std::vector input_names = predictor->GetInputNames(); std::vector output_names = predictor->GetOutputNames(); + for (const auto& name : input_names){ + cout << "model input names: " << name << endl; + } + for (const auto& name : output_names){ + cout << "model output names: " << name << endl; + } + // input std::unique_ptr input_tensor = predictor->GetInputHandle(input_names[0]); std::vector INPUT_SHAPE = {1, row, col}; input_tensor->Reshape(INPUT_SHAPE); - input_tensor->CopyFromCpu(paddle_input_feature_matrix.data()); + input_tensor->CopyFromCpu(pp_input_mat.data()); + // input length std::unique_ptr input_len = predictor->GetInputHandle(input_names[1]); std::vector input_len_size = {1}; input_len->Reshape(input_len_size); @@ -56,6 +86,7 @@ void model_forward_test() { audio_len.push_back(row); input_len->CopyFromCpu(audio_len.data()); + // state_h std::unique_ptr chunk_state_h_box = predictor->GetInputHandle(input_names[2]); std::vector chunk_state_h_box_shape = {3, 1, 1024}; chunk_state_h_box->Reshape(chunk_state_h_box_shape); @@ -64,6 +95,7 @@ void model_forward_test() { std::vector chunk_state_h_box_data(chunk_state_h_box_size, 0.0f); chunk_state_h_box->CopyFromCpu(chunk_state_h_box_data.data()); + // state_c std::unique_ptr chunk_state_c_box = predictor->GetInputHandle(input_names[3]); std::vector chunk_state_c_box_shape = {3, 1, 1024}; chunk_state_c_box->Reshape(chunk_state_c_box_shape); @@ -72,8 +104,10 @@ void model_forward_test() { std::vector chunk_state_c_box_data(chunk_state_c_box_size, 0.0f); chunk_state_c_box->CopyFromCpu(chunk_state_c_box_data.data()); + // run bool success = predictor->Run(); + // state_h out std::unique_ptr h_out = predictor->GetOutputHandle(output_names[2]); std::vector h_out_shape = h_out->shape(); int h_out_size = std::accumulate(h_out_shape.begin(), h_out_shape.end(), @@ -81,6 +115,7 @@ void model_forward_test() { std::vector 
h_out_data(h_out_size); h_out->CopyToCpu(h_out_data.data()); + // state_c out std::unique_ptr c_out = predictor->GetOutputHandle(output_names[3]); std::vector c_out_shape = c_out->shape(); int c_out_size = std::accumulate(c_out_shape.begin(), c_out_shape.end(), @@ -88,6 +123,7 @@ void model_forward_test() { std::vector c_out_data(c_out_size); c_out->CopyToCpu(c_out_data.data()); + // output tensor std::unique_ptr output_tensor = predictor->GetOutputHandle(output_names[0]); std::vector output_shape = output_tensor->shape(); @@ -99,6 +135,7 @@ void model_forward_test() { row = output_shape[1]; col = output_shape[2]; + // probs std::vector> probs; probs.reserve(row); for (int i = 0; i < row; i++) { @@ -120,15 +157,8 @@ void model_forward_test() { } } -void produce_data(std::vector>* data) { - int chunk_size = 35; - int col_size = 161; - data->reserve(chunk_size); - data->back().reserve(col_size); - for (int row = 0; row < chunk_size; ++row) { - data->push_back(std::vector()); - for (int col_idx = 0; col_idx < col_size; ++col_idx) { - data->back().push_back(0.201); - } - } +int main(int argc, char* argv[]) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + model_forward_test(); + return 0; } diff --git a/speechx/speechx/CMakeLists.txt b/speechx/speechx/CMakeLists.txt index 4a296ec83..225abee7c 100644 --- a/speechx/speechx/CMakeLists.txt +++ b/speechx/speechx/CMakeLists.txt @@ -30,16 +30,4 @@ include_directories( ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/decoder ) -add_subdirectory(decoder) - -add_executable(mfcc-test codelab/feat_test/feature-mfcc-test.cc) -target_link_libraries(mfcc-test kaldi-mfcc) - -add_executable(linear_spectrogram_main codelab/feat_test/linear_spectrogram_main.cc) -target_link_libraries(linear_spectrogram_main frontend kaldi-util kaldi-feat-common gflags glog) - -add_executable(offline_decoder_main codelab/decoder_test/offline_decoder_main.cc) -target_link_libraries(offline_decoder_main PUBLIC nnet decoder fst utils gflags 
glog kaldi-base kaldi-matrix kaldi-util ${DEPS}) - -add_executable(model_test codelab/nnet_test/model_test.cc) -target_link_libraries(model_test PUBLIC nnet gflags ${DEPS}) +add_subdirectory(decoder) diff --git a/speechx/speechx/codelab/README.md b/speechx/speechx/codelab/README.md deleted file mode 100644 index 95c95db13..000000000 --- a/speechx/speechx/codelab/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# codelab - -This directory is here for testing some funcitons temporaril. -