diff --git a/speechx/examples/ds2_ol/README.md b/speechx/examples/ds2_ol/README.md index 3fabd3e7..64d0afaa 100644 --- a/speechx/examples/ds2_ol/README.md +++ b/speechx/examples/ds2_ol/README.md @@ -4,8 +4,11 @@ Please go to `aishell` to test it. * aishell Deepspeech2 Streaming Decoding under aishell dataset. +* websocket +Streaming ASR with websocket. The below is for developing and offline testing: * nnet * feat * decoder + diff --git a/speechx/examples/ds2_ol/aishell/path.sh b/speechx/examples/ds2_ol/aishell/path.sh old mode 100644 new mode 100755 diff --git a/speechx/examples/ds2_ol/aishell/websocket_client.sh b/speechx/examples/ds2_ol/aishell/websocket_client.sh old mode 100644 new mode 100755 diff --git a/speechx/examples/ds2_ol/aishell/websocket_server.sh b/speechx/examples/ds2_ol/aishell/websocket_server.sh old mode 100644 new mode 100755 diff --git a/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc b/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc index 198a8ec2..e6fed0ed 100644 --- a/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc +++ b/speechx/examples/ds2_ol/decoder/recognizer_test_main.cc @@ -60,6 +60,7 @@ int main(int argc, char* argv[]) { for (int i = 0; i < cur_chunk_size; ++i) { wav_chunk(i) = waveform(sample_offset + i); } + // wav_chunk = waveform.Range(sample_offset + i, cur_chunk_size); recognizer.Accept(wav_chunk); if (cur_chunk_size < chunk_sample_size) { @@ -67,8 +68,10 @@ int main(int argc, char* argv[]) { } recognizer.Decode(); + // no overlap sample_offset += cur_chunk_size; } + std::string result; result = recognizer.GetFinalResult(); recognizer.Reset(); diff --git a/speechx/examples/ds2_ol/websocket/CMakeLists.txt b/speechx/examples/ds2_ol/websocket/CMakeLists.txt index 754b528e..ed542aad 100644 --- a/speechx/examples/ds2_ol/websocket/CMakeLists.txt +++ b/speechx/examples/ds2_ol/websocket/CMakeLists.txt @@ -6,5 +6,4 @@ target_link_libraries(websocket_server_main PUBLIC frontend kaldi-feat-common nn add_executable(websocket_client_main ${CMAKE_CURRENT_SOURCE_DIR}/websocket_client_main.cc) target_include_directories(websocket_client_main PRIVATE ${SPEECHX_ROOT} ${SPEECHX_ROOT}/kaldi) -target_link_libraries(websocket_client_main PUBLIC frontend kaldi-feat-common nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-decoder websocket ${DEPS}) - +target_link_libraries(websocket_client_main PUBLIC frontend kaldi-feat-common nnet decoder fst utils gflags glog kaldi-base kaldi-matrix kaldi-util kaldi-decoder websocket ${DEPS}) \ No newline at end of file diff --git a/speechx/speechx/decoder/param.h b/speechx/speechx/decoder/param.h index cd50ef53..a2cbbe50 100644 --- a/speechx/speechx/decoder/param.h +++ b/speechx/speechx/decoder/param.h @@ -37,13 +37,16 @@ DEFINE_int32(receptive_field_length, DEFINE_int32(downsampling_rate, 4, "two CNN(kernel=5) module downsampling rate."); +DEFINE_string( + model_input_names, + "audio_chunk,audio_chunk_lens,chunk_state_h_box,chunk_state_c_box", + "model input names"); DEFINE_string(model_output_names, - "save_infer_model/scale_0.tmp_1,save_infer_model/" - "scale_1.tmp_1,save_infer_model/scale_2.tmp_1,save_infer_model/" - "scale_3.tmp_1", + "softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0", "model output names"); DEFINE_string(model_cache_names, "5-1-1024,5-1-1024", "model cache names"); + namespace ppspeech { // todo refactor later FeaturePipelineOptions InitFeaturePipelineOptions() {