From c403a838203ad99898912f3ec116f778e1353f09 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 21 Apr 2022 14:56:06 +0000 Subject: [PATCH] fix param path name; ws client --- demos/streaming_asr_server/README.md | 2 +- demos/streaming_asr_server/README_cn.md | 2 +- speechx/examples/ds2_ol/README.md | 1 - speechx/examples/ds2_ol/aishell/README.md | 2 +- speechx/examples/ds2_ol/aishell/run.sh | 2 +- .../ds2_ol/aishell/websocket_server.sh | 66 ------------------- .../ctc-prefix-beam-search-decoder-ol.cc | 2 +- .../ds2_ol/decoder/wfst-decoder-ol.cc | 2 +- speechx/examples/ds2_ol/websocket/.gitignore | 2 + speechx/examples/ds2_ol/websocket/path.sh | 14 ++++ .../websocket_client.sh | 0 .../ds2_ol/websocket/websocket_client_main.cc | 8 +-- .../ds2_ol/websocket/websocket_server.sh | 57 ++++++++++++++++ speechx/speechx/decoder/param.h | 5 +- speechx/speechx/nnet/paddle_nnet.cc | 2 +- speechx/speechx/nnet/paddle_nnet.h | 16 ++--- utils/format_rsl.py | 1 + 17 files changed, 92 insertions(+), 92 deletions(-) delete mode 100755 speechx/examples/ds2_ol/aishell/websocket_server.sh create mode 100644 speechx/examples/ds2_ol/websocket/.gitignore create mode 100755 speechx/examples/ds2_ol/websocket/path.sh rename speechx/examples/ds2_ol/{aishell => websocket}/websocket_client.sh (100%) create mode 100755 speechx/examples/ds2_ol/websocket/websocket_server.sh diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md index 68c3b045..0eed8e56 100644 --- a/demos/streaming_asr_server/README.md +++ b/demos/streaming_asr_server/README.md @@ -352,4 +352,4 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 - ``` \ No newline at end of file + ``` diff --git 
a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md index c58e17e9..bf122bb3 100644 --- a/demos/streaming_asr_server/README_cn.md +++ b/demos/streaming_asr_server/README_cn.md @@ -353,4 +353,4 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav [2022-04-21 15:59:08,024] [ INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} [2022-04-21 15:59:12,883] [ INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'} [2022-04-21 15:59:12,884] [ INFO] - 我认为跑步最重要的就是给我带来了身体健康 - ``` \ No newline at end of file + ``` diff --git a/speechx/examples/ds2_ol/README.md b/speechx/examples/ds2_ol/README.md index 64d0afaa..18f248a1 100644 --- a/speechx/examples/ds2_ol/README.md +++ b/speechx/examples/ds2_ol/README.md @@ -11,4 +11,3 @@ The below is for developing and offline testing: * nnet * feat * decoder - diff --git a/speechx/examples/ds2_ol/aishell/README.md b/speechx/examples/ds2_ol/aishell/README.md index 115bf85f..01c89979 100644 --- a/speechx/examples/ds2_ol/aishell/README.md +++ b/speechx/examples/ds2_ol/aishell/README.md @@ -33,4 +33,4 @@ LM: [wenetspeech](http://paddlespeech.bj.bcebos.com/speechx/examples/ds2_ol/aish Overall -> 10.93 % N=104765 C=93410 S=9780 D=1575 I=95 Mandarin -> 10.93 % N=104762 C=93410 S=9779 D=1573 I=95 Other -> 100.00 % N=3 C=0 S=1 D=2 I=0 -``` \ No newline at end of file +``` diff --git a/speechx/examples/ds2_ol/aishell/run.sh b/speechx/examples/ds2_ol/aishell/run.sh index 9a63ff4d..c68a0cbc 100755 --- a/speechx/examples/ds2_ol/aishell/run.sh +++ b/speechx/examples/ds2_ol/aishell/run.sh @@ -119,7 +119,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then pushd $wfst wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip unzip aishell_graph.zip - mv aishell_graph/* + mv aishell_graph/* $wfst popd fi diff --git a/speechx/examples/ds2_ol/aishell/websocket_server.sh b/speechx/examples/ds2_ol/aishell/websocket_server.sh deleted 
file mode 100755 index ea619d54..00000000 --- a/speechx/examples/ds2_ol/aishell/websocket_server.sh +++ /dev/null @@ -1,66 +0,0 @@ -#!/bin/bash -set +x -set -e - -. path.sh - - -# 1. compile -if [ ! -d ${SPEECHX_EXAMPLES} ]; then - pushd ${SPEECHX_ROOT} - bash build.sh - popd -fi - -# input -mkdir -p data -data=$PWD/data -ckpt_dir=$data/model -model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/ -vocb_dir=$ckpt_dir/data/lang_char/ - -# output -aishell_wav_scp=aishell_test.scp -if [ ! -d $data/test ]; then - pushd $data - wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip - unzip aishell_test.zip - popd - - realpath $data/test/*/*.wav > $data/wavlist - awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id - paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp -fi - - -if [ ! -d $ckpt_dir ]; then - mkdir -p $ckpt_dir - wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz - tar xzfv $ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir -fi - - -export GLOG_logtostderr=1 - -# 3. gen cmvn -cmvn=$PWD/cmvn.ark -cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn - -text=$data/test/text -graph_dir=./aishell_graph -if [ ! -d $graph_dir ]; then - wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip - unzip aishell_graph.zip -fi - -# 5. 
test websocket server -websocket_server_main \ - --cmvn_file=$cmvn \ - --model_path=$model_dir/avg_1.jit.pdmodel \ - --streaming_chunk=0.1 \ - --convert2PCM32=true \ - --params_path=$model_dir/avg_1.jit.pdiparams \ - --word_symbol_table=$graph_dir/words.txt \ - --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ - --graph_path=$graph_dir/TLG.fst --max_active=7500 \ - --acoustic_scale=1.2 diff --git a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc index 4a39217c..6a6495aa 100644 --- a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc +++ b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc @@ -76,7 +76,7 @@ int main(int argc, char* argv[]) { ppspeech::ModelOptions model_opts; model_opts.model_path = model_path; - model_opts.params_path = model_params; + model_opts.param_path = model_params; model_opts.cache_shape = FLAGS_model_cache_names; model_opts.input_names = FLAGS_model_input_names; model_opts.output_names = FLAGS_model_output_names; diff --git a/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc b/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc index 92b3d8ec..544e59cb 100644 --- a/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc +++ b/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc @@ -79,7 +79,7 @@ int main(int argc, char* argv[]) { ppspeech::ModelOptions model_opts; model_opts.model_path = model_graph; - model_opts.params_path = model_params; + model_opts.param_path = model_params; model_opts.cache_shape = FLAGS_model_cache_names; model_opts.input_names = FLAGS_model_input_names; model_opts.output_names = FLAGS_model_output_names; diff --git a/speechx/examples/ds2_ol/websocket/.gitignore b/speechx/examples/ds2_ol/websocket/.gitignore new file mode 100644 index 00000000..bbd86a25 --- /dev/null +++ b/speechx/examples/ds2_ol/websocket/.gitignore @@ -0,0 +1,2 @@ +data +exp diff --git 
a/speechx/examples/ds2_ol/websocket/path.sh b/speechx/examples/ds2_ol/websocket/path.sh new file mode 100755 index 00000000..d66b5dcc --- /dev/null +++ b/speechx/examples/ds2_ol/websocket/path.sh @@ -0,0 +1,14 @@ +# This contains the locations of binarys build required for running the examples. + +SPEECHX_ROOT=$PWD/../../.. +SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples + +SPEECHX_TOOLS=$SPEECHX_ROOT/tools +TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin + +[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; } + +export LC_AL=C + +SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/websocket:$SPEECHX_EXAMPLES/ds2_ol/feat +export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN diff --git a/speechx/examples/ds2_ol/aishell/websocket_client.sh b/speechx/examples/ds2_ol/websocket/websocket_client.sh similarity index 100% rename from speechx/examples/ds2_ol/aishell/websocket_client.sh rename to speechx/examples/ds2_ol/websocket/websocket_client.sh diff --git a/speechx/examples/ds2_ol/websocket/websocket_client_main.cc b/speechx/examples/ds2_ol/websocket/websocket_client_main.cc index d6f0d480..c8c438ec 100644 --- a/speechx/examples/ds2_ol/websocket/websocket_client_main.cc +++ b/speechx/examples/ds2_ol/websocket/websocket_client_main.cc @@ -26,8 +26,7 @@ using kaldi::int16; int main(int argc, char* argv[]) { gflags::ParseCommandLineFlags(&argc, &argv, false); google::InitGoogleLogging(argv[0]); - ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port); - + kaldi::SequentialTableReader wav_reader( FLAGS_wav_rspecifier); @@ -36,6 +35,8 @@ int main(int argc, char* argv[]) { const int chunk_sample_size = streaming_chunk * sample_rate; for (; !wav_reader.Done(); wav_reader.Next()) { + ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port); + client.SendStartSignal(); std::string utt = wav_reader.Key(); const kaldi::WaveData& wave_data = wav_reader.Value(); @@ -74,9 +75,8 @@ int main(int argc, char* argv[]) { std::string 
result = client.GetResult(); LOG(INFO) << "utt: " << utt << " " << result; - client.Join(); - return 0; } + return 0; } diff --git a/speechx/examples/ds2_ol/websocket/websocket_server.sh b/speechx/examples/ds2_ol/websocket/websocket_server.sh new file mode 100755 index 00000000..0e9e796c --- /dev/null +++ b/speechx/examples/ds2_ol/websocket/websocket_server.sh @@ -0,0 +1,57 @@ +#!/bin/bash +set +x +set -e + +. path.sh + + +# 1. compile +if [ ! -d ${SPEECHX_EXAMPLES} ]; then + pushd ${SPEECHX_ROOT} + bash build.sh + popd +fi + +# input +mkdir -p data +data=$PWD/data +ckpt_dir=$data/model +model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/ +vocb_dir=$ckpt_dir/data/lang_char/ + +if [ ! -f $ckpt_dir/data/mean_std.json ]; then + mkdir -p $ckpt_dir + pushd $ckpt_dir + wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz + tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz + popd +fi + +export GLOG_logtostderr=1 + +# 3. gen cmvn +cmvn=$data/cmvn.ark +cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn + + +wfst=$data/wfst/ +mkdir -p $wfst +if [ ! -f $wfst/aishell_graph.zip ]; then + pushd $wfst + wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip + unzip aishell_graph.zip + mv aishell_graph/* $wfst + popd +fi + +# 5. 
test websocket server +websocket_server_main \ + --cmvn_file=$cmvn \ + --model_path=$model_dir/avg_1.jit.pdmodel \ + --streaming_chunk=0.1 \ + --convert2PCM32=true \ + --param_path=$model_dir/avg_1.jit.pdiparams \ + --word_symbol_table=$data/wfst/words.txt \ + --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \ + --graph_path=$data/wfst/TLG.fst --max_active=7500 \ + --acoustic_scale=1.2 diff --git a/speechx/speechx/decoder/param.h b/speechx/speechx/decoder/param.h index a2cbbe50..aff8d39a 100644 --- a/speechx/speechx/decoder/param.h +++ b/speechx/speechx/decoder/param.h @@ -15,7 +15,6 @@ #pragma once #include "base/common.h" - #include "decoder/ctc_beam_search_decoder.h" #include "decoder/ctc_tlg_decoder.h" #include "frontend/audio/feature_pipeline.h" @@ -24,7 +23,7 @@ DEFINE_string(cmvn_file, "", "read cmvn"); DEFINE_double(streaming_chunk, 0.1, "streaming feature chunk size"); DEFINE_bool(convert2PCM32, true, "audio convert to pcm32"); DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model"); -DEFINE_string(params_path, "avg_1.jit.pdiparams", "paddle nnet model param"); +DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param"); DEFINE_string(word_symbol_table, "words.txt", "word symbol table"); DEFINE_string(graph_path, "TLG", "decoder graph"); DEFINE_double(acoustic_scale, 1.0, "acoustic scale"); @@ -70,7 +69,7 @@ FeaturePipelineOptions InitFeaturePipelineOptions() { ModelOptions InitModelOptions() { ModelOptions model_opts; model_opts.model_path = FLAGS_model_path; - model_opts.params_path = FLAGS_params_path; + model_opts.param_path = FLAGS_param_path; model_opts.cache_shape = FLAGS_model_cache_names; model_opts.output_names = FLAGS_model_output_names; return model_opts; diff --git a/speechx/speechx/nnet/paddle_nnet.cc b/speechx/speechx/nnet/paddle_nnet.cc index 5c4da11a..f8e1f697 100644 --- a/speechx/speechx/nnet/paddle_nnet.cc +++ b/speechx/speechx/nnet/paddle_nnet.cc @@ -49,7 +49,7 @@ void 
PaddleNnet::InitCacheEncouts(const ModelOptions& opts) { PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) { paddle_infer::Config config; - config.SetModel(opts.model_path, opts.params_path); + config.SetModel(opts.model_path, opts.param_path); if (opts.use_gpu) { config.EnableUseGpu(500, 0); } diff --git a/speechx/speechx/nnet/paddle_nnet.h b/speechx/speechx/nnet/paddle_nnet.h index 906994d0..8b4ed478 100644 --- a/speechx/speechx/nnet/paddle_nnet.h +++ b/speechx/speechx/nnet/paddle_nnet.h @@ -11,25 +11,19 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. - - #pragma once - - +#include +#include "base/common.h" #include "kaldi/matrix/kaldi-matrix.h" #include "kaldi/util/options-itf.h" - -#include "base/common.h" #include "nnet/nnet_itf.h" #include "paddle_inference_api.h" -#include - namespace ppspeech { struct ModelOptions { std::string model_path; - std::string params_path; + std::string param_path; int thread_num; bool use_gpu; bool switch_ir_optim; @@ -41,7 +35,7 @@ struct ModelOptions { bool enable_profile; ModelOptions() : model_path("avg_1.jit.pdmodel"), - params_path("avg_1.jit.pdiparams"), + param_path("avg_1.jit.pdiparams"), thread_num(2), use_gpu(false), input_names( @@ -59,7 +53,7 @@ struct ModelOptions { void Register(kaldi::OptionsItf* opts) { opts->Register("model-path", &model_path, "model file path"); - opts->Register("model-params", &params_path, "params model file path"); + opts->Register("model-param", &param_path, "params model file path"); opts->Register("thread-num", &thread_num, "thread num"); opts->Register("use-gpu", &use_gpu, "if use gpu"); opts->Register("input-names", &input_names, "paddle input names"); diff --git a/utils/format_rsl.py b/utils/format_rsl.py index 1a714253..8230416c 100644 --- a/utils/format_rsl.py +++ b/utils/format_rsl.py @@ -12,6 +12,7 @@ # See the License for the specific
language governing permissions and # limitations under the License. import argparse + import jsonlines