From c403a838203ad99898912f3ec116f778e1353f09 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 21 Apr 2022 14:56:06 +0000
Subject: [PATCH] fix param path name; ws client

---
 demos/streaming_asr_server/README.md          |  2 +-
 demos/streaming_asr_server/README_cn.md       |  2 +-
 speechx/examples/ds2_ol/README.md             |  1 -
 speechx/examples/ds2_ol/aishell/README.md     |  2 +-
 speechx/examples/ds2_ol/aishell/run.sh        |  2 +-
 .../ds2_ol/aishell/websocket_server.sh        | 66 -------------------
 .../ctc-prefix-beam-search-decoder-ol.cc      |  2 +-
 .../ds2_ol/decoder/wfst-decoder-ol.cc         |  2 +-
 speechx/examples/ds2_ol/websocket/.gitignore  |  2 +
 speechx/examples/ds2_ol/websocket/path.sh     | 14 ++++
 .../websocket_client.sh                       |  0
 .../ds2_ol/websocket/websocket_client_main.cc |  8 +--
 .../ds2_ol/websocket/websocket_server.sh      | 57 ++++++++++++++++
 speechx/speechx/decoder/param.h               |  5 +-
 speechx/speechx/nnet/paddle_nnet.cc           |  2 +-
 speechx/speechx/nnet/paddle_nnet.h            | 16 ++---
 utils/format_rsl.py                           |  1 +
 17 files changed, 92 insertions(+), 92 deletions(-)
 delete mode 100755 speechx/examples/ds2_ol/aishell/websocket_server.sh
 create mode 100644 speechx/examples/ds2_ol/websocket/.gitignore
 create mode 100755 speechx/examples/ds2_ol/websocket/path.sh
 rename speechx/examples/ds2_ol/{aishell => websocket}/websocket_client.sh (100%)
 create mode 100755 speechx/examples/ds2_ol/websocket/websocket_server.sh

diff --git a/demos/streaming_asr_server/README.md b/demos/streaming_asr_server/README.md
index 68c3b045..0eed8e56 100644
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
@@ -352,4 +352,4 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
         [2022-04-21 15:59:08,024] [    INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
         [2022-04-21 15:59:12,883] [    INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
         [2022-04-21 15:59:12,884] [    INFO] - 我认为跑步最重要的就是给我带来了身体健康
-  ```
\ No newline at end of file
+  ```
diff --git a/demos/streaming_asr_server/README_cn.md b/demos/streaming_asr_server/README_cn.md
index c58e17e9..bf122bb3 100644
--- a/demos/streaming_asr_server/README_cn.md
+++ b/demos/streaming_asr_server/README_cn.md
@@ -353,4 +353,4 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
         [2022-04-21 15:59:08,024] [    INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
         [2022-04-21 15:59:12,883] [    INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
         [2022-04-21 15:59:12,884] [    INFO] - 我认为跑步最重要的就是给我带来了身体健康
-  ```
\ No newline at end of file
+  ```
diff --git a/speechx/examples/ds2_ol/README.md b/speechx/examples/ds2_ol/README.md
index 64d0afaa..18f248a1 100644
--- a/speechx/examples/ds2_ol/README.md
+++ b/speechx/examples/ds2_ol/README.md
@@ -11,4 +11,3 @@ The below is for developing and offline testing:
 * nnet
 * feat
 * decoder
-
diff --git a/speechx/examples/ds2_ol/aishell/README.md b/speechx/examples/ds2_ol/aishell/README.md
index 115bf85f..01c89979 100644
--- a/speechx/examples/ds2_ol/aishell/README.md
+++ b/speechx/examples/ds2_ol/aishell/README.md
@@ -33,4 +33,4 @@ LM: [wenetspeech](http://paddlespeech.bj.bcebos.com/speechx/examples/ds2_ol/aish
 Overall -> 10.93 % N=104765 C=93410 S=9780 D=1575 I=95
 Mandarin -> 10.93 % N=104762 C=93410 S=9779 D=1573 I=95
 Other -> 100.00 % N=3 C=0 S=1 D=2 I=0
-```
\ No newline at end of file
+```
diff --git a/speechx/examples/ds2_ol/aishell/run.sh b/speechx/examples/ds2_ol/aishell/run.sh
index 9a63ff4d..c68a0cbc 100755
--- a/speechx/examples/ds2_ol/aishell/run.sh
+++ b/speechx/examples/ds2_ol/aishell/run.sh
@@ -119,7 +119,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
         pushd $wfst
         wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
         unzip aishell_graph.zip
-        mv aishell_graph/* 
+        mv aishell_graph/* $wfst
         popd
     fi
 
diff --git a/speechx/examples/ds2_ol/aishell/websocket_server.sh b/speechx/examples/ds2_ol/aishell/websocket_server.sh
deleted file mode 100755
index ea619d54..00000000
--- a/speechx/examples/ds2_ol/aishell/websocket_server.sh
+++ /dev/null
@@ -1,66 +0,0 @@
-#!/bin/bash
-set +x
-set -e
-
-. path.sh
-
-
-# 1. compile
-if [ ! -d ${SPEECHX_EXAMPLES} ]; then
-    pushd ${SPEECHX_ROOT} 
-    bash build.sh
-    popd
-fi
-
-# input
-mkdir -p data
-data=$PWD/data
-ckpt_dir=$data/model
-model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
-vocb_dir=$ckpt_dir/data/lang_char/
-
-# output
-aishell_wav_scp=aishell_test.scp
-if [ ! -d $data/test ]; then
-    pushd $data
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
-    unzip  aishell_test.zip
-    popd
-
-    realpath $data/test/*/*.wav > $data/wavlist
-    awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
-    paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
-fi
-
-
-if [ ! -d $ckpt_dir ]; then
-    mkdir -p $ckpt_dir
-    wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
-    tar xzfv $ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
-fi
-
-
-export GLOG_logtostderr=1
-
-# 3. gen cmvn 
-cmvn=$PWD/cmvn.ark
-cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
-
-text=$data/test/text
-graph_dir=./aishell_graph
-if [ ! -d $graph_dir ]; then
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
-    unzip aishell_graph.zip 
-fi
-
-# 5. test websocket server 
-websocket_server_main \
-    --cmvn_file=$cmvn \
-    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --streaming_chunk=0.1 \
-    --convert2PCM32=true \
-    --params_path=$model_dir/avg_1.jit.pdiparams \
-    --word_symbol_table=$graph_dir/words.txt \
-    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-    --graph_path=$graph_dir/TLG.fst --max_active=7500 \
-    --acoustic_scale=1.2 
diff --git a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
index 4a39217c..6a6495aa 100644
--- a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
@@ -76,7 +76,7 @@ int main(int argc, char* argv[]) {
 
     ppspeech::ModelOptions model_opts;
     model_opts.model_path = model_path;
-    model_opts.params_path = model_params;
+    model_opts.param_path = model_params;
     model_opts.cache_shape = FLAGS_model_cache_names;
     model_opts.input_names = FLAGS_model_input_names;
     model_opts.output_names = FLAGS_model_output_names;
diff --git a/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc b/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
index 92b3d8ec..544e59cb 100644
--- a/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
@@ -79,7 +79,7 @@ int main(int argc, char* argv[]) {
 
     ppspeech::ModelOptions model_opts;
     model_opts.model_path = model_graph;
-    model_opts.params_path = model_params;
+    model_opts.param_path = model_params;
     model_opts.cache_shape = FLAGS_model_cache_names;
     model_opts.input_names = FLAGS_model_input_names;
     model_opts.output_names = FLAGS_model_output_names;
diff --git a/speechx/examples/ds2_ol/websocket/.gitignore b/speechx/examples/ds2_ol/websocket/.gitignore
new file mode 100644
index 00000000..bbd86a25
--- /dev/null
+++ b/speechx/examples/ds2_ol/websocket/.gitignore
@@ -0,0 +1,2 @@
+data
+exp
diff --git a/speechx/examples/ds2_ol/websocket/path.sh b/speechx/examples/ds2_ol/websocket/path.sh
new file mode 100755
index 00000000..d66b5dcc
--- /dev/null
+++ b/speechx/examples/ds2_ol/websocket/path.sh
@@ -0,0 +1,14 @@
+# Locations of the built binaries required for running the examples.
+# All paths are resolved relative to $PWD at the time this file is sourced.
+SPEECHX_ROOT=$PWD/../../..
+SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
+
+SPEECHX_TOOLS=$SPEECHX_ROOT/tools
+TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
+# Only prints a message; sourcing continues even when the build output is missing.
+[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }
+# LC_ALL is the variable that overrides every locale category.
+export LC_ALL=C
+
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/websocket:$SPEECHX_EXAMPLES/ds2_ol/feat
+export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
diff --git a/speechx/examples/ds2_ol/aishell/websocket_client.sh b/speechx/examples/ds2_ol/websocket/websocket_client.sh
similarity index 100%
rename from speechx/examples/ds2_ol/aishell/websocket_client.sh
rename to speechx/examples/ds2_ol/websocket/websocket_client.sh
diff --git a/speechx/examples/ds2_ol/websocket/websocket_client_main.cc b/speechx/examples/ds2_ol/websocket/websocket_client_main.cc
index d6f0d480..c8c438ec 100644
--- a/speechx/examples/ds2_ol/websocket/websocket_client_main.cc
+++ b/speechx/examples/ds2_ol/websocket/websocket_client_main.cc
@@ -26,8 +26,7 @@ using kaldi::int16;
 int main(int argc, char* argv[]) {
     gflags::ParseCommandLineFlags(&argc, &argv, false);
     google::InitGoogleLogging(argv[0]);
-    ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port);
-
+    
     kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
         FLAGS_wav_rspecifier);
 
@@ -36,6 +35,8 @@ int main(int argc, char* argv[]) {
     const int chunk_sample_size = streaming_chunk * sample_rate;
 
     for (; !wav_reader.Done(); wav_reader.Next()) {
+        ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port);
+
         client.SendStartSignal();
         std::string utt = wav_reader.Key();
         const kaldi::WaveData& wave_data = wav_reader.Value();
@@ -74,9 +75,8 @@ int main(int argc, char* argv[]) {
         std::string result = client.GetResult();
         LOG(INFO) << "utt: " << utt << " " << result;
 
-
         client.Join();
-        return 0;
     }
+
     return 0;
 }
diff --git a/speechx/examples/ds2_ol/websocket/websocket_server.sh b/speechx/examples/ds2_ol/websocket/websocket_server.sh
new file mode 100755
index 00000000..0e9e796c
--- /dev/null
+++ b/speechx/examples/ds2_ol/websocket/websocket_server.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+set +x  # command tracing off
+set -e  # abort on the first failing command
+# Launch websocket_server_main with the deepspeech2_online aishell model; missing assets are fetched into ./data.
+. path.sh  # defines SPEECHX_ROOT / SPEECHX_EXAMPLES and extends PATH
+
+
+# 1. compile: run build.sh from SPEECHX_ROOT when the examples build directory is missing
+if [ ! -d ${SPEECHX_EXAMPLES} ]; then
+    pushd ${SPEECHX_ROOT} 
+    bash build.sh
+    popd
+fi
+
+# input: working directories, all rooted at ./data
+mkdir -p data
+data=$PWD/data
+ckpt_dir=$data/model
+model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
+vocb_dir=$ckpt_dir/data/lang_char/  # not referenced again in this script
+
+if [ ! -f $ckpt_dir/data/mean_std.json ]; then  # checkpoint not unpacked yet: download and extract it
+        mkdir -p $ckpt_dir
+        pushd $ckpt_dir
+        wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+        tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz 
+        popd
+fi
+
+export GLOG_logtostderr=1  # glog setting: send log output to stderr
+
+# 3. gen cmvn: write $data/cmvn.ark from the checkpoint's mean_std.json
+cmvn=$data/cmvn.ark
+cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
+
+# decoding graph: download aishell_graph.zip into $data/wfst and unpack it there (skipped when the zip exists)
+wfst=$data/wfst/
+mkdir -p $wfst
+if [ ! -f $wfst/aishell_graph.zip ]; then
+    pushd $wfst
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
+    unzip aishell_graph.zip
+    mv aishell_graph/* $wfst
+    popd
+fi
+
+# 5. test websocket server: start websocket_server_main with the cmvn, model and graph files prepared above
+websocket_server_main \
+    --cmvn_file=$cmvn \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --streaming_chunk=0.1 \
+    --convert2PCM32=true \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
+    --word_symbol_table=$data/wfst/words.txt \
+    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
+    --graph_path=$data/wfst/TLG.fst --max_active=7500 \
+    --acoustic_scale=1.2 
diff --git a/speechx/speechx/decoder/param.h b/speechx/speechx/decoder/param.h
index a2cbbe50..aff8d39a 100644
--- a/speechx/speechx/decoder/param.h
+++ b/speechx/speechx/decoder/param.h
@@ -15,7 +15,6 @@
 #pragma once
 
 #include "base/common.h"
-
 #include "decoder/ctc_beam_search_decoder.h"
 #include "decoder/ctc_tlg_decoder.h"
 #include "frontend/audio/feature_pipeline.h"
@@ -24,7 +23,7 @@ DEFINE_string(cmvn_file, "", "read cmvn");
 DEFINE_double(streaming_chunk, 0.1, "streaming feature chunk size");
 DEFINE_bool(convert2PCM32, true, "audio convert to pcm32");
 DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
-DEFINE_string(params_path, "avg_1.jit.pdiparams", "paddle nnet model param");
+DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
 DEFINE_string(word_symbol_table, "words.txt", "word symbol table");
 DEFINE_string(graph_path, "TLG", "decoder graph");
 DEFINE_double(acoustic_scale, 1.0, "acoustic scale");
@@ -70,7 +69,7 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
 ModelOptions InitModelOptions() {
     ModelOptions model_opts;
     model_opts.model_path = FLAGS_model_path;
-    model_opts.params_path = FLAGS_params_path;
+    model_opts.param_path = FLAGS_param_path;
     model_opts.cache_shape = FLAGS_model_cache_names;
     model_opts.output_names = FLAGS_model_output_names;
     return model_opts;
diff --git a/speechx/speechx/nnet/paddle_nnet.cc b/speechx/speechx/nnet/paddle_nnet.cc
index 5c4da11a..f8e1f697 100644
--- a/speechx/speechx/nnet/paddle_nnet.cc
+++ b/speechx/speechx/nnet/paddle_nnet.cc
@@ -49,7 +49,7 @@ void PaddleNnet::InitCacheEncouts(const ModelOptions& opts) {
 
 PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
     paddle_infer::Config config;
-    config.SetModel(opts.model_path, opts.params_path);
+    config.SetModel(opts.model_path, opts.param_path);
     if (opts.use_gpu) {
         config.EnableUseGpu(500, 0);
     }
diff --git a/speechx/speechx/nnet/paddle_nnet.h b/speechx/speechx/nnet/paddle_nnet.h
index 906994d0..8b4ed478 100644
--- a/speechx/speechx/nnet/paddle_nnet.h
+++ b/speechx/speechx/nnet/paddle_nnet.h
@@ -11,25 +11,19 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
-
 #pragma once
-
-
+#include <numeric>
+#include "base/common.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
-
-#include "base/common.h"
 #include "nnet/nnet_itf.h"
 #include "paddle_inference_api.h"
 
-#include <numeric>
-
 namespace ppspeech {
 
 struct ModelOptions {
     std::string model_path;
-    std::string params_path;
+    std::string param_path;
     int thread_num;
     bool use_gpu;
     bool switch_ir_optim;
@@ -41,7 +35,7 @@ struct ModelOptions {
     bool enable_profile;
     ModelOptions()
         : model_path("avg_1.jit.pdmodel"),
-          params_path("avg_1.jit.pdiparams"),
+          param_path("avg_1.jit.pdiparams"),
           thread_num(2),
           use_gpu(false),
           input_names(
@@ -59,7 +53,7 @@ struct ModelOptions {
 
     void Register(kaldi::OptionsItf* opts) {
         opts->Register("model-path", &model_path, "model file path");
-        opts->Register("model-params", &params_path, "params model file path");
+        opts->Register("model-param", &param_path, "params model file path");
         opts->Register("thread-num", &thread_num, "thread num");
         opts->Register("use-gpu", &use_gpu, "if use gpu");
         opts->Register("input-names", &input_names, "paddle input names");
diff --git a/utils/format_rsl.py b/utils/format_rsl.py
index 1a714253..8230416c 100644
--- a/utils/format_rsl.py
+++ b/utils/format_rsl.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
+
 import jsonlines