fix param path name; ws client

3 years ago · c403a83820
parent ea5db3daa8
commit c403a83820
17 changed files with 92 additions and 92 deletions
--- a/demos/streaming_asr_server/README.md
+++ b/demos/streaming_asr_server/README.md
@ -352,4 +352,4 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
        [2022-04-21 15:59:08,024] [    INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
        [2022-04-21 15:59:12,883] [    INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
        [2022-04-21 15:59:12,884] [    INFO] - 我认为跑步最重要的就是给我带来了身体健康
-  ```
+  ```
--- a/demos/streaming_asr_server/README_cn.md
+++ b/demos/streaming_asr_server/README_cn.md
@ -353,4 +353,4 @@ wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
        [2022-04-21 15:59:08,024] [    INFO] - receive msg={'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
        [2022-04-21 15:59:12,883] [    INFO] - final receive msg={'status': 'ok', 'signal': 'finished', 'asr_results': '我认为跑步最重要的就是给我带来了身体健康'}
        [2022-04-21 15:59:12,884] [    INFO] - 我认为跑步最重要的就是给我带来了身体健康
-  ```
+  ```
--- a/speechx/examples/ds2_ol/README.md
+++ b/speechx/examples/ds2_ol/README.md
@ -11,4 +11,3 @@ The below is for developing and offline testing:
 * nnet
 * feat
 * decoder
-
--- a/speechx/examples/ds2_ol/aishell/README.md
+++ b/speechx/examples/ds2_ol/aishell/README.md
@ -33,4 +33,4 @@ LM: [wenetspeech](http://paddlespeech.bj.bcebos.com/speechx/examples/ds2_ol/aish
 Overall -> 10.93 % N=104765 C=93410 S=9780 D=1575 I=95
 Mandarin -> 10.93 % N=104762 C=93410 S=9779 D=1573 I=95
 Other -> 100.00 % N=3 C=0 S=1 D=2 I=0
-```
+```
--- a/speechx/examples/ds2_ol/aishell/run.sh
+++ b/speechx/examples/ds2_ol/aishell/run.sh
@ -119,7 +119,7 @@ if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
        pushd $wfst
        wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
        unzip aishell_graph.zip
-        mv aishell_graph/* 
+        mv aishell_graph/* $wfst
        popd
    fi

--- a/speechx/examples/ds2_ol/aishell/websocket_server.sh
+++ b/speechx/examples/ds2_ol/aishell/websocket_server.sh
@ -1,66 +0,0 @@
-#!/bin/bash
-set +x
-set -e
-
-. path.sh
-
-
-# 1. compile
-if [ ! -d ${SPEECHX_EXAMPLES} ]; then
-    pushd ${SPEECHX_ROOT} 
-    bash build.sh
-    popd
-fi
-
-# input
-mkdir -p data
-data=$PWD/data
-ckpt_dir=$data/model
-model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
-vocb_dir=$ckpt_dir/data/lang_char/
-
-# output
-aishell_wav_scp=aishell_test.scp
-if [ ! -d $data/test ]; then
-    pushd $data
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_test.zip
-    unzip  aishell_test.zip
-    popd
-
-    realpath $data/test/*/*.wav > $data/wavlist
-    awk -F '/' '{ print $(NF) }' $data/wavlist | awk -F '.' '{ print $1 }' > $data/utt_id
-    paste $data/utt_id $data/wavlist > $data/$aishell_wav_scp
-fi
-
-
-if [ ! -d $ckpt_dir ]; then
-    mkdir -p $ckpt_dir
-    wget -P $ckpt_dir -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
-    tar xzfv $ckpt_dir/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz -C $ckpt_dir
-fi
-
-
-export GLOG_logtostderr=1
-
-# 3. gen cmvn 
-cmvn=$PWD/cmvn.ark
-cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
-
-text=$data/test/text
-graph_dir=./aishell_graph
-if [ ! -d $graph_dir ]; then
-    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
-    unzip aishell_graph.zip 
-fi
-
-# 5. test websocket server 
-websocket_server_main \
-    --cmvn_file=$cmvn \
-    --model_path=$model_dir/avg_1.jit.pdmodel \
-    --streaming_chunk=0.1 \
-    --convert2PCM32=true \
-    --params_path=$model_dir/avg_1.jit.pdiparams \
-    --word_symbol_table=$graph_dir/words.txt \
-    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
-    --graph_path=$graph_dir/TLG.fst --max_active=7500 \
-    --acoustic_scale=1.2 
--- a/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/ctc-prefix-beam-search-decoder-ol.cc
@ -76,7 +76,7 @@ int main(int argc, char* argv[]) {

    ppspeech::ModelOptions model_opts;
    model_opts.model_path = model_path;
-    model_opts.params_path = model_params;
+    model_opts.param_path = model_params;
    model_opts.cache_shape = FLAGS_model_cache_names;
    model_opts.input_names = FLAGS_model_input_names;
    model_opts.output_names = FLAGS_model_output_names;
--- a/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
+++ b/speechx/examples/ds2_ol/decoder/wfst-decoder-ol.cc
@ -79,7 +79,7 @@ int main(int argc, char* argv[]) {

    ppspeech::ModelOptions model_opts;
    model_opts.model_path = model_graph;
-    model_opts.params_path = model_params;
+    model_opts.param_path = model_params;
    model_opts.cache_shape = FLAGS_model_cache_names;
    model_opts.input_names = FLAGS_model_input_names;
    model_opts.output_names = FLAGS_model_output_names;
--- a/speechx/examples/ds2_ol/websocket/.gitignore
+++ b/speechx/examples/ds2_ol/websocket/.gitignore
@ -0,0 +1,2 @@
+data
+exp
--- a/speechx/examples/ds2_ol/websocket/path.sh
+++ b/speechx/examples/ds2_ol/websocket/path.sh
@ -0,0 +1,14 @@
+# Locations of the built binaries required for running the examples.
+# Source this file from the example directory; paths are relative to $PWD.
+SPEECHX_ROOT=$PWD/../../..
+SPEECHX_EXAMPLES=$SPEECHX_ROOT/build/examples
+
+SPEECHX_TOOLS=$SPEECHX_ROOT/tools
+TOOLS_BIN=$SPEECHX_TOOLS/valgrind/install/bin
+# Only report a missing build tree; do not abort here.
+[ -d $SPEECHX_EXAMPLES ] || { echo "Error: 'build/examples' directory not found. please ensure that the project build successfully"; }
+# Use the C locale for all categories (the variable is LC_ALL, not LC_AL).
+export LC_ALL=C
+
+SPEECHX_BIN=$SPEECHX_EXAMPLES/ds2_ol/websocket:$SPEECHX_EXAMPLES/ds2_ol/feat
+export PATH=$PATH:$SPEECHX_BIN:$TOOLS_BIN
--- a/speechx/examples/ds2_ol/websocket/websocket_client.sh
+++ b/speechx/examples/ds2_ol/websocket/websocket_client.sh
--- a/speechx/examples/ds2_ol/websocket/websocket_client_main.cc
+++ b/speechx/examples/ds2_ol/websocket/websocket_client_main.cc
@ -26,8 +26,7 @@ using kaldi::int16;
 int main(int argc, char* argv[]) {
    gflags::ParseCommandLineFlags(&argc, &argv, false);
    google::InitGoogleLogging(argv[0]);
-    ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port);
-
+    
    kaldi::SequentialTableReader<kaldi::WaveHolder> wav_reader(
        FLAGS_wav_rspecifier);

@ -36,6 +35,8 @@ int main(int argc, char* argv[]) {
    const int chunk_sample_size = streaming_chunk * sample_rate;

    for (; !wav_reader.Done(); wav_reader.Next()) {
+        ppspeech::WebSocketClient client(FLAGS_host, FLAGS_port);
+
        client.SendStartSignal();
        std::string utt = wav_reader.Key();
        const kaldi::WaveData& wave_data = wav_reader.Value();
@ -74,9 +75,8 @@ int main(int argc, char* argv[]) {
        std::string result = client.GetResult();
        LOG(INFO) << "utt: " << utt << " " << result;

-
        client.Join();
-        return 0;
    }
+
    return 0;
 }
--- a/speechx/examples/ds2_ol/websocket/websocket_server.sh
+++ b/speechx/examples/ds2_ol/websocket/websocket_server.sh
@ -0,0 +1,57 @@
+#!/bin/bash
+set +x
+set -e
+
+. path.sh
+
+
+# 1. compile
+if [ ! -d ${SPEECHX_EXAMPLES} ]; then
+    pushd ${SPEECHX_ROOT} 
+    bash build.sh
+    popd
+fi
+
+# input
+mkdir -p data
+data=$PWD/data
+ckpt_dir=$data/model
+model_dir=$ckpt_dir/exp/deepspeech2_online/checkpoints/
+vocb_dir=$ckpt_dir/data/lang_char/
+
+if [ ! -f $ckpt_dir/data/mean_std.json ]; then
+        mkdir -p $ckpt_dir
+        pushd $ckpt_dir
+        wget -c https://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz
+        tar xzfv asr0_deepspeech2_online_aishell_ckpt_0.2.0.model.tar.gz 
+        popd
+fi
+
+export GLOG_logtostderr=1
+
+# 3. generate the cmvn file (cmvn.ark) from the model's mean_std.json
+cmvn=$data/cmvn.ark
+cmvn-json2kaldi --json_file=$ckpt_dir/data/mean_std.json --cmvn_write_path=$cmvn
+
+
+wfst=$data/wfst/
+mkdir -p $wfst
+if [ ! -f $wfst/aishell_graph.zip ]; then
+    pushd $wfst
+    wget -c https://paddlespeech.bj.bcebos.com/s2t/paddle_asr_online/aishell_graph.zip
+    unzip aishell_graph.zip
+    mv aishell_graph/* $wfst
+    popd
+fi
+
+# 5. launch the websocket server
+websocket_server_main \
+    --cmvn_file=$cmvn \
+    --model_path=$model_dir/avg_1.jit.pdmodel \
+    --streaming_chunk=0.1 \
+    --convert2PCM32=true \
+    --param_path=$model_dir/avg_1.jit.pdiparams \
+    --word_symbol_table=$data/wfst/words.txt \
+    --model_output_names=softmax_0.tmp_0,tmp_5,concat_0.tmp_0,concat_1.tmp_0 \
+    --graph_path=$data/wfst/TLG.fst --max_active=7500 \
+    --acoustic_scale=1.2 
--- a/speechx/speechx/decoder/param.h
+++ b/speechx/speechx/decoder/param.h
@ -15,7 +15,6 @@
 #pragma once

 #include "base/common.h"
-
 #include "decoder/ctc_beam_search_decoder.h"
 #include "decoder/ctc_tlg_decoder.h"
 #include "frontend/audio/feature_pipeline.h"
@ -24,7 +23,7 @@ DEFINE_string(cmvn_file, "", "read cmvn");
 DEFINE_double(streaming_chunk, 0.1, "streaming feature chunk size");
 DEFINE_bool(convert2PCM32, true, "audio convert to pcm32");
 DEFINE_string(model_path, "avg_1.jit.pdmodel", "paddle nnet model");
-DEFINE_string(params_path, "avg_1.jit.pdiparams", "paddle nnet model param");
+DEFINE_string(param_path, "avg_1.jit.pdiparams", "paddle nnet model param");
 DEFINE_string(word_symbol_table, "words.txt", "word symbol table");
 DEFINE_string(graph_path, "TLG", "decoder graph");
 DEFINE_double(acoustic_scale, 1.0, "acoustic scale");
@ -70,7 +69,7 @@ FeaturePipelineOptions InitFeaturePipelineOptions() {
 ModelOptions InitModelOptions() {
    ModelOptions model_opts;
    model_opts.model_path = FLAGS_model_path;
-    model_opts.params_path = FLAGS_params_path;
+    model_opts.param_path = FLAGS_param_path;
    model_opts.cache_shape = FLAGS_model_cache_names;
    model_opts.output_names = FLAGS_model_output_names;
    return model_opts;
--- a/speechx/speechx/nnet/paddle_nnet.cc
+++ b/speechx/speechx/nnet/paddle_nnet.cc
@ -49,7 +49,7 @@ void PaddleNnet::InitCacheEncouts(const ModelOptions& opts) {

 PaddleNnet::PaddleNnet(const ModelOptions& opts) : opts_(opts) {
    paddle_infer::Config config;
-    config.SetModel(opts.model_path, opts.params_path);
+    config.SetModel(opts.model_path, opts.param_path);
    if (opts.use_gpu) {
        config.EnableUseGpu(500, 0);
    }
--- a/speechx/speechx/nnet/paddle_nnet.h
+++ b/speechx/speechx/nnet/paddle_nnet.h
@ -11,25 +11,19 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-
-
 #pragma once
-
-
+#include <numeric>
+#include "base/common.h"
 #include "kaldi/matrix/kaldi-matrix.h"
 #include "kaldi/util/options-itf.h"
-
-#include "base/common.h"
 #include "nnet/nnet_itf.h"
 #include "paddle_inference_api.h"

-#include <numeric>
-
 namespace ppspeech {

 struct ModelOptions {
    std::string model_path;
-    std::string params_path;
+    std::string param_path;
    int thread_num;
    bool use_gpu;
    bool switch_ir_optim;
@ -41,7 +35,7 @@ struct ModelOptions {
    bool enable_profile;
    ModelOptions()
        : model_path("avg_1.jit.pdmodel"),
-          params_path("avg_1.jit.pdiparams"),
+          param_path("avg_1.jit.pdiparams"),
          thread_num(2),
          use_gpu(false),
          input_names(
@ -59,7 +53,7 @@ struct ModelOptions {

    void Register(kaldi::OptionsItf* opts) {
        opts->Register("model-path", &model_path, "model file path");
-        opts->Register("model-params", &params_path, "params model file path");
+        opts->Register("model-param", &param_path, "params model file path");
        opts->Register("thread-num", &thread_num, "thread num");
        opts->Register("use-gpu", &use_gpu, "if use gpu");
        opts->Register("input-names", &input_names, "paddle input names");
--- a/utils/format_rsl.py
+++ b/utils/format_rsl.py
@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import argparse
+
 import jsonlines