From a01c163dc359176fc2a71ea8a7e94db624c7f503 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 29 Nov 2022 11:30:01 +0800 Subject: [PATCH] [speechx] more doc of speechx u2 and ds2 onnx (#2692) * more doc of speechx u2 onnx --- speechx/examples/ds2_ol/onnx/README.md | 23 ++++--- .../examples/u2pp_ol/wenetspeech/README.md | 62 +++++++++++++++++-- speechx/examples/u2pp_ol/wenetspeech/run.sh | 3 + 3 files changed, 72 insertions(+), 16 deletions(-) diff --git a/speechx/examples/ds2_ol/onnx/README.md b/speechx/examples/ds2_ol/onnx/README.md index e6ab953c..b98b74b6 100644 --- a/speechx/examples/ds2_ol/onnx/README.md +++ b/speechx/examples/ds2_ol/onnx/README.md @@ -1,11 +1,8 @@ -# DeepSpeech2 to ONNX model +# Convert DeepSpeech2 model to ONNX format -1. convert deepspeech2 model to ONNX, using Paddle2ONNX. -2. check paddleinference and onnxruntime output equal. -3. optimize onnx model -4. check paddleinference and optimized onnxruntime output equal. -5. quantize onnx model -4. check paddleinference and optimized onnxruntime output equal. +> We recommend using U2/U2++ model instead of DS2, please see [here](../../u2pp_ol/wenetspeech/). + +This example demonstrates converting the DS2 model to ONNX format. Please make sure [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) and [onnx-simplifier](https://github.com/zh794390558/onnx-simplifier/tree/dyn_time_shape) version is correct. @@ -25,18 +22,24 @@ onnxoptimizer 0.2.7 onnxruntime 1.11.0 ``` + ## Using ``` bash run.sh --stage 0 --stop_stage 5 ``` +1. convert deepspeech2 model to ONNX, using Paddle2ONNX. +2. check paddleinference and onnxruntime output equal. +3. optimize onnx model +4. check paddleinference and optimized onnxruntime output equal. +5. quantize onnx model +6. check paddleinference and quantized onnxruntime output equal. + For more details please see `run.sh`. ## Outputs -The optimized onnx model is `exp/model.opt.onnx`, quanted model is `$exp/model.optset11.quant.onnx`. 
- -To show the graph, please using `local/netron.sh`. +The optimized onnx model is `exp/model.opt.onnx`, the quantized model is `exp/model.optset11.quant.onnx`. ## [Results](https://github.com/PaddlePaddle/PaddleSpeech/wiki/ASR-Benchmark#streaming-asr) diff --git a/speechx/examples/u2pp_ol/wenetspeech/README.md b/speechx/examples/u2pp_ol/wenetspeech/README.md index b90b8e20..6ca8f6dd 100644 --- a/speechx/examples/u2pp_ol/wenetspeech/README.md +++ b/speechx/examples/u2pp_ol/wenetspeech/README.md @@ -1,27 +1,77 @@ -# u2/u2pp Streaming ASR +# U2/U2++ Streaming ASR + +A C++ deployment example for the `PaddleSpeech/examples/wenetspeech/asr1` recipe. The model is a static model from `export`; for how to export the model please see [here](../../../../examples/wenetspeech/asr1/). If you want to use the exported model, `run.sh` will download it; for the model link please see `run.sh`. + +This example will demonstrate how to use the u2/u2++ model to recognize `wav` and compute `CER`. We use AISHELL-1 as test data. ## Testing with Aishell Test Data -### Download wav and model +### Source `path.sh` first + +```bash +source path.sh +``` + +All bins are under the `echo $SPEECHX_BUILD` dir. + +### Download dataset and model ``` ./run.sh --stop_stage 0 ``` -### compute feature +### process `cmvn` and compute feature -``` +```bash ./run.sh --stage 1 --stop_stage 1 ``` -### decoding using feature +If you only want to convert the `cmvn` file format, you can use this command: + +```bash +./local/feat.sh --stage 1 --stop_stage 1 +``` + +### Decoding using `feature` input ``` ./run.sh --stage 2 --stop_stage 2 ``` -### decoding using wav +### Decoding using `wav` input ``` ./run.sh --stage 3 --stop_stage 3 ``` + +This stage uses `u2_recognizer_main` to recognize the wav file. 
+ +The input is a `scp` file which looks like this: +```text +# head data/split1/1/aishell_test.scp +BAC009S0764W0121 /workspace/PaddleSpeech/speechx/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0121.wav +BAC009S0764W0122 /workspace/PaddleSpeech/speechx/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0122.wav +... +BAC009S0764W0125 /workspace/PaddleSpeech/speechx/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0125.wav +``` + +If you want to recognize one wav, you can make a `scp` file like this: +```text +key path/to/wav/file +``` + +Then specify the `--wav_rspecifier=` param for the `u2_recognizer_main` bin. For the meaning of other flags, please see the `help` output: +```bash +u2_recognizer_main --help +``` + +For an example using the `u2_recognizer_main` bin, please see `local/recognizer.sh`. + +### Decoding with `wav` using quantized model + +`local/recognizer_quant.sh` is the same as `local/recognizer.sh`, but uses the quantized model. + + +## Results + +Please see [here](./RESULTS.md). diff --git a/speechx/examples/u2pp_ol/wenetspeech/run.sh b/speechx/examples/u2pp_ol/wenetspeech/run.sh index 870c5dee..711d6808 100755 --- a/speechx/examples/u2pp_ol/wenetspeech/run.sh +++ b/speechx/examples/u2pp_ol/wenetspeech/run.sh @@ -72,13 +72,16 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + # process cmvn and compute fbank feat ./local/feat.sh fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + # decode with fbank feat input ./local/decode.sh fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + # decode with wav input ./loca/recognizer.sh fi