From a01c163dc359176fc2a71ea8a7e94db624c7f503 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 29 Nov 2022 11:30:01 +0800 Subject: [PATCH] [speechx] more doc of speechx u2 and ds2 onnx (#2692) * more doc of speechx u2 onnx --- speechx/examples/ds2_ol/onnx/README.md | 23 ++++--- .../examples/u2pp_ol/wenetspeech/README.md | 62 +++++++++++++++++-- speechx/examples/u2pp_ol/wenetspeech/run.sh | 3 + 3 files changed, 72 insertions(+), 16 deletions(-) diff --git a/speechx/examples/ds2_ol/onnx/README.md b/speechx/examples/ds2_ol/onnx/README.md index e6ab953c..b98b74b6 100644 --- a/speechx/examples/ds2_ol/onnx/README.md +++ b/speechx/examples/ds2_ol/onnx/README.md @@ -1,11 +1,8 @@ -# DeepSpeech2 to ONNX model +# Convert DeepSpeech2 model to ONNX format -1. convert deepspeech2 model to ONNX, using Paddle2ONNX. -2. check paddleinference and onnxruntime output equal. -3. optimize onnx model -4. check paddleinference and optimized onnxruntime output equal. -5. quantize onnx model -4. check paddleinference and optimized onnxruntime output equal. +> We recommend using U2/U2++ model instead of DS2, please see [here](../../u2pp_ol/wenetspeech/). + +This example demonstrates converting the DS2 model to ONNX format. Please make sure [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) and [onnx-simplifier](https://github.com/zh794390558/onnx-simplifier/tree/dyn_time_shape) version is correct. @@ -25,18 +22,24 @@ onnxoptimizer 0.2.7 onnxruntime 1.11.0 ``` + ## Using ``` bash run.sh --stage 0 --stop_stage 5 ``` +1. convert deepspeech2 model to ONNX, using Paddle2ONNX. +2. check paddleinference and onnxruntime output equal. +3. optimize onnx model +4. check paddleinference and optimized onnxruntime output equal. +5. quantize onnx model +6. check paddleinference and quantized onnxruntime output equal. + For more details please see `run.sh`. ## Outputs -The optimized onnx model is `exp/model.opt.onnx`, quanted model is `$exp/model.optset11.quant.onnx`. 
- -To show the graph, please using `local/netron.sh`. +The optimized onnx model is `exp/model.opt.onnx`, the quantized model is `exp/model.optset11.quant.onnx`. ## [Results](https://github.com/PaddlePaddle/PaddleSpeech/wiki/ASR-Benchmark#streaming-asr) diff --git a/speechx/examples/u2pp_ol/wenetspeech/README.md b/speechx/examples/u2pp_ol/wenetspeech/README.md index b90b8e20..6ca8f6dd 100644 --- a/speechx/examples/u2pp_ol/wenetspeech/README.md +++ b/speechx/examples/u2pp_ol/wenetspeech/README.md @@ -1,27 +1,77 @@ -# u2/u2pp Streaming ASR +# U2/U2++ Streaming ASR + +A C++ deployment example for the `PaddleSpeech/examples/wenetspeech/asr1` recipe. The model is a static model from `export`; for how to export the model please see [here](../../../../examples/wenetspeech/asr1/). If you want to use the exported model, `run.sh` will download it; for the model link please see `run.sh`. + +This example will demonstrate how to use the u2/u2++ model to recognize `wav` and compute `CER`. We use AISHELL-1 as test data. ## Testing with Aishell Test Data -### Download wav and model +### Source `path.sh` first + +```bash +source path.sh +``` + +All bins are under the `echo $SPEECHX_BUILD` dir. + +### Download dataset and model ``` ./run.sh --stop_stage 0 ``` -### compute feature +### process `cmvn` and compute feature -``` +```bash ./run.sh --stage 1 --stop_stage 1 ``` -### decoding using feature +If you only want to convert the `cmvn` file format, you can use this command: + +```bash +./local/feat.sh --stage 1 --stop_stage 1 +``` + +### Decoding using `feature` input ``` ./run.sh --stage 2 --stop_stage 2 ``` -### decoding using wav +### Decoding using `wav` input ``` ./run.sh --stage 3 --stop_stage 3 ``` + +This stage uses `u2_recognizer_main` to recognize the wav file. 
+ +The input is a `scp` file which looks like this: +```text +# head data/split1/1/aishell_test.scp +BAC009S0764W0121 /workspace/PaddleSpeech/speechx/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0121.wav +BAC009S0764W0122 /workspace/PaddleSpeech/speechx/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0122.wav +... +BAC009S0764W0125 /workspace/PaddleSpeech/speechx/examples/u2pp_ol/wenetspeech/data/test/S0764/BAC009S0764W0125.wav +``` + +If you want to recognize one wav, you can make a `scp` file like this: +```text +key path/to/wav/file +``` + +Then specify the `--wav_rspecifier=` param for the `u2_recognizer_main` bin. For the meaning of other flags, please see the `help` output: +```bash +u2_recognizer_main --help +``` + +For an example using the `u2_recognizer_main` bin, please see `local/recognizer.sh`. + +### Decoding with `wav` using quantized model + +`local/recognizer_quant.sh` is the same as `local/recognizer.sh`, but uses the quantized model. + + +## Results + +Please see [here](./RESULTS.md). diff --git a/speechx/examples/u2pp_ol/wenetspeech/run.sh b/speechx/examples/u2pp_ol/wenetspeech/run.sh index 870c5dee..711d6808 100755 --- a/speechx/examples/u2pp_ol/wenetspeech/run.sh +++ b/speechx/examples/u2pp_ol/wenetspeech/run.sh @@ -72,13 +72,16 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + # process cmvn and compute fbank feat ./local/feat.sh fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + # decode with fbank feat input ./local/decode.sh fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + # decode with wav input ./loca/recognizer.sh fi