ds2 wenetspeech to onnx and support streaming asr server

pull/2045/head
Hui Zhang 2 years ago
parent f2e64beadc
commit 59a78f2a46

@ -11,7 +11,7 @@ port: 8090
# protocol = ['websocket'] (only one can be selected). # protocol = ['websocket'] (only one can be selected).
# websocket only support online engine type. # websocket only support online engine type.
protocol: 'websocket' protocol: 'websocket'
engine_list: ['asr_online-inference'] engine_list: ['asr_online-onnx']
################################################################################# #################################################################################
@ -21,7 +21,7 @@ engine_list: ['asr_online-inference']
################################### ASR ######################################### ################################### ASR #########################################
################### speech task: asr; engine_type: online-inference ####################### ################### speech task: asr; engine_type: online-inference #######################
asr_online-inference: asr_online-inference:
model_type: 'deepspeech2online_aishell' model_type: 'deepspeech2online_wenetspeech'
am_model: # the pdmodel file of am static model [optional] am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional] am_params: # the pdiparams file of am static model [optional]
lang: 'zh' lang: 'zh'
@ -53,7 +53,7 @@ asr_online-inference:
################################### ASR ######################################### ################################### ASR #########################################
################### speech task: asr; engine_type: online-onnx ####################### ################### speech task: asr; engine_type: online-onnx #######################
asr_online-onnx: asr_online-onnx:
model_type: 'deepspeech2online_aishell' model_type: 'deepspeech2online_wenetspeech'
am_model: # the pdmodel file of onnx am static model [optional] am_model: # the pdmodel file of onnx am static model [optional]
am_params: # the pdiparams file of am static model [optional] am_params: # the pdiparams file of am static model [optional]
lang: 'zh' lang: 'zh'

@ -0,0 +1,40 @@
#!/usr/bin/env python3
"""Compute the overall RTF (real-time factor) from a websocket ASR client log.

Each relevant log line looks like:
    ... audio duration: 6.126, elapsed time: 3.471978187561035, RTF=0.5667610492264177

The overall RTF printed is total elapsed time / total audio duration across
all utterances, which weights long utterances correctly (unlike averaging
per-utterance RTF values).
"""
import argparse


def parse_rtf_line(line):
    """Extract timing info from one log line.

    Args:
        line: a raw log line.

    Returns:
        dict with keys 'T' (audio duration, seconds) and 'P' (elapsed
        processing time, seconds), or None if the line carries no RTF record.
    """
    if 'RTF=' not in line:
        return None
    line = line.strip()
    # Drop any log prefix before the "audio duration: ..." payload.
    beg = line.index("audio")
    line = line[beg:]
    vals = []
    for elem in line.split(','):
        # The trailing "RTF=..." field is derived; skip it.
        if "RTF=" in elem:
            continue
        # e.g. "audio duration: 6.126" -> "6.126"; maxsplit guards against
        # extra colons in the label part.
        _, val = elem.split(":", 1)
        # float() instead of eval(): safe on untrusted log content.
        vals.append(float(val))
    # T: total audio duration (s); P: processing (elapsed) time (s).
    return dict(zip(['T', 'P'], vals))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(prog=__doc__)
    parser.add_argument(
        '--logfile', type=str, required=True, help='ws client log file')
    args = parser.parse_args()

    rtfs = []
    with open(args.logfile, 'r') as f:
        for line in f:
            meta = parse_rtf_line(line)
            if meta is not None:
                rtfs.append(meta)

    T = sum(m['T'] for m in rtfs)
    P = sum(m['P'] for m in rtfs)
    n = len(rtfs)
    # Guard against a log with no RTF records (avoids ZeroDivisionError).
    if n == 0:
        print("no RTF records found")
    else:
        print(f"RTF: {P/T}, utts: {n}")

@ -0,0 +1,21 @@
#!/bin/bash
# Measure streaming ASR server RTF for a wav scp list.
#
# Usage: $0 wav_scp
# Sends every wav in wav_scp to the websocket server at 127.0.0.1:8090,
# captures the client log, then parses per-utterance RTF records from it.

if [ $# != 1 ]; then
    echo "usage: $0 wav_scp"
    # Exit statuses are 0-255; -1 is non-portable (wraps to 255).
    exit 1
fi

scp=$1

# calc RTF
# wav_scp can generate from `speechx/examples/ds2_ol/aishell`

exp=exp
mkdir -p "$exp"

python3 local/websocket_client.py --server_ip 127.0.0.1 --port 8090 --wavscp "$scp" &> "$exp/log.rsl"

python3 local/rtf_from_log.py --logfile "$exp/log.rsl"

@ -1,3 +1,4 @@
#!/usr/bin/python
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
@ -11,9 +12,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
#!/usr/bin/python # calc avg RTF(NOT Accurate): grep -rn RTF log.txt | awk '{print $NF}' | awk -F "=" '{sum += $NF} END {print "all time",sum, "audio num", NR, "RTF", sum/NR}'
# -*- coding: UTF-8 -*-
# script for calc RTF: grep -rn RTF log.txt | awk '{print $NF}' | awk -F "=" '{sum += $NF} END {print "all time",sum, "audio num", NR, "RTF", sum/NR}'
import argparse import argparse
import asyncio import asyncio
import codecs import codecs

@ -25,10 +25,10 @@ from typing import Dict
import paddle import paddle
import requests import requests
import soundfile as sf
import yaml import yaml
from paddle.framework import load from paddle.framework import load
import paddlespeech.audio
from . import download from . import download
from .entry import commands from .entry import commands
try: try:
@ -282,7 +282,8 @@ def _note_one_stat(cls_name, params={}):
if 'audio_file' in params: if 'audio_file' in params:
try: try:
_, sr = paddlespeech.audio.load(params['audio_file']) # recursive import cased by: utils.DATA_HOME
_, sr = sf.read(params['audio_file'])
except Exception: except Exception:
sr = -1 sr = -1

@ -135,15 +135,21 @@ asr_dynamic_pretrained_models = {
}, },
}, },
"deepspeech2online_wenetspeech-zh-16k": { "deepspeech2online_wenetspeech-zh-16k": {
'1.0': { '1.0.3': {
'url': 'url':
'https://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz', 'http://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.3.model.tar.gz',
'md5': 'md5':
'b0c77e7f8881e0a27b82127d1abb8d5f', 'cfe273793e68f790f742b411c98bc75e',
'cfg_path': 'cfg_path':
'model.yaml', 'model.yaml',
'ckpt_path': 'ckpt_path':
'exp/deepspeech2_online/checkpoints/avg_10', 'exp/deepspeech2_online/checkpoints/avg_10',
'model':
'exp/deepspeech2_online/checkpoints/avg_10.jit.pdmodel',
'params':
'exp/deepspeech2_online/checkpoints/avg_10.jit.pdiparams',
'onnx_model':
'onnx/model.onnx',
'lm_url': 'lm_url':
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm', 'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
'lm_md5': 'lm_md5':
@ -170,14 +176,22 @@ asr_dynamic_pretrained_models = {
'1.0.2': { '1.0.2': {
'url': 'url':
'http://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz', 'http://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz',
'md5': '4dd42cfce9aaa54db0ec698da6c48ec5', 'md5':
'cfg_path': 'model.yaml', '4dd42cfce9aaa54db0ec698da6c48ec5',
'ckpt_path':'exp/deepspeech2_online/checkpoints/avg_1', 'cfg_path':
'model':'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel', 'model.yaml',
'params':'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams', 'ckpt_path':
'onnx_model': 'onnx/model.onnx', 'exp/deepspeech2_online/checkpoints/avg_1',
'lm_url':'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm', 'model':
'lm_md5':'29e02312deb2e59b3c8686c7966d4fe3' 'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel',
'params':
'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams',
'onnx_model':
'onnx/model.onnx',
'lm_url':
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
'lm_md5':
'29e02312deb2e59b3c8686c7966d4fe3'
}, },
}, },
"deepspeech2offline_librispeech-en-16k": { "deepspeech2offline_librispeech-en-16k": {
@ -241,14 +255,44 @@ asr_static_pretrained_models = {
'1.0.2': { '1.0.2': {
'url': 'url':
'http://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz', 'http://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz',
'md5': '4dd42cfce9aaa54db0ec698da6c48ec5', 'md5':
'cfg_path': 'model.yaml', '4dd42cfce9aaa54db0ec698da6c48ec5',
'ckpt_path':'exp/deepspeech2_online/checkpoints/avg_1', 'cfg_path':
'model':'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel', 'model.yaml',
'params':'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams', 'ckpt_path':
'onnx_model': 'onnx/model.onnx', 'exp/deepspeech2_online/checkpoints/avg_1',
'lm_url':'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm', 'model':
'lm_md5':'29e02312deb2e59b3c8686c7966d4fe3' 'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel',
'params':
'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams',
'onnx_model':
'onnx/model.onnx',
'lm_url':
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
'lm_md5':
'29e02312deb2e59b3c8686c7966d4fe3'
},
},
"deepspeech2online_wenetspeech-zh-16k": {
'1.0.3': {
'url':
'http://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.3.model.tar.gz',
'md5':
'cfe273793e68f790f742b411c98bc75e',
'cfg_path':
'model.yaml',
'ckpt_path':
'exp/deepspeech2_online/checkpoints/avg_10',
'model':
'exp/deepspeech2_online/checkpoints/avg_10.jit.pdmodel',
'params':
'exp/deepspeech2_online/checkpoints/avg_10.jit.pdiparams',
'onnx_model':
'onnx/model.onnx',
'lm_url':
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
'lm_md5':
'29e02312deb2e59b3c8686c7966d4fe3'
}, },
}, },
} }
@ -258,14 +302,44 @@ asr_onnx_pretrained_models = {
'1.0.2': { '1.0.2': {
'url': 'url':
'http://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz', 'http://paddlespeech.bj.bcebos.com/s2t/aishell/asr0/asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.2.model.tar.gz',
'md5': '4dd42cfce9aaa54db0ec698da6c48ec5', 'md5':
'cfg_path': 'model.yaml', '4dd42cfce9aaa54db0ec698da6c48ec5',
'ckpt_path':'exp/deepspeech2_online/checkpoints/avg_1', 'cfg_path':
'model':'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel', 'model.yaml',
'params':'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams', 'ckpt_path':
'onnx_model': 'onnx/model.onnx', 'exp/deepspeech2_online/checkpoints/avg_1',
'lm_url':'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm', 'model':
'lm_md5':'29e02312deb2e59b3c8686c7966d4fe3' 'exp/deepspeech2_online/checkpoints/avg_1.jit.pdmodel',
'params':
'exp/deepspeech2_online/checkpoints/avg_1.jit.pdiparams',
'onnx_model':
'onnx/model.onnx',
'lm_url':
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
'lm_md5':
'29e02312deb2e59b3c8686c7966d4fe3'
},
},
"deepspeech2online_wenetspeech-zh-16k": {
'1.0.3': {
'url':
'http://paddlespeech.bj.bcebos.com/s2t/wenetspeech/asr0/asr0_deepspeech2_online_wenetspeech_ckpt_1.0.3.model.tar.gz',
'md5':
'cfe273793e68f790f742b411c98bc75e',
'cfg_path':
'model.yaml',
'ckpt_path':
'exp/deepspeech2_online/checkpoints/avg_10',
'model':
'exp/deepspeech2_online/checkpoints/avg_10.jit.pdmodel',
'params':
'exp/deepspeech2_online/checkpoints/avg_10.jit.pdiparams',
'onnx_model':
'onnx/model.onnx',
'lm_url':
'https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm',
'lm_md5':
'29e02312deb2e59b3c8686c7966d4fe3'
}, },
}, },
} }

@ -11,7 +11,7 @@ port: 8090
# protocol = ['websocket'] (only one can be selected). # protocol = ['websocket'] (only one can be selected).
# websocket only support online engine type. # websocket only support online engine type.
protocol: 'websocket' protocol: 'websocket'
engine_list: ['asr_online-inference'] engine_list: ['asr_online-onnx']
################################################################################# #################################################################################
@ -21,7 +21,7 @@ engine_list: ['asr_online-inference']
################################### ASR ######################################### ################################### ASR #########################################
################### speech task: asr; engine_type: online-inference ####################### ################### speech task: asr; engine_type: online-inference #######################
asr_online-inference: asr_online-inference:
model_type: 'deepspeech2online_aishell' model_type: 'deepspeech2online_wenetspeech'
am_model: # the pdmodel file of am static model [optional] am_model: # the pdmodel file of am static model [optional]
am_params: # the pdiparams file of am static model [optional] am_params: # the pdiparams file of am static model [optional]
lang: 'zh' lang: 'zh'
@ -53,7 +53,7 @@ asr_online-inference:
################################### ASR ######################################### ################################### ASR #########################################
################### speech task: asr; engine_type: online-onnx ####################### ################### speech task: asr; engine_type: online-onnx #######################
asr_online-onnx: asr_online-onnx:
model_type: 'deepspeech2online_aishell' model_type: 'deepspeech2online_wenetspeech'
am_model: # the pdmodel file of onnx am static model [optional] am_model: # the pdmodel file of onnx am static model [optional]
am_params: # the pdiparams file of am static model [optional] am_params: # the pdiparams file of am static model [optional]
lang: 'zh' lang: 'zh'

@ -9,7 +9,7 @@ Please make sure [Paddle2ONNX](https://github.com/PaddlePaddle/Paddle2ONNX) and
The example test with these packages installed: The example test with these packages installed:
``` ```
paddle2onnx 0.9.8rc0 # develop af4354b4e9a61a93be6490640059a02a4499bc7a paddle2onnx 0.9.8 # develop 62c5424e22cd93968dc831216fc9e0f0fce3d819
paddleaudio 0.2.1 paddleaudio 0.2.1
paddlefsl 1.1.0 paddlefsl 1.1.0
paddlenlp 2.2.6 paddlenlp 2.2.6

@ -492,6 +492,8 @@ class SymbolicShapeInference:
skip_infer = node.op_type in [ skip_infer = node.op_type in [
'If', 'Loop', 'Scan', 'SplitToSequence', 'ZipMap', \ 'If', 'Loop', 'Scan', 'SplitToSequence', 'ZipMap', \
# contrib ops # contrib ops
'Attention', 'BiasGelu', \ 'Attention', 'BiasGelu', \
'EmbedLayerNormalization', \ 'EmbedLayerNormalization', \
'FastGelu', 'Gelu', 'LayerNormalization', \ 'FastGelu', 'Gelu', 'LayerNormalization', \

@ -0,0 +1,45 @@
#!/usr/bin/env python3
"""Optimize an ONNX model with onnxruntime's graph optimizer.

Creating an InferenceSession with `optimized_model_filepath` set serializes
the optimized graph to disk as a side effect.

References:
    https://onnxruntime.ai/docs/performance/graph-optimizations.html
    https://onnxruntime.ai/docs/api/python/api_summary.html#api
"""
import argparse

import onnxruntime as ort


def parse_arguments():
    """Parse CLI arguments for the ONNX graph optimizer."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        '--model_in', required=True, type=str, help='Path to onnx model.')
    parser.add_argument(
        '--opt_level',
        type=int,
        # Not required: the default of 0 (basic) applies when omitted.
        # (required=True together with default=0 made the default dead.)
        default=0,
        choices=[0, 1, 2],
        help='Graph optimization level: 0=basic, 1=extended, 2=all.')
    parser.add_argument(
        '--model_out', required=True, help='path to save the optimized model.')
    # store_true makes --debug a proper boolean flag instead of one that
    # consumes an arbitrary string value.
    parser.add_argument(
        '--debug', action='store_true', help='output debug info.')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_arguments()

    sess_options = ort.SessionOptions()

    # Set graph optimization level.
    print(f"opt level: {args.opt_level}")
    level_map = {
        0: ort.GraphOptimizationLevel.ORT_ENABLE_BASIC,
        1: ort.GraphOptimizationLevel.ORT_ENABLE_EXTENDED,
        2: ort.GraphOptimizationLevel.ORT_ENABLE_ALL,
    }
    sess_options.graph_optimization_level = level_map[args.opt_level]

    # To enable model serialization after graph optimization set this.
    sess_options.optimized_model_filepath = args.model_out

    session = ort.InferenceSession(args.model_in, sess_options)

@ -5,10 +5,11 @@ set -e
. path.sh . path.sh
stage=0 stage=0
stop_stage=100 stop_stage=50
#tarfile=asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz tarfile=asr0_deepspeech2_online_wenetspeech_ckpt_1.0.2.model.tar.gz
tarfile=asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.1.model.tar.gz #tarfile=asr0_deepspeech2_online_aishell_fbank161_ckpt_1.0.1.model.tar.gz
model_prefix=avg_1.jit model_prefix=avg_10.jit
#model_prefix=avg_1.jit
model=${model_prefix}.pdmodel model=${model_prefix}.pdmodel
param=${model_prefix}.pdiparams param=${model_prefix}.pdiparams
@ -80,6 +81,14 @@ if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ];then
fi fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ] ;then
# ort graph optmize
./local/ort_opt.py --model_in $exp/model.onnx --opt_level 0 --model_out $exp/model.ort.opt.onnx
./local/infer_check.py --input_file $input_file --model_type $model_type --model_dir $dir --model_prefix $model_prefix --onnx_model $exp/model.ort.opt.onnx
fi
# aishell rnn hidden is 1024 # aishell rnn hidden is 1024
# wenetspeech rnn hidden is 2048 # wenetspeech rnn hidden is 2048
if [ $model_type == 'aishell' ];then if [ $model_type == 'aishell' ];then
@ -92,7 +101,7 @@ else
fi fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ] ;then if [ ${stage} -le 51 ] && [ ${stop_stage} -ge 51 ] ;then
# wenetspeech ds2 model exceeded 2GB limit, will error. # wenetspeech ds2 model exceeded 2GB limit, will error.
# simplifying onnx model # simplifying onnx model
./local/onnx_opt.sh $exp/model.onnx $exp/model.opt.onnx "$input_shape" ./local/onnx_opt.sh $exp/model.onnx $exp/model.opt.onnx "$input_shape"

Loading…
Cancel
Save