From c0a0ec11ceb9fb352bce98db545531df02d80fbe Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Thu, 29 Jul 2021 05:03:51 +0000 Subject: [PATCH 1/2] =?UTF-8?q?=E8=B7=91=E9=80=9A=E4=BA=86deeppseech=5Fonl?= =?UTF-8?q?ine=E7=9A=84=E6=B5=81=E7=A8=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deepspeech/exps/deepspeech2/model.py | 68 +++++++++++++++++------- deepspeech/models/ds2/deepspeech2.py | 2 +- deepspeech/models/ds2_online/__init__.py | 18 +++++-- tests/deepspeech2_model_test.py | 6 ++- 4 files changed, 67 insertions(+), 27 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index fab94ced8..9e870b13e 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -29,8 +29,8 @@ from deepspeech.io.sampler import SortagradBatchSampler from deepspeech.io.sampler import SortagradDistributedBatchSampler from deepspeech.models.ds2 import DeepSpeech2InferModel from deepspeech.models.ds2 import DeepSpeech2Model -#from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline -#from deepspeech.models.ds2_online import DeepSpeech2ModelOnline +from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline +from deepspeech.models.ds2_online import DeepSpeech2ModelOnline from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog from deepspeech.training.trainer import Trainer from deepspeech.utils import error_rate @@ -122,14 +122,26 @@ class DeepSpeech2Trainer(Trainer): def setup_model(self): config = self.config - model = DeepSpeech2Model( - feat_size=self.train_loader.collate_fn.feature_size, - dict_size=self.train_loader.collate_fn.vocab_size, - num_conv_layers=config.model.num_conv_layers, - num_rnn_layers=config.model.num_rnn_layers, - rnn_size=config.model.rnn_layer_size, - use_gru=config.model.use_gru, - share_rnn_weights=config.model.share_rnn_weights) + if config.model.apply_online == True: + model = 
DeepSpeech2Model( + feat_size=self.train_loader.collate_fn.feature_size, + dict_size=self.train_loader.collate_fn.vocab_size, + num_conv_layers=config.model.num_conv_layers, + num_rnn_layers=config.model.num_rnn_layers, + rnn_size=config.model.rnn_layer_size, + use_gru=config.model.use_gru, + share_rnn_weights=config.model.share_rnn_weights) + else: + model = DeepSpeech2ModelOnline( + feat_size=self.train_loader.collate_fn.feature_size, + dict_size=self.train_loader.collate_fn.vocab_size, + num_conv_layers=config.model.num_conv_layers, + num_rnn_layers=config.model.num_rnn_layers, + num_fc_layers=config.model.num_fc_layers, + fc_layers_size_list=config.model.fc_layers_size_list, + rnn_size=config.model.rnn_layer_size, + use_gru=config.model.use_gru, + share_rnn_weights=config.model.share_rnn_weights) if self.parallel: model = paddle.DataParallel(model) @@ -331,8 +343,12 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): exit(-1) def export(self): - infer_model = DeepSpeech2InferModel.from_pretrained( - self.test_loader, self.config, self.args.checkpoint_path) + if self.config.model.apply_online == True: + infer_model = DeepSpeech2InferModelOnline.from_pretrained( + self.test_loader, self.config, self.args.checkpoint_path) + else: + infer_model = DeepSpeech2InferModel.from_pretrained( + self.test_loader, self.config, self.args.checkpoint_path) infer_model.eval() feat_dim = self.test_loader.collate_fn.feature_size @@ -370,14 +386,26 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): def setup_model(self): config = self.config - model = DeepSpeech2Model( - feat_size=self.test_loader.collate_fn.feature_size, - dict_size=self.test_loader.collate_fn.vocab_size, - num_conv_layers=config.model.num_conv_layers, - num_rnn_layers=config.model.num_rnn_layers, - rnn_size=config.model.rnn_layer_size, - use_gru=config.model.use_gru, - share_rnn_weights=config.model.share_rnn_weights) + if config.model.apply_online == True: + model = DeepSpeech2Model( + 
feat_size=self.test_loader.collate_fn.feature_size, + dict_size=self.test_loader.collate_fn.vocab_size, + num_conv_layers=config.model.num_conv_layers, + num_rnn_layers=config.model.num_rnn_layers, + rnn_size=config.model.rnn_layer_size, + use_gru=config.model.use_gru, + share_rnn_weights=config.model.share_rnn_weights) + else: + model = DeepSpeech2ModelOnline( + feat_size=self.train_loader.collate_fn.feature_size, + dict_size=self.train_loader.collate_fn.vocab_size, + num_conv_layers=config.model.num_conv_layers, + num_rnn_layers=config.model.num_rnn_layers, + num_fc_layers=config.model.num_fc_layers, + fc_layers_size_list=config.model.fc_layers_size_list, + rnn_size=config.model.rnn_layer_size, + use_gru=config.model.use_gru, + share_rnn_weights=config.model.share_rnn_weights) self.model = model logger.info("Setup model!") diff --git a/deepspeech/models/ds2/deepspeech2.py b/deepspeech/models/ds2/deepspeech2.py index 4026c89a7..8d737e800 100644 --- a/deepspeech/models/ds2/deepspeech2.py +++ b/deepspeech/models/ds2/deepspeech2.py @@ -19,8 +19,8 @@ from paddle import nn from yacs.config import CfgNode from deepspeech.models.ds2.conv import ConvStack -from deepspeech.modules.ctc import CTCDecoder from deepspeech.models.ds2.rnn import RNNStack +from deepspeech.modules.ctc import CTCDecoder from deepspeech.utils import layer_tools from deepspeech.utils.checkpoint import Checkpoint from deepspeech.utils.log import Log diff --git a/deepspeech/models/ds2_online/__init__.py b/deepspeech/models/ds2_online/__init__.py index 88076667c..255000eeb 100644 --- a/deepspeech/models/ds2_online/__init__.py +++ b/deepspeech/models/ds2_online/__init__.py @@ -1,7 +1,17 @@ -from .deepspeech2 import DeepSpeech2ModelOnline +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. from .deepspeech2 import DeepSpeech2InferModelOnline +from .deepspeech2 import DeepSpeech2ModelOnline __all__ = ['DeepSpeech2ModelOnline', 'DeepSpeech2InferModelOnline'] - - - diff --git a/tests/deepspeech2_model_test.py b/tests/deepspeech2_model_test.py index bb40802d7..1938f7147 100644 --- a/tests/deepspeech2_model_test.py +++ b/tests/deepspeech2_model_test.py @@ -16,8 +16,10 @@ import unittest import numpy as np import paddle -#from deepspeech.models.deepspeech2 import DeepSpeech2Model -from deepspeech.models.ds2_online import DeepSpeech2ModelOnline as DeepSpeech2Model +from deepspeech.models.deepspeech2 import DeepSpeech2Model + +from deepspeech.models.ds2_online import DeepSpeech2ModelOnline + class TestDeepSpeech2Model(unittest.TestCase): def setUp(self): From f87eb88f790e16bbe81fbcac8d073bc683d0d050 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 30 Jul 2021 05:53:13 +0000 Subject: [PATCH 2/2] complete the pipeline of tiny --- deepspeech/exps/deepspeech2/bin/export.py | 4 ++- deepspeech/exps/deepspeech2/bin/test.py | 3 +- deepspeech/exps/deepspeech2/bin/train.py | 3 +- deepspeech/exps/deepspeech2/config.py | 24 +++++++++---- deepspeech/exps/deepspeech2/model.py | 24 ++++++++----- examples/tiny/s0/local/export.sh | 7 ++-- examples/tiny/s0/local/test.sh | 6 ++-- examples/tiny/s0/local/train.sh | 6 ++-- examples/tiny/s0/run.sh | 7 ++-- examples/tiny/s0/run_online.sh | 41 +++++++++++++++++++++++ 10 files changed, 96 insertions(+), 29 deletions(-) create mode 100755 examples/tiny/s0/run_online.sh diff --git a/deepspeech/exps/deepspeech2/bin/export.py
b/deepspeech/exps/deepspeech2/bin/export.py index a1607d583..9ae045c48 100644 --- a/deepspeech/exps/deepspeech2/bin/export.py +++ b/deepspeech/exps/deepspeech2/bin/export.py @@ -30,11 +30,13 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() + parser.add_argument("--model_type") args = parser.parse_args() + print_arguments(args) # https://yaml.org/type/float.html - config = get_cfg_defaults() + config = get_cfg_defaults(args.model_type) if args.config: config.merge_from_file(args.config) if args.opts: diff --git a/deepspeech/exps/deepspeech2/bin/test.py b/deepspeech/exps/deepspeech2/bin/test.py index f4edf08a8..49bca73d2 100644 --- a/deepspeech/exps/deepspeech2/bin/test.py +++ b/deepspeech/exps/deepspeech2/bin/test.py @@ -30,11 +30,12 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() + parser.add_argument("--model_type") args = parser.parse_args() print_arguments(args, globals()) # https://yaml.org/type/float.html - config = get_cfg_defaults() + config = get_cfg_defaults(args.model_type) if args.config: config.merge_from_file(args.config) if args.opts: diff --git a/deepspeech/exps/deepspeech2/bin/train.py b/deepspeech/exps/deepspeech2/bin/train.py index 5e5c1e2a4..253806af1 100644 --- a/deepspeech/exps/deepspeech2/bin/train.py +++ b/deepspeech/exps/deepspeech2/bin/train.py @@ -35,11 +35,12 @@ def main(config, args): if __name__ == "__main__": parser = default_argument_parser() + parser.add_argument("--model_type") args = parser.parse_args() print_arguments(args, globals()) # https://yaml.org/type/float.html - config = get_cfg_defaults() + config = get_cfg_defaults(args.model_type) if args.config: config.merge_from_file(args.config) if args.opts: diff --git a/deepspeech/exps/deepspeech2/config.py b/deepspeech/exps/deepspeech2/config.py index a851e1f72..4b3f724ff 100644 --- a/deepspeech/exps/deepspeech2/config.py +++ b/deepspeech/exps/deepspeech2/config.py @@ -18,21 +18,31 @@ from 
deepspeech.exps.deepspeech2.model import DeepSpeech2Trainer from deepspeech.io.collator import SpeechCollator from deepspeech.io.dataset import ManifestDataset from deepspeech.models.ds2 import DeepSpeech2Model +from deepspeech.models.ds2_online import DeepSpeech2ModelOnline -_C = CfgNode() -_C.data = ManifestDataset.params() +def get_cfg_defaults(model_type): + _C = CfgNode() + if (model_type == 'offline'): + _C.data = ManifestDataset.params() -_C.collator = SpeechCollator.params() + _C.collator = SpeechCollator.params() -_C.model = DeepSpeech2Model.params() + _C.model = DeepSpeech2Model.params() -_C.training = DeepSpeech2Trainer.params() + _C.training = DeepSpeech2Trainer.params() -_C.decoding = DeepSpeech2Tester.params() + _C.decoding = DeepSpeech2Tester.params() + else: + _C.data = ManifestDataset.params() + _C.collator = SpeechCollator.params() -def get_cfg_defaults(): + _C.model = DeepSpeech2ModelOnline.params() + + _C.training = DeepSpeech2Trainer.params() + + _C.decoding = DeepSpeech2Tester.params() """Get a yacs CfgNode object with default values for my_project.""" # Return a clone so that the defaults will not be altered # This is for the "local variable" use pattern diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 9e870b13e..c654dc011 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -122,7 +122,7 @@ class DeepSpeech2Trainer(Trainer): def setup_model(self): config = self.config - if config.model.apply_online == True: + if self.args.model_type == 'offline': model = DeepSpeech2Model( feat_size=self.train_loader.collate_fn.feature_size, dict_size=self.train_loader.collate_fn.vocab_size, @@ -131,7 +131,7 @@ class DeepSpeech2Trainer(Trainer): rnn_size=config.model.rnn_layer_size, use_gru=config.model.use_gru, share_rnn_weights=config.model.share_rnn_weights) - else: + elif self.args.model_type == 'online': model = DeepSpeech2ModelOnline( 
feat_size=self.train_loader.collate_fn.feature_size, dict_size=self.train_loader.collate_fn.vocab_size, @@ -142,6 +142,8 @@ class DeepSpeech2Trainer(Trainer): rnn_size=config.model.rnn_layer_size, use_gru=config.model.use_gru, share_rnn_weights=config.model.share_rnn_weights) + else: + raise Exception("wrong model type") if self.parallel: model = paddle.DataParallel(model) @@ -343,12 +345,14 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): exit(-1) def export(self): - if self.config.model.apply_online == True: + if self.args.model_type == 'offline': + infer_model = DeepSpeech2InferModel.from_pretrained( + self.test_loader, self.config, self.args.checkpoint_path) + elif self.args.model_type == 'online': infer_model = DeepSpeech2InferModelOnline.from_pretrained( self.test_loader, self.config, self.args.checkpoint_path) else: - infer_model = DeepSpeech2InferModel.from_pretrained( - self.test_loader, self.config, self.args.checkpoint_path) + raise Exception("wrong model tyep") infer_model.eval() feat_dim = self.test_loader.collate_fn.feature_size @@ -386,7 +390,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): def setup_model(self): config = self.config - if config.model.apply_online == True: + if self.args.model_type == 'offline': model = DeepSpeech2Model( feat_size=self.test_loader.collate_fn.feature_size, dict_size=self.test_loader.collate_fn.vocab_size, @@ -395,10 +399,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): rnn_size=config.model.rnn_layer_size, use_gru=config.model.use_gru, share_rnn_weights=config.model.share_rnn_weights) - else: + elif self.args.model_type == 'online': model = DeepSpeech2ModelOnline( - feat_size=self.train_loader.collate_fn.feature_size, - dict_size=self.train_loader.collate_fn.vocab_size, + feat_size=self.test_loader.collate_fn.feature_size, + dict_size=self.test_loader.collate_fn.vocab_size, num_conv_layers=config.model.num_conv_layers, num_rnn_layers=config.model.num_rnn_layers, num_fc_layers=config.model.num_fc_layers, @@ -406,6 
+410,8 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): rnn_size=config.model.rnn_layer_size, use_gru=config.model.use_gru, share_rnn_weights=config.model.share_rnn_weights) + else: + raise Exception("Wrong model type") self.model = model logger.info("Setup model!") diff --git a/examples/tiny/s0/local/export.sh b/examples/tiny/s0/local/export.sh index f99a15bad..6955239c7 100755 --- a/examples/tiny/s0/local/export.sh +++ b/examples/tiny/s0/local/export.sh @@ -1,6 +1,6 @@ #!/bin/bash -if [ $# != 3 ];then +if [ $# != 4 ];then echo "usage: $0 config_path ckpt_prefix jit_model_path" exit -1 fi @@ -11,6 +11,7 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_path_prefix=$2 jit_model_export_path=$3 +model_type=$4 device=gpu if [ ${ngpu} == 0 ];then @@ -22,8 +23,8 @@ python3 -u ${BIN_DIR}/export.py \ --nproc ${ngpu} \ --config ${config_path} \ --checkpoint_path ${ckpt_path_prefix} \ ---export_path ${jit_model_export_path} - +--export_path ${jit_model_export_path} \ +--model_type ${model_type} if [ $? -ne 0 ]; then echo "Failed in export!" diff --git a/examples/tiny/s0/local/test.sh b/examples/tiny/s0/local/test.sh index 16a5e9ef0..2f74491a1 100755 --- a/examples/tiny/s0/local/test.sh +++ b/examples/tiny/s0/local/test.sh @@ -1,6 +1,6 @@ #!/bin/bash -if [ $# != 2 ];then +if [ $# != 3 ];then echo "usage: ${0} config_path ckpt_path_prefix" exit -1 fi @@ -14,6 +14,7 @@ if [ ${ngpu} == 0 ];then fi config_path=$1 ckpt_prefix=$2 +model_type=$3 # download language model bash local/download_lm_en.sh @@ -26,7 +27,8 @@ python3 -u ${BIN_DIR}/test.py \ --nproc 1 \ --config ${config_path} \ --result_file ${ckpt_prefix}.rsl \ ---checkpoint_path ${ckpt_prefix} +--checkpoint_path ${ckpt_prefix} \ +--model_type ${model_type} if [ $? -ne 0 ]; then echo "Failed in evaluation!" 
diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh index f6bd2c983..1d49dcd1d 100755 --- a/examples/tiny/s0/local/train.sh +++ b/examples/tiny/s0/local/train.sh @@ -1,6 +1,6 @@ #!/bin/bash -if [ $# != 2 ];then +if [ $# != 3 ];then echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" exit -1 fi @@ -10,6 +10,7 @@ echo "using $ngpu gpus..." config_path=$1 ckpt_name=$2 +model_type=$3 device=gpu if [ ${ngpu} == 0 ];then @@ -22,7 +23,8 @@ python3 -u ${BIN_DIR}/train.py \ --device ${device} \ --nproc ${ngpu} \ --config ${config_path} \ ---output exp/${ckpt_name} +--output exp/${ckpt_name} \ +--model_type ${model_type} if [ $? -ne 0 ]; then echo "Failed in training!" diff --git a/examples/tiny/s0/run.sh b/examples/tiny/s0/run.sh index d7e153e8d..a4506e4c5 100755 --- a/examples/tiny/s0/run.sh +++ b/examples/tiny/s0/run.sh @@ -7,6 +7,7 @@ stage=0 stop_stage=100 conf_path=conf/deepspeech2.yaml avg_num=1 +model_type=online source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; @@ -21,7 +22,7 @@ fi if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then # train model, all `ckpt` under `exp` dir - CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} + CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type} fi if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then @@ -31,10 +32,10 @@ fi if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then # test ckpt avg_n - CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 + CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} || exit -1 fi if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then # export ckpt avg_n - CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit + CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} 
exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type} fi diff --git a/examples/tiny/s0/run_online.sh b/examples/tiny/s0/run_online.sh new file mode 100755 index 000000000..4c3602045 --- /dev/null +++ b/examples/tiny/s0/run_online.sh @@ -0,0 +1,41 @@ +#!/bin/bash +set -e +source path.sh + +gpus=7 +stage=1 +stop_stage=100 +conf_path=conf/deepspeech2.yaml +avg_num=1 +model_type=online + +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; + +avg_ckpt=avg_${avg_num} +ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}') ###ckpt = deepspeech2 +echo "checkpoint name ${ckpt}" + +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + # prepare data + bash ./local/data.sh || exit -1 +fi + +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + # train model, all `ckpt` under `exp` dir + CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type} +fi + +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + # avg n best model + avg.sh exp/${ckpt}/checkpoints ${avg_num} +fi + +if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then + # test ckpt avg_n + CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} || exit -1 +fi + +if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then + # export ckpt avg_n + CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type} +fi