From cda6ca8323935038efc51e911253cb12b24c923a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 16 Sep 2021 12:16:13 +0000 Subject: [PATCH 01/10] add benchmark flags, and logic --- deepspeech/exps/u2/model.py | 3 ++- deepspeech/training/cli.py | 40 ++++++++++++------------------ deepspeech/training/trainer.py | 14 +++++++++++ deepspeech/utils/utility.py | 14 ++++++++++- examples/aishell/s1/local/train.sh | 10 +++++--- examples/tiny/s1/local/train.sh | 40 +++++++++++++++++++----------- tests/benchmark/run_all.sh | 29 +++++++++++++--------- tests/benchmark/run_benchmark.sh | 21 ++++++++-------- 8 files changed, 106 insertions(+), 65 deletions(-) mode change 100644 => 100755 tests/benchmark/run_all.sh mode change 100644 => 100755 tests/benchmark/run_benchmark.sh diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 1328a1cb..0d17d9fd 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -100,7 +100,8 @@ class U2Trainer(Trainer): # Disable gradient synchronizations across DDP processes. # Within this context, gradients will be accumulated on module # variables, which will later be synchronized. - context = self.model.no_sync + # When using cpu w/o DDP, model does not have `no_sync` + context = self.model.no_sync if self.parallel else nullcontext else: # Used for single gpu training and DDP gradient synchronization # processes. diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py index 1477bdfe..d8719b3a 100644 --- a/deepspeech/training/cli.py +++ b/deepspeech/training/cli.py @@ -44,32 +44,24 @@ def default_argument_parser(): parser = argparse.ArgumentParser() # yapf: disable - # data and output - parser.add_argument("--config", metavar="FILE", help="path of the config file to overwrite to default config with.") - parser.add_argument("--dump-config", metavar="FILE", help="dump config to yaml file.") - parser.add_argument("--output", metavar="OUTPUT_DIR", help="path to save checkpoint and logs.") - - # load from saved checkpoint - parser.add_argument("--checkpoint_path", type=str, help="path of the checkpoint to load") - - # running - parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], - help="device type to use, cpu and gpu are supported.") - parser.add_argument("--nprocs", type=int, default=1, help="number of parallel processes to use.") - - # overwrite extra config and default config - # parser.add_argument("--opts", nargs=argparse.REMAINDER, - # help="options to overwrite --config file and the default config, passing in KEY VALUE pairs") - parser.add_argument("--opts", type=str, default=[], nargs='+', - help="options to overwrite --config file and the default config, passing in KEY VALUE pairs") - - # random seed - parser.add_argument("--seed", type=int, default=None, + train_group = parser.add_argument_group(title='Train Options', description=None) + train_group.add_argument("--seed", type=int, default=None, help="seed to use for paddle, np and random. None or 0 for random, else set seed.") - - # profiler - parser.add_argument('--profiler_options', type=str, default=None, + train_group.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], + help="device cpu and gpu are supported.") + train_group.add_argument("--nprocs", type=int, default=1, help="number of parallel processes. 
0 for cpu.") + train_group.add_argument("--config", metavar="CONFIG_FILE", help="config file.") + train_group.add_argument("--output", metavar="CKPT_DIR", help="path to save checkpoint.") + train_group.add_argument("--checkpoint_path", type=str, help="path to load checkpoint") + train_group.add_argument("--opts", type=str, default=[], nargs='+', + help="overwrite --config file, passing in LIST[KEY VALUE] pairs") + train_group.add_argument("--dump-config", metavar="FILE", help="dump config to `this` file.") + + bech_group = parser.add_argument_group(title='Benchmark Options', description=None) + bech_group.add_argument('--profiler-options', type=str, default=None, help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".') + bech_group.add_argument('--benchmark-batch-size', type=int, default=None, help='batch size for benchmark.') + bech_group.add_argument('--benchmark-max-step', type=int, default=None, help='max iteration for benchmark.') # yapd: enable return parser diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 6587f129..9549a4dd 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import sys import time from pathlib import Path @@ -24,6 +25,7 @@ from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all +from deepspeech.utils.utility import UpdateConfig __all__ = ["Trainer"] @@ -101,6 +103,12 @@ class Trainer(): seed_all(args.seed) logger.info(f"Set seed {args.seed}") + if self.args.benchmark_batch_size: + with UpdateConfig(self.config): + self.config.collator.batch_size = self.args.benchmark_batch_size + logger.info( + f"Benchmark reset batch-size: {self.args.benchmark_batch_size}") + def setup(self): """Setup the experiment. 
""" @@ -188,6 +196,12 @@ class Trainer(): if self.args.profiler_options: profiler.add_profiler_step(self.args.profiler_options) + if self.args.benchmark_max_step and self.iteration > self.args.benchmark_max_step: + logger.info( + f"Reach benchmark-max-step: {self.args.benchmark_max_step}") + sys.exit( + f"Reach benchmark-max-step: {self.args.benchmark_max_step}") + def train(self): """The training process control by epoch.""" from_scratch = self.resume_or_scratch() diff --git a/deepspeech/utils/utility.py b/deepspeech/utils/utility.py index e18fc1f7..6f84c41b 100644 --- a/deepspeech/utils/utility.py +++ b/deepspeech/utils/utility.py @@ -16,15 +16,27 @@ import distutils.util import math import os import random +from contextlib import contextmanager from typing import List import numpy as np import paddle -__all__ = ["seed_all", 'print_arguments', 'add_arguments', "log_add"] +__all__ = [ + "UpdateConfig", "seed_all", 'print_arguments', 'add_arguments', "log_add" +] + + +@contextmanager +def UpdateConfig(config): + """Update yacs config""" + config.defrost() + yield + config.freeze() def seed_all(seed: int=210329): + """freeze random generator seed.""" np.random.seed(seed) random.seed(seed) paddle.seed(seed) diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index e065ad6a..5b9c45f5 100755 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -1,7 +1,8 @@ #!/bin/bash - profiler_options= +benchmark_batch_size= +benchmark_max_step= # seed may break model convergence seed=0 @@ -32,12 +33,15 @@ ckpt_name=$2 mkdir -p exp python3 -u ${BIN_DIR}/train.py \ +--seed ${seed} \ --device ${device} \ --nproc ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---profiler_options ${profiler_options} \ ---seed ${seed} +--profiler-options "${profiler-options}" \ +--benchmark-batch-size ${benchmark_batch_size} \ +--benchmark-max-step ${benchmark_max_step} + if [ ${seed} != 0 ]; then unset FLAGS_cudnn_deterministic diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh index 374608fd..56ceab41 100755 --- a/examples/tiny/s1/local/train.sh +++ b/examples/tiny/s1/local/train.sh @@ -1,37 +1,49 @@ #!/bin/bash -if [ $# != 2 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" - exit -1 -fi +profiler_options= +benchmark_batch_size= +benchmark_max_step= + +# seed may break model convergence +seed=0 + +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') echo "using $ngpu gpus..." -config_path=$1 -ckpt_name=$2 - device=gpu if [ ${ngpu} == 0 ];then device=cpu fi -mkdir -p exp - -# seed may break model convergence -seed=0 -if [ ${seed} != 0 ]; then +if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True + echo "using seed $seed & FLAGS_cudnn_deterministic=True ..." 
+fi + +if [ $# != 2 ];then + echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" + exit -1 fi +config_path=$1 +ckpt_name=$2 + +mkdir -p exp + python3 -u ${BIN_DIR}/train.py \ +--seed ${seed} \ --device ${device} \ --nproc ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---seed ${seed} +--profiler-options "${profiler_options}" \ +--benchmark-batch-size ${benchmark_batch_size} \ +--benchmark-max-step ${benchmark_max_step} + -if [ ${seed} != 0 ]; then +if [ ${seed} != 0 ]; then unset FLAGS_cudnn_deterministic fi diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh old mode 100644 new mode 100755 index 7aa11d0f..6f707cdc --- a/tests/benchmark/run_all.sh +++ b/tests/benchmark/run_all.sh @@ -1,41 +1,46 @@ #!/bin/bash +CUR_DIR=${PWD} ROOT_DIR=../../ # 提供可稳定复现性能的脚本,默认在标准docker环境内py37执行: # collect env info bash ${ROOT_DIR}/utils/pd_env_collect.sh -cat pd_env.txt +#cat pd_env.txt -# 执行目录:需说明 -pushd ${ROOT_DIR}/examples/aishell/s1 # 1 安装该模型需要的依赖 (如需开启优化策略请注明) -pushd ${ROOT_DIR}/tools; make; popd -source ${ROOT_DIR}/tools/venv/bin/activate -pushd ${ROOT_DIR}; bash setup.sh; popd +#pushd ${ROOT_DIR}/tools; make; popd +#source ${ROOT_DIR}/tools/venv/bin/activate +#pushd ${ROOT_DIR}; bash setup.sh; popd # 2 拷贝该模型需要数据、预训练模型 + +# 执行目录:需说明 +#pushd ${ROOT_DIR}/examples/aishell/s1 +pushd ${ROOT_DIR}/examples/tiny/s1 + mkdir -p exp/log -loca/data.sh &> exp/log/data.log +. path.sh +#bash local/data.sh &> exp/log/data.log # 3 批量运行(如不方便批量,1,2需放到单个模型中) -model_mode_list=(conformer) +model_mode_list=(conformer transformer) fp_item_list=(fp32) -bs_item=(32 64 96) +bs_item_list=(32 64 96) for model_mode in ${model_mode_list[@]}; do for fp_item in ${fp_item_list[@]}; do - for bs_item in ${bs_list[@]} + for bs_item in ${bs_item_list[@]} do echo "index is speed, 1gpus, begin, ${model_name}" run_mode=sp - CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min) + CUDA_VISIBLE_DEVICES=0 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min) sleep 60 echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" run_mode=mp - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} sleep 60 done done diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh old mode 100644 new mode 100755 index 625d3616..eb111793 --- a/tests/benchmark/run_benchmark.sh +++ b/tests/benchmark/run_benchmark.sh @@ -23,19 +23,19 @@ function _train(){ echo "Train on ${num_gpu_devices} GPUs" echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" - train_cmd="--model_name=${model_name} - --batch_size=${batch_size} - --fp=${fp_item} \ - --max_iter=${max_iter} " + train_cmd="--benchmark-batch-size ${batch_size} + --benchmark-max-step ${max_iter} + conf/${model_name}.yaml ${model_name}" + case ${run_mode} in - sp) train_cmd="python -u tools/train.py "${train_cmd}" ;; + sp) train_cmd="bash local/train.sh "${train_cmd}"" ;; mp) - train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py "${train_cmd}" - log_parse_file="mylog/workerlog.0" ;; + train_cmd="bash local/train.sh "${train_cmd}"" ;; *) echo "choose run_mode(sp or mp)"; exit 1; esac -# 以下不用修改 - timeout 15m ${train_cmd} > ${log_file} 2>&1 + + # 以下不用修改 + 
CUDA_VISIBLE_DEVICES=${device} timeout 15m ${train_cmd} > ${log_file} 2>&1 if [ $? -ne 0 ];then echo -e "${model_name}, FAIL" export job_fail_flag=1 @@ -43,7 +43,8 @@ function _train(){ echo -e "${model_name}, SUCCESS" export job_fail_flag=0 fi - kill -9 `ps -ef|grep 'python'|awk '{print $2}'` + + trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM if [ $run_mode = "mp" -a -d mylog ]; then rm ${log_file} From d7a33b9d5e755ad43ebef2f4d257c3832de3d823 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 16 Sep 2021 12:35:41 +0000 Subject: [PATCH 02/10] update config with contextlib --- deepspeech/exps/deepspeech2/bin/tune.py | 191 ------------------------ deepspeech/exps/deepspeech2/model.py | 8 +- deepspeech/exps/u2/model.py | 10 +- deepspeech/exps/u2/trainer.py | 9 +- deepspeech/exps/u2_kaldi/model.py | 9 +- deepspeech/exps/u2_st/model.py | 9 +- deepspeech/models/u2/u2.py | 9 +- deepspeech/models/u2_st.py | 9 +- 8 files changed, 35 insertions(+), 219 deletions(-) delete mode 100644 deepspeech/exps/deepspeech2/bin/tune.py diff --git a/deepspeech/exps/deepspeech2/bin/tune.py b/deepspeech/exps/deepspeech2/bin/tune.py deleted file mode 100644 index 94a9b6c4..00000000 --- a/deepspeech/exps/deepspeech2/bin/tune.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Beam search parameters tuning for DeepSpeech2 model.""" -import functools -import sys - -import numpy as np -from paddle.io import DataLoader - -from deepspeech.exps.deepspeech2.config import get_cfg_defaults -from deepspeech.io.collator import SpeechCollator -from deepspeech.io.dataset import ManifestDataset -from deepspeech.models.ds2 import DeepSpeech2Model -from deepspeech.training.cli import default_argument_parser -from deepspeech.utils import error_rate -from deepspeech.utils.utility import add_arguments -from deepspeech.utils.utility import print_arguments - - -def tune(config, args): - """Tune parameters alpha and beta incrementally.""" - if not args.num_alphas >= 0: - raise ValueError("num_alphas must be non-negative!") - if not args.num_betas >= 0: - raise ValueError("num_betas must be non-negative!") - config.defrost() - config.data.manfiest = config.data.dev_manifest - config.data.augmentation_config = "" - config.data.keep_transcription_text = True - dev_dataset = ManifestDataset.from_config(config) - - valid_loader = DataLoader( - dev_dataset, - batch_size=config.data.batch_size, - shuffle=False, - drop_last=False, - collate_fn=SpeechCollator(keep_transcription_text=True)) - - model = DeepSpeech2Model.from_pretrained(valid_loader, config, - args.checkpoint_path) - model.eval() - - # decoders only accept string encoded in utf-8 - vocab_list = valid_loader.dataset.vocab_list - errors_func = error_rate.char_errors if config.decoding.error_rate_type == 'cer' else error_rate.word_errors - - # create grid for search - cand_alphas = np.linspace(args.alpha_from, args.alpha_to, args.num_alphas) - cand_betas = np.linspace(args.beta_from, args.beta_to, args.num_betas) - params_grid = [(alpha, beta) for alpha in cand_alphas - for beta in cand_betas] - - err_sum = [0.0 for i in range(len(params_grid))] - err_ave = [0.0 for i in range(len(params_grid))] - - num_ins, len_refs, cur_batch = 0, 0, 0 - # initialize external scorer - model.decoder.init_decode(args.alpha_from, args.beta_from, - config.decoding.lang_model_path, vocab_list, - config.decoding.decoding_method) - ## incremental tuning parameters over multiple batches - print("start tuning ...") - for infer_data in valid_loader(): - if (args.num_batches >= 0) and (cur_batch >= args.num_batches): - break - - def ordid2token(texts, texts_len): - """ ord() id to chr() chr """ - trans = [] - for text, n in zip(texts, texts_len): - n = n.numpy().item() - ids = text[:n] - trans.append(''.join([chr(i) for i in ids])) - return trans - - audio, audio_len, text, text_len = infer_data - target_transcripts = ordid2token(text, text_len) - num_ins += audio.shape[0] - - # model infer - eouts, eouts_len = model.encoder(audio, audio_len) - probs = model.decoder.softmax(eouts) - - # grid search - for index, (alpha, beta) in enumerate(params_grid): - print(f"tuneing: alpha={alpha} beta={beta}") - result_transcripts = model.decoder.decode_probs( - probs.numpy(), eouts_len, vocab_list, - config.decoding.decoding_method, - config.decoding.lang_model_path, alpha, beta, - config.decoding.beam_size, config.decoding.cutoff_prob, - config.decoding.cutoff_top_n, config.decoding.num_proc_bsearch) - - for target, result in zip(target_transcripts, result_transcripts): - errors, len_ref = errors_func(target, result) - err_sum[index] += errors - - # accumulate the length of references of every batchπ - # in the first iteration - if args.alpha_from == alpha and args.beta_from == beta: - len_refs += len_ref - - err_ave[index] = err_sum[index] / len_refs - if index 
% 2 == 0: - sys.stdout.write('.') - sys.stdout.flush() - print("tuneing: one grid done!") - - # output on-line tuning result at the end of current batch - err_ave_min = min(err_ave) - min_index = err_ave.index(err_ave_min) - print("\nBatch %d [%d/?], current opt (alpha, beta) = (%s, %s), " - " min [%s] = %f" % - (cur_batch, num_ins, "%.3f" % params_grid[min_index][0], - "%.3f" % params_grid[min_index][1], - config.decoding.error_rate_type, err_ave_min)) - cur_batch += 1 - - # output WER/CER at every (alpha, beta) - print("\nFinal %s:\n" % config.decoding.error_rate_type) - for index in range(len(params_grid)): - print("(alpha, beta) = (%s, %s), [%s] = %f" % - ("%.3f" % params_grid[index][0], "%.3f" % params_grid[index][1], - config.decoding.error_rate_type, err_ave[index])) - - err_ave_min = min(err_ave) - min_index = err_ave.index(err_ave_min) - print("\nFinish tuning on %d batches, final opt (alpha, beta) = (%s, %s)" % - (cur_batch, "%.3f" % params_grid[min_index][0], - "%.3f" % params_grid[min_index][1])) - - print("finish tuning") - - -def main(config, args): - tune(config, args) - - -if __name__ == "__main__": - parser = default_argument_parser() - add_arg = functools.partial(add_arguments, argparser=parser) - add_arg('num_batches', int, -1, "# of batches tuning on. " - "Default -1, on whole dev set.") - add_arg('num_alphas', int, 45, "# of alpha candidates for tuning.") - add_arg('num_betas', int, 8, "# of beta candidates for tuning.") - add_arg('alpha_from', float, 1.0, "Where alpha starts tuning from.") - add_arg('alpha_to', float, 3.2, "Where alpha ends tuning with.") - add_arg('beta_from', float, 0.1, "Where beta starts tuning from.") - add_arg('beta_to', float, 0.45, "Where beta ends tuning with.") - - add_arg('batch_size', int, 256, "# of samples per batch.") - add_arg('beam_size', int, 500, "Beam search width.") - add_arg('num_proc_bsearch', int, 8, "# of CPUs for beam search.") - add_arg('cutoff_prob', float, 1.0, "Cutoff probability for pruning.") - add_arg('cutoff_top_n', int, 40, "Cutoff number for pruning.") - - args = parser.parse_args() - print_arguments(args, globals()) - - # https://yaml.org/type/float.html - config = get_cfg_defaults() - if args.config: - config.merge_from_file(args.config) - if args.opts: - config.merge_from_list(args.opts) - - config.data.batch_size = args.batch_size - config.decoding.beam_size = args.beam_size - config.decoding.num_proc_bsearch = args.num_proc_bsearch - config.decoding.cutoff_prob = args.cutoff_prob - config.decoding.cutoff_top_n = args.cutoff_top_n - - config.freeze() - print(config) - - if args.dump_config: - with open(args.dump_config, 'w') as f: - print(config, file=f) - - main(config, args) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index fbc357ca..df35c52c 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -41,6 +41,7 @@ from deepspeech.utils import layer_tools from deepspeech.utils import mp_tools from deepspeech.utils.log import Autolog from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -147,10 +148,9 @@ class DeepSpeech2Trainer(Trainer): def setup_model(self): config = self.config.clone() - config.defrost() - config.model.feat_size = self.train_loader.collate_fn.feature_size - config.model.dict_size = self.train_loader.collate_fn.vocab_size - config.freeze() + with UpdateConfig(config): + config.model.feat_size = self.train_loader.collate_fn.feature_size + 
config.model.dict_size = self.train_loader.collate_fn.vocab_size if self.args.model_type == 'offline': model = DeepSpeech2Model.from_config(config.model) diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 0d17d9fd..89d443e0 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -43,6 +43,7 @@ from deepspeech.utils import mp_tools from deepspeech.utils import text_grid from deepspeech.utils import utility from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -315,10 +316,11 @@ class U2Trainer(Trainer): def setup_model(self): config = self.config model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.collate_fn.feature_size - model_conf.output_dim = self.train_loader.collate_fn.vocab_size - model_conf.freeze() + + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.collate_fn.feature_size + model_conf.output_dim = self.train_loader.collate_fn.vocab_size + model = U2Model.from_config(model_conf) if self.parallel: diff --git a/deepspeech/exps/u2/trainer.py b/deepspeech/exps/u2/trainer.py index fa3e6d9d..8e8634ac 100644 --- a/deepspeech/exps/u2/trainer.py +++ b/deepspeech/exps/u2/trainer.py @@ -32,6 +32,7 @@ from deepspeech.training.trainer import Trainer from deepspeech.training.updaters.trainer import Trainer as NewTrainer from deepspeech.utils import layer_tools from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -121,10 +122,10 @@ class U2Trainer(Trainer): def setup_model(self): config = self.config model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.collate_fn.feature_size - model_conf.output_dim = self.train_loader.collate_fn.vocab_size - model_conf.freeze() + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.collate_fn.feature_size + model_conf.output_dim = self.train_loader.collate_fn.vocab_size + model = U2Model.from_config(model_conf) if self.parallel: diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index 3d15e025..edcc3401 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -41,6 +41,7 @@ from deepspeech.utils import mp_tools from deepspeech.utils import text_grid from deepspeech.utils import utility from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -319,10 +320,10 @@ class U2Trainer(Trainer): # model model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.feat_dim - model_conf.output_dim = self.train_loader.vocab_size - model_conf.freeze() + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.feat_dim + model_conf.output_dim = self.train_loader.vocab_size + model = U2Model.from_config(model_conf) if self.parallel: model = paddle.DataParallel(model) diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py index 91a81503..0fa8ed73 100644 --- a/deepspeech/exps/u2_st/model.py +++ b/deepspeech/exps/u2_st/model.py @@ -47,6 +47,7 @@ from deepspeech.utils import mp_tools from deepspeech.utils import text_grid from deepspeech.utils import utility from deepspeech.utils.log import Log +from deepspeech.utils.utility import UpdateConfig logger = Log(__name__).getlog() @@ -345,10 +346,10 @@ class U2STTrainer(Trainer): def setup_model(self): config = self.config 
model_conf = config.model - model_conf.defrost() - model_conf.input_dim = self.train_loader.collate_fn.feature_size - model_conf.output_dim = self.train_loader.collate_fn.vocab_size - model_conf.freeze() + with UpdateConfig(model_conf): + model_conf.input_dim = self.train_loader.collate_fn.feature_size + model_conf.output_dim = self.train_loader.collate_fn.vocab_size + model = U2STModel.from_config(model_conf) if self.parallel: diff --git a/deepspeech/models/u2/u2.py b/deepspeech/models/u2/u2.py index fd8f1547..39ed9d5d 100644 --- a/deepspeech/models/u2/u2.py +++ b/deepspeech/models/u2/u2.py @@ -48,6 +48,7 @@ from deepspeech.utils.tensor_utils import add_sos_eos from deepspeech.utils.tensor_utils import pad_sequence from deepspeech.utils.tensor_utils import th_accuracy from deepspeech.utils.utility import log_add +from deepspeech.utils.utility import UpdateConfig __all__ = ["U2Model", "U2InferModel"] @@ -903,10 +904,10 @@ class U2Model(U2BaseModel): Returns: DeepSpeech2Model: The model built from pretrained result. """ - config.defrost() - config.input_dim = dataloader.collate_fn.feature_size - config.output_dim = dataloader.collate_fn.vocab_size - config.freeze() + with UpdateConfig(config): + config.input_dim = dataloader.collate_fn.feature_size + config.output_dim = dataloader.collate_fn.vocab_size + model = cls.from_config(config) if checkpoint_path: diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py index 6737a549..87ca68b2 100644 --- a/deepspeech/models/u2_st.py +++ b/deepspeech/models/u2_st.py @@ -42,6 +42,7 @@ from deepspeech.utils import layer_tools from deepspeech.utils.log import Log from deepspeech.utils.tensor_utils import add_sos_eos from deepspeech.utils.tensor_utils import th_accuracy +from deepspeech.utils.utility import UpdateConfig __all__ = ["U2STModel", "U2STInferModel"] @@ -686,10 +687,10 @@ class U2STModel(U2STBaseModel): Returns: DeepSpeech2Model: The model built from pretrained result. """ - config.defrost() - config.input_dim = dataloader.collate_fn.feature_size - config.output_dim = dataloader.collate_fn.vocab_size - config.freeze() + with UpdateConfig(config): + config.input_dim = dataloader.collate_fn.feature_size + config.output_dim = dataloader.collate_fn.vocab_size + model = cls.from_config(config) if checkpoint_path: From f15e1ff7325b6c0e94145bf4530b0601a2575413 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 16 Sep 2021 12:50:04 +0000 Subject: [PATCH 03/10] fix doc link --- README.md | 20 +++++++++--------- README_cn.md | 49 --------------------------------------------- docs/src/install.md | 2 +- 3 files changed, 10 insertions(+), 61 deletions(-) delete mode 100644 README_cn.md diff --git a/README.md b/README.md index 931e6331..71bc6363 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -[中文版](README_cn.md) - # PaddlePaddle Speech to Any toolkit ![License](https://img.shields.io/badge/license-Apache%202-red.svg) @@ -11,7 +9,7 @@ ## Features - See [feature list](doc/src/feature_list.md) for more information. + See [feature list](docs/src/feature_list.md) for more information. ## Setup @@ -20,20 +18,20 @@ All tested under: * python>=3.7 * paddlepaddle>=2.2.0rc -Please see [install](doc/src/install.md). +Please see [install](docs/src/install.md). ## Getting Started -Please see [Getting Started](doc/src/getting_started.md) and [tiny egs](examples/tiny/s0/README.md). +Please see [Getting Started](docs/src/getting_started.md) and [tiny egs](examples/tiny/s0/README.md). 
 ## More Information
 
-* [Data Prepration](doc/src/data_preparation.md)
-* [Data Augmentation](doc/src/augmentation.md)
-* [Ngram LM](doc/src/ngram_lm.md)
-* [Benchmark](doc/src/benchmark.md)
-* [Relased Model](doc/src/released_model.md)
+* [Data Preparation](docs/src/data_preparation.md)
+* [Data Augmentation](docs/src/augmentation.md)
+* [Ngram LM](docs/src/ngram_lm.md)
+* [Benchmark](docs/src/benchmark.md)
+* [Released Model](docs/src/released_model.md)
 
 ## Questions and Help
 
@@ -47,4 +45,4 @@ DeepSpeech is provided under the [Apache-2.0 License](./LICENSE).
 
 ## Acknowledgement
 
-We depends on many open source repos. See [References](doc/src/reference.md) for more information.
+We depend on many open-source repos. See [References](docs/src/reference.md) for more information.
diff --git a/README_cn.md b/README_cn.md
deleted file mode 100644
index cc993f8b..00000000
--- a/README_cn.md
+++ /dev/null
@@ -1,49 +0,0 @@
-[English](README.md)
-
-# PaddlePaddle Speech to Any toolkit
-
-![License](https://img.shields.io/badge/license-Apache%202-red.svg)
-![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
-![support os](https://img.shields.io/badge/os-linux-yellow.svg)
-
-*DeepSpeech*是一个采用[PaddlePaddle](https://github.com/PaddlePaddle/Paddle)平台的端到端自动语音识别引擎的开源项目,
-我们的愿景是为语音识别在工业应用和学术研究上,提供易于使用、高效、小型化和可扩展的工具,包括训练,推理,以及 部署。
-
-## 特性
-
- 参看 [特性列表](doc/src/feature_list.md)。
-
-
-## 安装
-
-在以下环境测试验证过:
-
-* Ubuntu 16.04
-* python>=3.7
-* paddlepaddle>=2.2.0rc
-
-参看 [安装](doc/src/install.md)。
-
-## 开始
-
-请查看 [开始](doc/src/getting_started.md) 和 [tiny egs](examples/tiny/s0/README.md)。
-
-## 更多信息
-
-* [数据处理](doc/src/data_preparation.md)
-* [数据增强](doc/src/augmentation.md)
-* [语言模型](doc/src/ngram_lm.md)
-* [Benchmark](doc/src/benchmark.md)
-* [Relased Model](doc/src/released_model.md)
-
-## 问题和帮助
-
-欢迎您在[Github讨论](https://github.com/PaddlePaddle/DeepSpeech/discussions)提交问题,[Github问题](https://github.com/PaddlePaddle/models/issues)中反馈bug。也欢迎您为这个项目做出贡献。
-
-## License
-
-DeepSpeech 遵循[Apache-2.0开源协议](./LICENSE)。
-
-## 感谢
-
-开发中参考一些优秀的仓库,详情参见 [References](doc/src/reference.md)。
diff --git a/docs/src/install.md b/docs/src/install.md
index 79460737..8cecba12 100644
--- a/docs/src/install.md
+++ b/docs/src/install.md
@@ -4,7 +4,7 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin
 
 ## Prerequisites
 - Python >= 3.7
-- PaddlePaddle 2.0.0 or later (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html))
+- The latest version of PaddlePaddle (please refer to the [Installation Guide](https://www.paddlepaddle.org.cn/documentation/docs/en/beginners_guide/index_en.html))
 
 ## Setup (Important)
 

From 576e94da044c5ee08c2ef417d1646c150f86d329 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Fri, 17 Sep 2021 02:49:36 +0000
Subject: [PATCH 04/10] set log interval to 1 when benchmarking

---
 deepspeech/exps/deepspeech2/model.py | 2 +-
 deepspeech/training/trainer.py       | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index df35c52c..128c4c82 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -100,7 +100,7 @@ class DeepSpeech2Trainer(Trainer):
 
         iteration_time = time.time() - start
 
-        msg += "train time: {:>.3f}s, ".format(iteration_time)
+        msg += "batch cost: {:>.3f}s, ".format(iteration_time)
         msg += "batch size: {}, ".format(self.config.collator.batch_size)
         msg += "accum: {}, ".format(train_conf.accum_grad)
         msg
+= ', '.join('{}: {:>.6f}'.format(k, v) diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 9549a4dd..f5e5f12a 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -106,6 +106,7 @@ class Trainer(): if self.args.benchmark_batch_size: with UpdateConfig(self.config): self.config.collator.batch_size = self.args.benchmark_batch_size + self.config.training.log_interval = 1 logger.info( f"Benchmark reset batch-size: {self.args.benchmark_batch_size}") From 6de20de3f899874b11b4af193cefed2809b9fd9e Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 02:50:55 +0000 Subject: [PATCH 05/10] rename reporter.scope to ObsScope --- deepspeech/training/extensions/evaluator.py | 4 ++-- deepspeech/training/reporter.py | 2 +- deepspeech/training/updaters/trainer.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deepspeech/training/extensions/evaluator.py b/deepspeech/training/extensions/evaluator.py index d5b35982..5137dbdd 100644 --- a/deepspeech/training/extensions/evaluator.py +++ b/deepspeech/training/extensions/evaluator.py @@ -21,7 +21,7 @@ from paddle.nn import Layer from . import extension from ..reporter import DictSummary from ..reporter import report -from ..reporter import scope +from ..reporter import ObsScope from ..timer import Timer from deepspeech.utils.log import Log logger = Log(__name__).getlog() @@ -78,7 +78,7 @@ class StandardEvaluator(extension.Extension): summary = DictSummary() for batch in self.dataloader: observation = {} - with scope(observation): + with ObsScope(observation): # main evaluation computation here. with paddle.no_grad(): self.evaluate_sync(self.evaluate_core(batch)) diff --git a/deepspeech/training/reporter.py b/deepspeech/training/reporter.py index 66a81ade..7afc33f3 100644 --- a/deepspeech/training/reporter.py +++ b/deepspeech/training/reporter.py @@ -19,7 +19,7 @@ OBSERVATIONS = None @contextlib.contextmanager -def scope(observations): +def ObsScope(observations): # make `observation` the target to report to. 
# it is basically a dictionary that stores temporary observations global OBSERVATIONS diff --git a/deepspeech/training/updaters/trainer.py b/deepspeech/training/updaters/trainer.py index a52fb9eb..07769465 100644 --- a/deepspeech/training/updaters/trainer.py +++ b/deepspeech/training/updaters/trainer.py @@ -24,7 +24,7 @@ import tqdm from deepspeech.training.extensions.extension import Extension from deepspeech.training.extensions.extension import PRIORITY_READER -from deepspeech.training.reporter import scope +from deepspeech.training.reporter import ObsScope from deepspeech.training.triggers import get_trigger from deepspeech.training.triggers.limit_trigger import LimitTrigger from deepspeech.training.updaters.updater import UpdaterBase @@ -144,7 +144,7 @@ class Trainer(): # you can use `report` freely in Updater.update() # updating parameters and state - with scope(self.observation): + with ObsScope(self.observation): update() p.update() From 0e91d26ae3808497072c36d71d424a4db40cf4f8 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:26:08 +0000 Subject: [PATCH 06/10] fix log; add report to trainer --- deepspeech/exps/deepspeech2/model.py | 22 ++++++++------- deepspeech/exps/u2/model.py | 42 ++++++++++++++++++---------- deepspeech/training/trainer.py | 35 +++++++++++++++++------ examples/aishell/s1/local/train.sh | 6 ++-- examples/tiny/s1/local/train.sh | 4 +-- 5 files changed, 70 insertions(+), 39 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 128c4c82..8272d72e 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -36,6 +36,7 @@ from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline from deepspeech.models.ds2_online import DeepSpeech2ModelOnline from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog from deepspeech.training.trainer import Trainer +from deepspeech.training.reporter import report from deepspeech.utils import error_rate from deepspeech.utils import layer_tools from deepspeech.utils import mp_tools @@ -67,7 +68,9 @@ class DeepSpeech2Trainer(Trainer): super().__init__(config, args) def train_batch(self, batch_index, batch_data, msg): - train_conf = self.config.training + batch_size = self.config.collator.batch_size + accum_grad = self.config.training.accum_grad + start = time.time() # forward @@ -78,7 +81,7 @@ class DeepSpeech2Trainer(Trainer): } # loss backward - if (batch_index + 1) % train_conf.accum_grad != 0: + if (batch_index + 1) % accum_grad != 0: # Disable gradient synchronizations across DDP processes. # Within this context, gradients will be accumulated on module # variables, which will later be synchronized. 
@@ -93,20 +96,19 @@ class DeepSpeech2Trainer(Trainer): layer_tools.print_grads(self.model, print_func=None) # optimizer step - if (batch_index + 1) % train_conf.accum_grad == 0: + if (batch_index + 1) % accum_grad == 0: self.optimizer.step() self.optimizer.clear_grad() self.iteration += 1 iteration_time = time.time() - start - msg += "batch cost: {:>.3f}s, ".format(iteration_time) - msg += "batch size: {}, ".format(self.config.collator.batch_size) - msg += "accum: {}, ".format(train_conf.accum_grad) - msg += ', '.join('{}: {:>.6f}'.format(k, v) - for k, v in losses_np.items()) - logger.info(msg) - + for k, v in losses_np.items(): + report(k, v) + report("batch_size", batch_size) + report("accum", accum_grad) + report("step_cost", iteration_time) + if dist.get_rank() == 0 and self.visualizer: for k, v in losses_np.items(): # `step -1` since we update `step` after optimizer.step(). diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 89d443e0..68b001ca 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -17,6 +17,7 @@ import os import sys import time from collections import defaultdict +from collections import OrderedDict from contextlib import nullcontext from pathlib import Path from typing import Optional @@ -36,6 +37,8 @@ from deepspeech.training.optimizer import OptimizerFactory from deepspeech.training.scheduler import LRSchedulerFactory from deepspeech.training.timer import Timer from deepspeech.training.trainer import Trainer +from deepspeech.training.reporter import report +from deepspeech.training.reporter import ObsScope from deepspeech.utils import ctc_utils from deepspeech.utils import error_rate from deepspeech.utils import layer_tools @@ -121,12 +124,11 @@ class U2Trainer(Trainer): iteration_time = time.time() - start if (batch_index + 1) % train_conf.log_interval == 0: - msg += "train time: {:>.3f}s, ".format(iteration_time) - msg += "batch size: {}, ".format(self.config.collator.batch_size) - msg += "accum: {}, ".format(train_conf.accum_grad) - msg += ', '.join('{}: {:>.6f}'.format(k, v) - for k, v in losses_np.items()) - logger.info(msg) + for k, v in losses_np.items(): + report(k, v) + report("batch_size", self.config.collator.batch_size) + report("accum", train_conf.accum_grad) + report("step_cost", iteration_time) if dist.get_rank() == 0 and self.visualizer: losses_np_v = losses_np.copy() @@ -199,15 +201,25 @@ class U2Trainer(Trainer): data_start_time = time.time() for batch_index, batch in enumerate(self.train_loader): dataload_time = time.time() - data_start_time - msg = "Train: Rank: {}, ".format(dist.get_rank()) - msg += "epoch: {}, ".format(self.epoch) - msg += "step: {}, ".format(self.iteration) - msg += "batch : {}/{}, ".format(batch_index + 1, - len(self.train_loader)) - msg += "lr: {:>.8f}, ".format(self.lr_scheduler()) - msg += "data time: {:>.3f}s, ".format(dataload_time) - self.train_batch(batch_index, batch, msg) - self.after_train_batch() + msg = "Train:" + observation = OrderedDict() + with ObsScope(observation): + report("Rank", dist.get_rank()) + report("epoch", self.epoch) + report('step', self.iteration) + report('step/total', (batch_index + 1) / len(self.train_loader)) + report("lr", self.lr_scheduler()) + self.train_batch(batch_index, batch, msg) + self.after_train_batch() + report('reader_cost', dataload_time) + observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['samples'] = observation['batch_size'] + observation['ips[sent./sec]'] = 
observation['batch_size'] / observation['batch_cost'] + for k, v in observation.items(): + msg += f" {k}: " + msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += "," + logger.info(msg) data_start_time = time.time() except Exception as e: logger.error(e) diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index f5e5f12a..18578b42 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -14,12 +14,15 @@ import sys import time from pathlib import Path +from collections import OrderedDict import paddle from paddle import distributed as dist from tensorboardX import SummaryWriter from deepspeech.training.timer import Timer +from deepspeech.training.reporter import report +from deepspeech.training.reporter import ObsScope from deepspeech.utils import mp_tools from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint @@ -27,6 +30,7 @@ from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all from deepspeech.utils.utility import UpdateConfig + __all__ = ["Trainer"] logger = Log(__name__).getlog() @@ -98,6 +102,9 @@ class Trainer(): self.checkpoint_dir = None self.iteration = 0 self.epoch = 0 + self.rank = dist.get_rank() + + logger.info(f"Rank: {self.rank}/{dist.get_world_size()}") if args.seed: seed_all(args.seed) @@ -223,15 +230,25 @@ class Trainer(): data_start_time = time.time() for batch_index, batch in enumerate(self.train_loader): dataload_time = time.time() - data_start_time - msg = "Train: Rank: {}, ".format(dist.get_rank()) - msg += "epoch: {}, ".format(self.epoch) - msg += "step: {}, ".format(self.iteration) - msg += "batch : {}/{}, ".format(batch_index + 1, - len(self.train_loader)) - msg += "lr: {:>.8f}, ".format(self.lr_scheduler()) - msg += "data time: {:>.3f}s, ".format(dataload_time) - self.train_batch(batch_index, batch, msg) - self.after_train_batch() + msg = "Train:" + observation = OrderedDict() + with ObsScope(observation): + report("Rank", dist.get_rank()) + report("epoch", self.epoch) + report('step', self.iteration) + report('step/total', (batch_index + 1) / len(self.train_loader)) + report("lr", self.lr_scheduler()) + self.train_batch(batch_index, batch, msg) + self.after_train_batch() + report('reader_cost', dataload_time) + observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['samples'] = observation['batch_size'] + observation['ips[sent./sec]'] = observation['batch_size'] / observation['batch_cost'] + for k, v in observation.items(): + msg += f" {k}: " + msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += "," + logger.info(msg) data_start_time = time.time() except Exception as e: logger.error(e) diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index 5b9c45f5..1a341de7 100755 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -1,8 +1,8 @@ #!/bin/bash profiler_options= -benchmark_batch_size= -benchmark_max_step= +benchmark_batch_size=0 +benchmark_max_step=0 # seed may break model convergence seed=0 @@ -52,4 +52,4 @@ if [ $? 
-ne 0 ]; then exit 1 fi -exit 0 \ No newline at end of file +exit 0 diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh index 56ceab41..5097d4d0 100755 --- a/examples/tiny/s1/local/train.sh +++ b/examples/tiny/s1/local/train.sh @@ -1,8 +1,8 @@ #!/bin/bash profiler_options= -benchmark_batch_size= -benchmark_max_step= +benchmark_batch_size=0 +benchmark_max_step=0 # seed may break model convergence seed=0 From 054e099b282d697a4c16c5aaf98fafc47b9b938a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:26:35 +0000 Subject: [PATCH 07/10] format --- deepspeech/exps/deepspeech2/model.py | 4 ++-- deepspeech/exps/u2/model.py | 16 ++++++++++------ deepspeech/training/extensions/evaluator.py | 2 +- deepspeech/training/trainer.py | 19 +++++++++++-------- 4 files changed, 24 insertions(+), 17 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 8272d72e..7bf02930 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -35,8 +35,8 @@ from deepspeech.models.ds2 import DeepSpeech2Model from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline from deepspeech.models.ds2_online import DeepSpeech2ModelOnline from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog -from deepspeech.training.trainer import Trainer from deepspeech.training.reporter import report +from deepspeech.training.trainer import Trainer from deepspeech.utils import error_rate from deepspeech.utils import layer_tools from deepspeech.utils import mp_tools @@ -108,7 +108,7 @@ class DeepSpeech2Trainer(Trainer): report("batch_size", batch_size) report("accum", accum_grad) report("step_cost", iteration_time) - + if dist.get_rank() == 0 and self.visualizer: for k, v in losses_np.items(): # `step -1` since we update `step` after optimizer.step(). 
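The reporter pattern that the last few patches converge on, in one place: `report(key, value)` writes into whatever dict the enclosing `ObsScope` has installed, and the training loop then derives `batch_cost` and `ips` from the reported values. A self-contained sketch of that contract follows — a reimplementation for illustration only, not the library code (the real module is deepspeech/training/reporter.py; the save/restore detail on scope exit is an assumption):

    import contextlib
    from collections import OrderedDict

    OBSERVATIONS = None

    @contextlib.contextmanager
    def ObsScope(observations):
        # Install `observations` as the dict report() writes into while the
        # scope is active; restore the previous target on exit (assumed).
        global OBSERVATIONS
        old = OBSERVATIONS
        OBSERVATIONS = observations
        try:
            yield
        finally:
            OBSERVATIONS = old

    def report(name, value):
        # Outside any scope this is a no-op, so train_batch() can call it
        # unconditionally.
        if OBSERVATIONS is not None:
            OBSERVATIONS[name] = value

    observation = OrderedDict()
    with ObsScope(observation):
        report("batch_size", 64)
        report("reader_cost", 0.012)  # seconds spent loading the batch
        report("step_cost", 0.235)    # seconds spent in forward/backward/step
    # Derived metrics, computed the same way the new train() loop does:
    observation["batch_cost"] = observation["reader_cost"] + observation["step_cost"]
    observation["ips[sent./sec]"] = observation["batch_size"] / observation["batch_cost"]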
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 68b001ca..2e512ef1 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -34,11 +34,11 @@ from deepspeech.io.sampler import SortagradBatchSampler from deepspeech.io.sampler import SortagradDistributedBatchSampler from deepspeech.models.u2 import U2Model from deepspeech.training.optimizer import OptimizerFactory +from deepspeech.training.reporter import ObsScope +from deepspeech.training.reporter import report from deepspeech.training.scheduler import LRSchedulerFactory from deepspeech.training.timer import Timer from deepspeech.training.trainer import Trainer -from deepspeech.training.reporter import report -from deepspeech.training.reporter import ObsScope from deepspeech.utils import ctc_utils from deepspeech.utils import error_rate from deepspeech.utils import layer_tools @@ -207,17 +207,21 @@ class U2Trainer(Trainer): report("Rank", dist.get_rank()) report("epoch", self.epoch) report('step', self.iteration) - report('step/total', (batch_index + 1) / len(self.train_loader)) + report('step/total', + (batch_index + 1) / len(self.train_loader)) report("lr", self.lr_scheduler()) self.train_batch(batch_index, batch, msg) self.after_train_batch() report('reader_cost', dataload_time) - observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['batch_cost'] = observation[ + 'reader_cost'] + observation['step_cost'] observation['samples'] = observation['batch_size'] - observation['ips[sent./sec]'] = observation['batch_size'] / observation['batch_cost'] + observation['ips[sent./sec]'] = observation[ + 'batch_size'] / observation['batch_cost'] for k, v in observation.items(): msg += f" {k}: " - msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += f"{v:>.8f}" if isinstance(v, + float) else f"{v}" msg += "," logger.info(msg) data_start_time = time.time() diff --git a/deepspeech/training/extensions/evaluator.py b/deepspeech/training/extensions/evaluator.py index 5137dbdd..1026a4ec 100644 --- a/deepspeech/training/extensions/evaluator.py +++ b/deepspeech/training/extensions/evaluator.py @@ -20,8 +20,8 @@ from paddle.nn import Layer from . import extension from ..reporter import DictSummary -from ..reporter import report from ..reporter import ObsScope +from ..reporter import report from ..timer import Timer from deepspeech.utils.log import Log logger = Log(__name__).getlog() diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 18578b42..a5efdd54 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -13,16 +13,16 @@ # limitations under the License. 
import sys import time -from pathlib import Path from collections import OrderedDict +from pathlib import Path import paddle from paddle import distributed as dist from tensorboardX import SummaryWriter -from deepspeech.training.timer import Timer -from deepspeech.training.reporter import report from deepspeech.training.reporter import ObsScope +from deepspeech.training.reporter import report +from deepspeech.training.timer import Timer from deepspeech.utils import mp_tools from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint @@ -30,7 +30,6 @@ from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all from deepspeech.utils.utility import UpdateConfig - __all__ = ["Trainer"] logger = Log(__name__).getlog() @@ -236,17 +235,21 @@ class Trainer(): report("Rank", dist.get_rank()) report("epoch", self.epoch) report('step', self.iteration) - report('step/total', (batch_index + 1) / len(self.train_loader)) + report('step/total', + (batch_index + 1) / len(self.train_loader)) report("lr", self.lr_scheduler()) self.train_batch(batch_index, batch, msg) self.after_train_batch() report('reader_cost', dataload_time) - observation['batch_cost'] = observation['reader_cost']+observation['step_cost'] + observation['batch_cost'] = observation[ + 'reader_cost'] + observation['step_cost'] observation['samples'] = observation['batch_size'] - observation['ips[sent./sec]'] = observation['batch_size'] / observation['batch_cost'] + observation['ips[sent./sec]'] = observation[ + 'batch_size'] / observation['batch_cost'] for k, v in observation.items(): msg += f" {k}: " - msg += f"{v:>.8f}" if isinstance(v, float) else f"{v}" + msg += f"{v:>.8f}" if isinstance(v, + float) else f"{v}" msg += "," logger.info(msg) data_start_time = time.time() From 9fb349f9355b63a41c04471f928e1bc27c46bb8e Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:31:21 +0000 Subject: [PATCH 08/10] fix benchmark cli --- deepspeech/training/cli.py | 68 ++++++++++++++++++++++++++++---------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py index d8719b3a..07c213db 100644 --- a/deepspeech/training/cli.py +++ b/deepspeech/training/cli.py @@ -43,25 +43,57 @@ def default_argument_parser(): """ parser = argparse.ArgumentParser() - # yapf: disable - train_group = parser.add_argument_group(title='Train Options', description=None) - train_group.add_argument("--seed", type=int, default=None, - help="seed to use for paddle, np and random. None or 0 for random, else set seed.") - train_group.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"], + train_group = parser.add_argument_group( + title='Train Options', description=None) + train_group.add_argument( + "--seed", + type=int, + default=None, + help="seed to use for paddle, np and random. None or 0 for random, else set seed." + ) + train_group.add_argument( + "--device", + type=str, + default='gpu', + choices=["cpu", "gpu"], help="device cpu and gpu are supported.") - train_group.add_argument("--nprocs", type=int, default=1, help="number of parallel processes. 
0 for cpu.")
    train_group.add_argument(
        "--config", metavar="CONFIG_FILE", help="path to the config file.")
    train_group.add_argument(
        "--output", metavar="CKPT_DIR", help="directory to save checkpoints.")
    train_group.add_argument(
        "--checkpoint_path", type=str, help="path of the checkpoint to load.")
    train_group.add_argument(
        "--opts",
        type=str,
        default=[],
        nargs='+',
        help="options to overwrite the --config file, given as KEY VALUE pairs.")
    train_group.add_argument(
        "--dump-config", metavar="FILE", help="dump the config to this file.")

    profile_group = parser.add_argument_group(
        title='Benchmark Options', description=None)
    profile_group.add_argument(
        '--profiler-options',
        type=str,
        default=None,
        help='profiler options, in the format \"key1=value1;key2=value2;key3=value3\".'
+ ) + profile_group.add_argument( + '--benchmark-batch-size', + type=int, + default=None, + help='batch size for benchmark.') + profile_group.add_argument( + '--benchmark-max-step', + type=int, + default=None, + help='max iteration for benchmark.') return parser From b381f5b447f53fb6915f1d30328bde8d29c5f27a Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 06:35:56 +0000 Subject: [PATCH 09/10] fix profiler optitons config --- examples/aishell/s1/local/train.sh | 2 +- examples/tiny/s0/local/train.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index 1a341de7..5097d4d0 100755 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -38,7 +38,7 @@ python3 -u ${BIN_DIR}/train.py \ --nproc ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---profiler-options "${profiler-options}" \ +--profiler-options "${profiler_options}" \ --benchmark-batch-size ${benchmark_batch_size} \ --benchmark-max-step ${benchmark_max_step} diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh index f96508b4..9a76c7ad 100755 --- a/examples/tiny/s0/local/train.sh +++ b/examples/tiny/s0/local/train.sh @@ -38,7 +38,7 @@ python3 -u ${BIN_DIR}/train.py \ --config ${config_path} \ --output exp/${ckpt_name} \ --model_type ${model_type} \ ---profiler_options "${profiler_options}" \ +--profiler-options "${profiler_options}" \ --seed ${seed} if [ ${seed} != 0 ]; then From 9a95ceb0b4f605dc7c825c67c62ed1dcc3918f25 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Fri, 17 Sep 2021 07:25:49 +0000 Subject: [PATCH 10/10] add Acknowledgements --- docs/src/reference.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/src/reference.md b/docs/src/reference.md index 341e1361..d3676fff 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -1,5 +1,7 @@ # Reference +We refer these repos to build `model` and `engine`: + * [delta](https://github.com/Delta-ML/delta.git) * [espnet](https://github.com/espnet/espnet.git) * [kaldi](https://github.com/kaldi-asr/kaldi.git)