diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 1328a1cb..0d17d9fd 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -100,7 +100,8 @@ class U2Trainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            # When using cpu w/o DDP, model does not have `no_sync`
+            context = self.model.no_sync if self.parallel else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.
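Note on the hunk above: `no_sync` is only provided by the `paddle.DataParallel` wrapper, so a bare CPU / single-process model has no such attribute, hence the `self.parallel` guard. A minimal illustrative sketch of the accumulate-then-synchronize pattern this hunk implements (the function and variable names below are placeholders, not repository code):

    from contextlib import nullcontext

    def backward_with_accumulation(model, loss, batch_index, accum_grad, parallel):
        """Accumulate gradients locally; only sync across DDP workers on the
        last micro-batch of each accumulation group."""
        if (batch_index + 1) % accum_grad != 0:
            # `no_sync` exists only on the DataParallel wrapper, hence the guard
            # added in the patch for CPU / single-process runs.
            context = model.no_sync if parallel else nullcontext
        else:
            context = nullcontext
        with context():
            loss.backward()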
0 for cpu.") + train_group.add_argument("--config", metavar="CONFIG_FILE", help="config file.") + train_group.add_argument("--output", metavar="CKPT_DIR", help="path to save checkpoint.") + train_group.add_argument("--checkpoint_path", type=str, help="path to load checkpoint") + train_group.add_argument("--opts", type=str, default=[], nargs='+', + help="overwrite --config file, passing in LIST[KEY VALUE] pairs") + train_group.add_argument("--dump-config", metavar="FILE", help="dump config to `this` file.") + + bech_group = parser.add_argument_group(title='Benchmark Options', description=None) + bech_group.add_argument('--profiler-options', type=str, default=None, help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".') + bech_group.add_argument('--benchmark-batch-size', type=int, default=None, help='batch size for benchmark.') + bech_group.add_argument('--benchmark-max-step', type=int, default=None, help='max iteration for benchmark.') # yapd: enable return parser diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index 6587f129..9549a4dd 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +import sys import time from pathlib import Path @@ -24,6 +25,7 @@ from deepspeech.utils import profiler from deepspeech.utils.checkpoint import Checkpoint from deepspeech.utils.log import Log from deepspeech.utils.utility import seed_all +from deepspeech.utils.utility import UpdateConfig __all__ = ["Trainer"] @@ -101,6 +103,12 @@ class Trainer(): seed_all(args.seed) logger.info(f"Set seed {args.seed}") + if self.args.benchmark_batch_size: + with UpdateConfig(self.config): + self.config.collator.batch_size = self.args.benchmark_batch_size + logger.info( + f"Benchmark reset batch-size: {self.args.benchmark_batch_size}") + def setup(self): """Setup the experiment. 
""" @@ -188,6 +196,12 @@ class Trainer(): if self.args.profiler_options: profiler.add_profiler_step(self.args.profiler_options) + if self.args.benchmark_max_step and self.iteration > self.args.benchmark_max_step: + logger.info( + f"Reach benchmark-max-step: {self.args.benchmark_max_step}") + sys.exit( + f"Reach benchmark-max-step: {self.args.benchmark_max_step}") + def train(self): """The training process control by epoch.""" from_scratch = self.resume_or_scratch() diff --git a/deepspeech/utils/utility.py b/deepspeech/utils/utility.py index e18fc1f7..6f84c41b 100644 --- a/deepspeech/utils/utility.py +++ b/deepspeech/utils/utility.py @@ -16,15 +16,27 @@ import distutils.util import math import os import random +from contextlib import contextmanager from typing import List import numpy as np import paddle -__all__ = ["seed_all", 'print_arguments', 'add_arguments', "log_add"] +__all__ = [ + "UpdateConfig", "seed_all", 'print_arguments', 'add_arguments', "log_add" +] + + +@contextmanager +def UpdateConfig(config): + """Update yacs config""" + config.defrost() + yield + config.freeze() def seed_all(seed: int=210329): + """freeze random generator seed.""" np.random.seed(seed) random.seed(seed) paddle.seed(seed) diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh index e065ad6a..5b9c45f5 100755 --- a/examples/aishell/s1/local/train.sh +++ b/examples/aishell/s1/local/train.sh @@ -1,7 +1,8 @@ #!/bin/bash - profiler_options= +benchmark_batch_size= +benchmark_max_step= # seed may break model convergence seed=0 @@ -32,12 +33,15 @@ ckpt_name=$2 mkdir -p exp python3 -u ${BIN_DIR}/train.py \ +--seed ${seed} \ --device ${device} \ --nproc ${ngpu} \ --config ${config_path} \ --output exp/${ckpt_name} \ ---profiler_options ${profiler_options} \ ---seed ${seed} +--profiler-options "${profiler-options}" \ +--benchmark-batch-size ${benchmark_batch_size} \ +--benchmark-max-step ${benchmark_max_step} + if [ ${seed} != 0 ]; then unset FLAGS_cudnn_deterministic diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh index 374608fd..56ceab41 100755 --- a/examples/tiny/s1/local/train.sh +++ b/examples/tiny/s1/local/train.sh @@ -1,37 +1,49 @@ #!/bin/bash -if [ $# != 2 ];then - echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" - exit -1 -fi +profiler_options= +benchmark_batch_size= +benchmark_max_step= + +# seed may break model convergence +seed=0 + +source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') echo "using $ngpu gpus..." -config_path=$1 -ckpt_name=$2 - device=gpu if [ ${ngpu} == 0 ];then device=cpu fi -mkdir -p exp - -# seed may break model convergence -seed=0 -if [ ${seed} != 0 ]; then +if [ ${seed} != 0 ]; then export FLAGS_cudnn_deterministic=True + echo "using seed $seed & FLAGS_cudnn_deterministic=True ..." 
diff --git a/examples/aishell/s1/local/train.sh b/examples/aishell/s1/local/train.sh
index e065ad6a..5b9c45f5 100755
--- a/examples/aishell/s1/local/train.sh
+++ b/examples/aishell/s1/local/train.sh
@@ -1,7 +1,8 @@
 #!/bin/bash
-
 profiler_options=
+benchmark_batch_size=
+benchmark_max_step=
 
 # seed may break model convergence
 seed=0
@@ -32,12 +33,15 @@ ckpt_name=$2
 mkdir -p exp
 
 python3 -u ${BIN_DIR}/train.py \
+--seed ${seed} \
 --device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
---profiler_options ${profiler_options} \
---seed ${seed}
+--profiler-options "${profiler_options}" \
+--benchmark-batch-size ${benchmark_batch_size} \
+--benchmark-max-step ${benchmark_max_step}
+
 
 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
diff --git a/examples/tiny/s1/local/train.sh b/examples/tiny/s1/local/train.sh
index 374608fd..56ceab41 100755
--- a/examples/tiny/s1/local/train.sh
+++ b/examples/tiny/s1/local/train.sh
@@ -1,37 +1,49 @@
 #!/bin/bash
 
-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
-    exit -1
-fi
+profiler_options=
+benchmark_batch_size=
+benchmark_max_step=
+
+# seed may break model convergence
+seed=0
+
+source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
-config_path=$1
-ckpt_name=$2
-
 device=gpu
 if [ ${ngpu} == 0 ];then
     device=cpu
 fi
 
-mkdir -p exp
-
-# seed may break model convergence
-seed=0
-if [ ${seed} != 0 ]; then 
+if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
+    echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
+fi
+
+if [ $# != 2 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+    exit -1
 fi
 
+config_path=$1
+ckpt_name=$2
+
+mkdir -p exp
+
 python3 -u ${BIN_DIR}/train.py \
+--seed ${seed} \
 --device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
---seed ${seed}
+--profiler-options "${profiler_options}" \
+--benchmark-batch-size ${benchmark_batch_size} \
+--benchmark-max-step ${benchmark_max_step}
+
 
-if [ ${seed} != 0 ]; then 
+if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic
 fi
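Note on the quoting above: `--profiler-options "${profiler_options}"` is quoted because the value is a semicolon-separated `key=value` string (see the cli.py help text), and an unquoted semicolon would be treated by the shell as a command separator. A rough, purely illustrative sketch of splitting such a string in Python; the actual handling lives in `deepspeech/utils/profiler.py` and may differ:

    def parse_profiler_options(options_str):
        """Split "key1=value1;key2=value2;key3=value3" into a dict (illustrative only)."""
        options = {}
        if not options_str:
            return options
        for pair in options_str.split(';'):
            key, _, value = pair.partition('=')
            options[key.strip()] = value.strip()
        return options

    assert parse_profiler_options("key1=value1;key2=value2") == {"key1": "value1", "key2": "value2"}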
diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh
old mode 100644
new mode 100755
index 7aa11d0f..6f707cdc
--- a/tests/benchmark/run_all.sh
+++ b/tests/benchmark/run_all.sh
@@ -1,41 +1,46 @@
 #!/bin/bash
+CUR_DIR=${PWD}
 ROOT_DIR=../../
 
 # Script for stable, reproducible benchmarking; by default it runs with py37 inside the standard docker environment:
 # collect env info
 bash ${ROOT_DIR}/utils/pd_env_collect.sh
-cat pd_env.txt
+#cat pd_env.txt
 
-# Working directory: to be documented
-pushd ${ROOT_DIR}/examples/aishell/s1
 
 # 1. Install the dependencies this model needs (note here if optimization strategies are enabled)
-pushd ${ROOT_DIR}/tools; make; popd
-source ${ROOT_DIR}/tools/venv/bin/activate
-pushd ${ROOT_DIR}; bash setup.sh; popd
+#pushd ${ROOT_DIR}/tools; make; popd
+#source ${ROOT_DIR}/tools/venv/bin/activate
+#pushd ${ROOT_DIR}; bash setup.sh; popd
 
 # 2. Copy the data and pretrained models this model needs
+
+# Working directory: to be documented
+#pushd ${ROOT_DIR}/examples/aishell/s1
+pushd ${ROOT_DIR}/examples/tiny/s1
+
 mkdir -p exp/log
-loca/data.sh &> exp/log/data.log
+. path.sh
+#bash local/data.sh &> exp/log/data.log
 
 # 3. Run in batch (if batch running is inconvenient, move steps 1 and 2 into each model's own script)
-model_mode_list=(conformer)
+model_mode_list=(conformer transformer)
 fp_item_list=(fp32)
-bs_item=(32 64 96)
+bs_item_list=(32 64 96)
 for model_mode in ${model_mode_list[@]}; do
   for fp_item in ${fp_item_list[@]}; do
-    for bs_item in ${bs_list[@]}
+    for bs_item in ${bs_item_list[@]}
       do
       echo "index is speed, 1gpus, begin, ${model_name}"
       run_mode=sp
-      CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}     # (5min)
+      CUDA_VISIBLE_DEVICES=0 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}     # (5min)
       sleep 60
       echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
       run_mode=mp
-      CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
+      CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
       sleep 60
       done
   done
diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh
old mode 100644
new mode 100755
index 625d3616..eb111793
--- a/tests/benchmark/run_benchmark.sh
+++ b/tests/benchmark/run_benchmark.sh
@@ -23,19 +23,19 @@ function _train(){
     echo "Train on ${num_gpu_devices} GPUs"
     echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
 
-    train_cmd="--model_name=${model_name}
-               --batch_size=${batch_size}
-               --fp=${fp_item} \
-               --max_iter=${max_iter} "
+    train_cmd="--benchmark-batch-size ${batch_size}
+               --benchmark-max-step ${max_iter}
+               conf/${model_name}.yaml ${model_name}"
+
     case ${run_mode} in
-    sp) train_cmd="python -u tools/train.py "${train_cmd}" ;;
+    sp) train_cmd="bash local/train.sh "${train_cmd}"" ;;
     mp)
-        train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py "${train_cmd}"
-        log_parse_file="mylog/workerlog.0" ;;
+        train_cmd="bash local/train.sh "${train_cmd}"" ;;
     *) echo "choose run_mode(sp or mp)"; exit 1;
     esac
-# Do not modify the lines below
-    timeout 15m ${train_cmd} > ${log_file} 2>&1
+
+    # Do not modify the lines below
+    CUDA_VISIBLE_DEVICES=${device} timeout 15m ${train_cmd} > ${log_file} 2>&1
     if [ $? -ne 0 ];then
         echo -e "${model_name}, FAIL"
         export job_fail_flag=1
@@ -43,7 +43,8 @@ function _train(){
         echo -e "${model_name}, SUCCESS"
         export job_fail_flag=0
     fi
-    kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+
+    trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM
 
     if [ $run_mode = "mp" -a -d mylog ]; then
         rm ${log_file}