add benchmark flags and logic

pull/837/head
Hui Zhang 3 years ago
parent dc2cdbf3fb
commit cda6ca8323

@@ -100,7 +100,8 @@ class U2Trainer(Trainer):
             # Disable gradient synchronizations across DDP processes.
             # Within this context, gradients will be accumulated on module
             # variables, which will later be synchronized.
-            context = self.model.no_sync
+            # When using cpu w/o DDP, model does not have `no_sync`
+            context = self.model.no_sync if self.parallel else nullcontext
         else:
             # Used for single gpu training and DDP gradient synchronization
             # processes.
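The fallback matters because `no_sync` only exists on the `paddle.DataParallel` wrapper; a plain `paddle.nn.Layer` running on CPU without DDP has no such attribute, so `contextlib.nullcontext` stands in as a no-op. A minimal sketch of the accumulation pattern (the `model` and batch here are illustrative, not from this patch):

from contextlib import nullcontext  # no-op stand-in, Python 3.7+

import paddle
import paddle.distributed as dist

model = paddle.nn.Linear(4, 2)
parallel = dist.get_world_size() > 1
if parallel:
    dist.init_parallel_env()  # required before wrapping with DataParallel
    model = paddle.DataParallel(model)

# The patched line: only the DataParallel wrapper exposes `no_sync`.
context = model.no_sync if parallel else nullcontext

with context():
    loss = model(paddle.randn([8, 4])).mean()
    loss.backward()  # gradients accumulate locally; all-reduce is deferred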

@@ -44,32 +44,24 @@ def default_argument_parser():
     parser = argparse.ArgumentParser()

     # yapf: disable
-    # data and output
-    parser.add_argument("--config", metavar="FILE", help="path of the config file to overwrite to default config with.")
-    parser.add_argument("--dump-config", metavar="FILE", help="dump config to yaml file.")
-    parser.add_argument("--output", metavar="OUTPUT_DIR", help="path to save checkpoint and logs.")
-    # load from saved checkpoint
-    parser.add_argument("--checkpoint_path", type=str, help="path of the checkpoint to load")
-    # running
-    parser.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"],
-                        help="device type to use, cpu and gpu are supported.")
-    parser.add_argument("--nprocs", type=int, default=1, help="number of parallel processes to use.")
-    # overwrite extra config and default config
-    # parser.add_argument("--opts", nargs=argparse.REMAINDER,
-    #     help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
-    parser.add_argument("--opts", type=str, default=[], nargs='+',
-        help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
-    # random seed
-    parser.add_argument("--seed", type=int, default=None,
+    train_group = parser.add_argument_group(title='Train Options', description=None)
+    train_group.add_argument("--seed", type=int, default=None,
                         help="seed to use for paddle, np and random. None or 0 for random, else set seed.")
-    # profiler
-    parser.add_argument('--profiler_options', type=str, default=None,
+    train_group.add_argument("--device", type=str, default='gpu', choices=["cpu", "gpu"],
+                        help="device cpu and gpu are supported.")
+    train_group.add_argument("--nprocs", type=int, default=1, help="number of parallel processes. 0 for cpu.")
+    train_group.add_argument("--config", metavar="CONFIG_FILE", help="config file.")
+    train_group.add_argument("--output", metavar="CKPT_DIR", help="path to save checkpoint.")
+    train_group.add_argument("--checkpoint_path", type=str, help="path to load checkpoint")
+    train_group.add_argument("--opts", type=str, default=[], nargs='+',
+                        help="overwrite --config file, passing in LIST[KEY VALUE] pairs")
+    train_group.add_argument("--dump-config", metavar="FILE", help="dump config to `this` file.")
+    bench_group = parser.add_argument_group(title='Benchmark Options', description=None)
+    bench_group.add_argument('--profiler-options', type=str, default=None,
                         help='The option of profiler, which should be in format \"key1=value1;key2=value2;key3=value3\".')
+    bench_group.add_argument('--benchmark-batch-size', type=int, default=None, help='batch size for benchmark.')
+    bench_group.add_argument('--benchmark-max-step', type=int, default=None, help='max iteration for benchmark.')
     # yapf: enable

     return parser
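For reference, argparse maps the new dashed option names to underscored attributes. A quick usage sketch (the `deepspeech.training.cli` module path is assumed from this repo's layout):

from deepspeech.training.cli import default_argument_parser

parser = default_argument_parser()
args = parser.parse_args([
    "--config", "conf/conformer.yaml",
    "--output", "exp/conformer",
    "--benchmark-batch-size", "32",
    "--benchmark-max-step", "500",
])
# dashes in option names become underscores in attribute names
assert args.benchmark_batch_size == 32
assert args.benchmark_max_step == 500
assert args.profiler_options is None  # default when the flag is omitted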

@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import sys
 import time
 from pathlib import Path

@@ -24,6 +25,7 @@ from deepspeech.utils import profiler
 from deepspeech.utils.checkpoint import Checkpoint
 from deepspeech.utils.log import Log
 from deepspeech.utils.utility import seed_all
+from deepspeech.utils.utility import UpdateConfig

 __all__ = ["Trainer"]

@@ -101,6 +103,12 @@ class Trainer():
             seed_all(args.seed)
             logger.info(f"Set seed {args.seed}")

+        if self.args.benchmark_batch_size:
+            with UpdateConfig(self.config):
+                self.config.collator.batch_size = self.args.benchmark_batch_size
+            logger.info(
+                f"Benchmark reset batch-size: {self.args.benchmark_batch_size}")
+
     def setup(self):
         """Setup the experiment.
         """

@@ -188,6 +196,12 @@ class Trainer():
         if self.args.profiler_options:
             profiler.add_profiler_step(self.args.profiler_options)

+        if self.args.benchmark_max_step and self.iteration > self.args.benchmark_max_step:
+            logger.info(
+                f"Reach benchmark-max-step: {self.args.benchmark_max_step}")
+            sys.exit(
+                f"Reach benchmark-max-step: {self.args.benchmark_max_step}")
+
     def train(self):
         """The training process control by epoch."""
         from_scratch = self.resume_or_scratch()
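Two details worth noting here. First, `sys.exit(msg)` with a string argument prints `msg` to stderr and exits with status 1, so reaching `--benchmark-max-step` terminates the whole process, not just the epoch loop. Second, the `--profiler-options` string is a semicolon-separated list of `key=value` pairs; the real decoding lives in `deepspeech.utils.profiler.add_profiler_step`, but a hypothetical parser sketch looks like:

def parse_profiler_options(options_str: str) -> dict:
    """Split "key1=value1;key2=value2;key3=value3" into a dict."""
    options = {}
    for pair in options_str.split(";"):
        key, sep, value = pair.partition("=")
        if sep:  # skip malformed fragments without an "="
            options[key.strip()] = value.strip()
    return options

# batch_range/profile_path mirror typical paddle profiler option keys
print(parse_profiler_options("batch_range=[10, 20];profile_path=model.profile"))
# -> {'batch_range': '[10, 20]', 'profile_path': 'model.profile'}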

@@ -16,15 +16,27 @@ import distutils.util
 import math
 import os
 import random
+from contextlib import contextmanager
 from typing import List

 import numpy as np
 import paddle

-__all__ = ["seed_all", 'print_arguments', 'add_arguments', "log_add"]
+__all__ = [
+    "UpdateConfig", "seed_all", 'print_arguments', 'add_arguments', "log_add"
+]
+
+
+@contextmanager
+def UpdateConfig(config):
+    """Update yacs config"""
+    config.defrost()
+    yield
+    config.freeze()


 def seed_all(seed: int=210329):
     """freeze random generator seed."""
     np.random.seed(seed)
     random.seed(seed)
     paddle.seed(seed)
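`UpdateConfig` wraps the yacs defrost/freeze cycle so call sites (such as the benchmark batch-size override above) cannot forget to re-freeze. A minimal usage sketch, assuming a yacs `CfgNode` as this repo's configs use:

from yacs.config import CfgNode

from deepspeech.utils.utility import UpdateConfig

config = CfgNode()
config.collator = CfgNode()
config.collator.batch_size = 16
config.freeze()  # frozen: attribute writes now raise AttributeError

with UpdateConfig(config):
    config.collator.batch_size = 32  # legal while defrosted

assert config.collator.batch_size == 32
# config.collator.batch_size = 64  # would raise: config is frozen again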

@@ -1,7 +1,8 @@
 #!/bin/bash

 profiler_options=
+benchmark_batch_size=
+benchmark_max_step=

 # seed may break model convergence
 seed=0
@@ -32,12 +33,15 @@ ckpt_name=$2
 mkdir -p exp

 python3 -u ${BIN_DIR}/train.py \
+--seed ${seed} \
 --device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
---profiler_options ${profiler_options} \
---seed ${seed}
+--profiler-options "${profiler_options}" \
+--benchmark-batch-size ${benchmark_batch_size} \
+--benchmark-max-step ${benchmark_max_step}

 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic

@@ -1,35 +1,47 @@
 #!/bin/bash

+if [ $# != 2 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+    exit -1
+fi
+
+profiler_options=
+benchmark_batch_size=
+benchmark_max_step=
+
+# seed may break model convergence
+seed=0
+
+source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
+
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

+config_path=$1
+ckpt_name=$2
+
 device=gpu
 if [ ${ngpu} == 0 ];then
     device=cpu
 fi

 mkdir -p exp

-# seed may break model convergence
-seed=0
 if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi

-if [ $# != 2 ];then
-    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
-    exit -1
-fi
-
-config_path=$1
-ckpt_name=$2
-
-mkdir -p exp
-
 python3 -u ${BIN_DIR}/train.py \
+--seed ${seed} \
 --device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \
---seed ${seed}
+--profiler-options "${profiler_options}" \
+--benchmark-batch-size ${benchmark_batch_size} \
+--benchmark-max-step ${benchmark_max_step}

 if [ ${seed} != 0 ]; then
     unset FLAGS_cudnn_deterministic

@@ -1,41 +1,46 @@
 #!/bin/bash

+CUR_DIR=${PWD}
 ROOT_DIR=../../

 # Script to reproduce performance stably; by default it runs with py37 inside the standard docker environment:
 # collect env info
 bash ${ROOT_DIR}/utils/pd_env_collect.sh
-cat pd_env.txt
+#cat pd_env.txt

 # working directory: needs to be documented
-pushd ${ROOT_DIR}/examples/aishell/s1

 # 1. Install the dependencies this model needs (state clearly if any optimization strategy is enabled)
-pushd ${ROOT_DIR}/tools; make; popd
-source ${ROOT_DIR}/tools/venv/bin/activate
-pushd ${ROOT_DIR}; bash setup.sh; popd
+#pushd ${ROOT_DIR}/tools; make; popd
+#source ${ROOT_DIR}/tools/venv/bin/activate
+#pushd ${ROOT_DIR}; bash setup.sh; popd

 # 2. Copy the data and pretrained models this model needs
 # working directory: needs to be documented
+#pushd ${ROOT_DIR}/examples/aishell/s1
+pushd ${ROOT_DIR}/examples/tiny/s1

 mkdir -p exp/log
-loca/data.sh &> exp/log/data.log
+. path.sh
+#bash local/data.sh &> exp/log/data.log

 # 3. Run in batch (if batch running is inconvenient, put steps 1 and 2 into each single model)
-model_mode_list=(conformer)
+model_mode_list=(conformer transformer)
 fp_item_list=(fp32)
-bs_item=(32 64 96)
+bs_item_list=(32 64 96)

 for model_mode in ${model_mode_list[@]}; do
     for fp_item in ${fp_item_list[@]}; do
-        for bs_item in ${bs_list[@]}
+        for bs_item in ${bs_item_list[@]}
         do
             echo "index is speed, 1gpus, begin, ${model_name}"
             run_mode=sp
-            CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min)
+            CUDA_VISIBLE_DEVICES=0 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min)
             sleep 60
             echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}"
             run_mode=mp
-            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
             sleep 60
         done
     done
 done

@@ -23,19 +23,19 @@ function _train(){
     echo "Train on ${num_gpu_devices} GPUs"
     echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

-    train_cmd="--model_name=${model_name}
-    --batch_size=${batch_size}
-    --fp=${fp_item} \
-    --max_iter=${max_iter} "
+    train_cmd="--benchmark-batch-size ${batch_size}
+    --benchmark-max-step ${max_iter}
+    conf/${model_name}.yaml ${model_name}"

     case ${run_mode} in
-    sp) train_cmd="python -u tools/train.py "${train_cmd}" ;;
+    sp) train_cmd="bash local/train.sh "${train_cmd}"" ;;
     mp)
-        train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py "${train_cmd}"
-        log_parse_file="mylog/workerlog.0" ;;
+        train_cmd="bash local/train.sh "${train_cmd}"" ;;
     *) echo "choose run_mode(sp or mp)"; exit 1;
     esac

     # Do not modify below
-    timeout 15m ${train_cmd} > ${log_file} 2>&1
+    CUDA_VISIBLE_DEVICES=${device} timeout 15m ${train_cmd} > ${log_file} 2>&1
     if [ $? -ne 0 ];then
         echo -e "${model_name}, FAIL"
         export job_fail_flag=1
@@ -43,7 +43,8 @@ function _train(){
         echo -e "${model_name}, SUCCESS"
         export job_fail_flag=0
     fi
-    kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+    trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM

     if [ $run_mode = "mp" -a -d mylog ]; then
         rm ${log_file}
