add op profiling

pull/829/head
Hui Zhang 3 years ago
parent 2dadfba15b
commit 5fdda953b9

@ -21,6 +21,7 @@ from deepspeech.exps.u2.config import get_cfg_defaults
from deepspeech.exps.u2.model import U2Trainer as Trainer
from deepspeech.training.cli import default_argument_parser
from deepspeech.utils.utility import print_arguments
# from deepspeech.exps.u2.trainer import U2Trainer as Trainer

@ -204,6 +204,7 @@ class U2Trainer(Trainer):
msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
msg += "data time: {:>.3f}s, ".format(dataload_time)
self.train_batch(batch_index, batch, msg)
self.after_train_batch()
data_start_time = time.time()
except Exception as e:
logger.error(e)

@ -205,6 +205,7 @@ class U2Trainer(Trainer):
msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
msg += "data time: {:>.3f}s, ".format(dataload_time)
self.train_batch(batch_index, batch, msg)
self.after_train_batch()
data_start_time = time.time()
except Exception as e:
logger.error(e)

@ -222,6 +222,7 @@ class U2STTrainer(Trainer):
msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
msg += "data time: {:>.3f}s, ".format(dataload_time)
self.train_batch(batch_index, batch, msg)
self.after_train_batch()
data_start_time = time.time()
except Exception as e:
logger.error(e)

@ -63,8 +63,13 @@ def default_argument_parser():
parser.add_argument("--opts", type=str, default=[], nargs='+',
help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
# random seed
parser.add_argument("--seed", type=int, default=None,
help="seed to use for paddle, np and random. None or 0 for random, else set seed.")
# profiler
parser.add_argument('--profiler_options', type=str, default=None,
help='The option of the profiler, which should be in the format \"key1=value1;key2=value2;key3=value3\".')
# yapf: enable
return parser
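
For context, a minimal sketch (not part of the commit) of how the extended parser could be exercised, assuming the remaining flags are optional; the config path and option string below are made-up values:

# Hypothetical usage of the parser extended above; values are illustrative only.
from deepspeech.training.cli import default_argument_parser

parser = default_argument_parser()
args = parser.parse_args([
    "--config", "conf/conformer.yaml",  # hypothetical config path
    "--seed", "0",
    "--profiler_options", "batch_range=[50,60];profile_path=/tmp/op.profile",
])
print(args.profiler_options)  # -> "batch_range=[50,60];profile_path=/tmp/op.profile"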

@ -20,6 +20,7 @@ from tensorboardX import SummaryWriter
from deepspeech.training.timer import Timer
from deepspeech.utils import mp_tools
from deepspeech.utils import profiler
from deepspeech.utils.checkpoint import Checkpoint
from deepspeech.utils.log import Log
from deepspeech.utils.utility import seed_all
@ -183,6 +184,9 @@ class Trainer():
if isinstance(batch_sampler, paddle.io.DistributedBatchSampler):
batch_sampler.set_epoch(self.epoch)
def after_train_batch(self):
profiler.add_profiler_step(self.args.profiler_options)
def train(self):
"""The training process control by epoch."""
from_scratch = self.resume_or_scratch()
@ -209,6 +213,7 @@ class Trainer():
msg += "lr: {:>.8f}, ".format(self.lr_scheduler())
msg += "data time: {:>.3f}s, ".format(dataload_time)
self.train_batch(batch_index, batch, msg)
self.after_train_batch()
data_start_time = time.time()
except Exception as e:
logger.error(e)
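
A condensed sketch of the per-batch call order this hunk establishes (the real loop also handles timers, logging and checkpoints); only train_batch, after_train_batch and profiler.add_profiler_step come from the diff, the class name and loop are illustrative:

# Condensed illustration of where the new hook sits; not the full Trainer.
from deepspeech.utils import profiler

class TrainerSketch:
    def __init__(self, args):
        self.args = args  # args.profiler_options comes from the new CLI flag

    def train_batch(self, batch_index, batch, msg):
        pass  # forward/backward/optimizer step in the real trainer

    def after_train_batch(self):
        # One profiler step per trained batch; a no-op when --profiler_options is unset.
        profiler.add_profiler_step(self.args.profiler_options)

    def run_epoch(self, train_loader):
        for batch_index, batch in enumerate(train_loader):
            self.train_batch(batch_index, batch, msg="")
            self.after_train_batch()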

@ -0,0 +1,116 @@
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import paddle
from deepspeech.utils.log import Log
logger = Log(__name__).getlog()
# A global variable to record the number of calling times for profiler
# functions. It is used to specify the tracing range of training steps.
_profiler_step_id = 0
# A global variable to avoid parsing from string every time.
_profiler_options = None
class ProfilerOptions(object):
    '''
    Use a string to initialize a ProfilerOptions.
    The string should be in the format: "key1=value1;key2=value2;key3=value3".
    For example:
      "profile_path=model.profile"
      "batch_range=[50, 60]; profile_path=model.profile"
      "batch_range=[50, 60]; tracer_option=OpDetail; profile_path=model.profile"

    ProfilerOptions supports the following key-value pairs:
      batch_range      - an integer list, e.g. [100, 110].
      state            - a string, the optional values are 'CPU', 'GPU' or 'All'.
      sorted_key       - a string, the optional values are 'calls', 'total',
                         'max', 'min' or 'ave'.
      tracer_option    - a string, the optional values are 'Default', 'OpDetail',
                         'AllOpDetail'.
      profile_path     - a string, the path to save the serialized profile data,
                         which can be used to generate a timeline.
      exit_on_finished - a boolean.
    '''

    def __init__(self, options_str):
        assert isinstance(options_str, str)

        self._options = {
            'batch_range': [10, 20],
            'state': 'All',
            'sorted_key': 'total',
            'tracer_option': 'Default',
            'profile_path': '/tmp/profile',
            'exit_on_finished': True
        }
        self._parse_from_string(options_str)

    def _parse_from_string(self, options_str):
        for kv in options_str.replace(' ', '').split(';'):
            key, value = kv.split('=')
            if key == 'batch_range':
                value_list = value.replace('[', '').replace(']', '').split(',')
                value_list = list(map(int, value_list))
                if len(value_list) >= 2 and value_list[0] >= 0 and value_list[
                        1] > value_list[0]:
                    self._options[key] = value_list
            elif key == 'exit_on_finished':
                self._options[key] = value.lower() in ("yes", "true", "t", "1")
            elif key in [
                    'state', 'sorted_key', 'tracer_option', 'profile_path'
            ]:
                self._options[key] = value

    def __getitem__(self, name):
        if self._options.get(name, None) is None:
            raise ValueError(
                "ProfilerOptions does not have an option named %s." % name)
        return self._options[name]


def add_profiler_step(options_str=None):
    '''
    Enable the operator-level timing using PaddlePaddle's profiler.
    The profiler uses an independent variable to count the profiler steps.
    One call of this function is treated as a profiler step.
    Args:
        options_str - a string to initialize the ProfilerOptions.
                      Default is None, and the profiler is disabled.
    '''
    if options_str is None:
        return

    global _profiler_step_id
    global _profiler_options

    if _profiler_options is None:
        _profiler_options = ProfilerOptions(options_str)
        logger.info(f"{options_str}")
        logger.info(f"{_profiler_options._options}")

    if _profiler_step_id == _profiler_options['batch_range'][0]:
        paddle.utils.profiler.start_profiler(_profiler_options['state'],
                                             _profiler_options['tracer_option'])
    elif _profiler_step_id == _profiler_options['batch_range'][1]:
        paddle.utils.profiler.stop_profiler(_profiler_options['sorted_key'],
                                            _profiler_options['profile_path'])
        if _profiler_options['exit_on_finished']:
            sys.exit(0)

    _profiler_step_id += 1
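
As a quick illustration of the option-string format described in the docstring (not part of the new file), parsing and defaults behave roughly as below; the profile path and batch range are made-up values:

# Illustrative parse of an options string using the helpers added above.
from deepspeech.utils.profiler import ProfilerOptions, add_profiler_step

opts = ProfilerOptions("batch_range=[50, 60]; tracer_option=OpDetail; profile_path=/tmp/op.profile")
assert opts['batch_range'] == [50, 60]
assert opts['tracer_option'] == 'OpDetail'
assert opts['state'] == 'All'            # unset keys keep their defaults
assert opts['exit_on_finished'] is True

# In a training loop, one call per batch drives the start/stop logic; with
# exit_on_finished=True the process exits once the range [50, 60] has been profiled.
# for step in range(num_steps):
#     ...train one batch...
#     add_profiler_step("batch_range=[50, 60]; profile_path=/tmp/op.profile")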

@ -1,38 +1,45 @@
#!/bin/bash
if [ $# != 2 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
exit -1
fi
profiler_options=
# seed may break model convergence
seed=0
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_name=$2
device=gpu
if [ ${ngpu} == 0 ];then
device=cpu
fi
echo "using ${device}..."
mkdir -p exp
# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
export FLAGS_cudnn_deterministic=True
echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi
if [ $# != 2 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
exit -1
fi
config_path=$1
ckpt_name=$2
mkdir -p exp
python3 -u ${BIN_DIR}/train.py \
--device ${device} \
--nproc ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--profiler_options ${profiler_options} \
--seed ${seed}
if [ ${seed} != 0 ]; then
unset FLAGS_cudnn_deterministic
fi
@ -41,4 +48,4 @@ if [ $? -ne 0 ]; then
exit 1
fi
exit 0

@ -11,7 +11,7 @@ data:
max_output_input_ratio: .inf
collator:
batch_size: 15
batch_size: 20
mean_std_filepath: data/mean_std.json
unit_type: char
vocab_filepath: data/vocab.txt
@ -45,7 +45,7 @@ model:
training:
n_epoch: 50
accum_grad: 4
accum_grad: 1
lr: 1e-3
lr_decay: 0.83
weight_decay: 1e-06

@ -1,36 +1,44 @@
#!/bin/bash
if [ $# != 3 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type"
exit -1
fi
profiler_options=
# seed may break model convergence
seed=0
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_name=$2
model_type=$3
device=gpu
if [ ${ngpu} == 0 ];then
device=cpu
fi
mkdir -p exp
# seed may break model convergence
seed=0
if [ ${seed} != 0 ]; then
export FLAGS_cudnn_deterministic=True
echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
fi
if [ $# != 3 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type"
exit -1
fi
config_path=$1
ckpt_name=$2
model_type=$3
mkdir -p exp
python3 -u ${BIN_DIR}/train.py \
--device ${device} \
--nproc ${ngpu} \
--config ${config_path} \
--output exp/${ckpt_name} \
--model_type ${model_type} \
--profiler_options ${profiler_options} \
--seed ${seed}
if [ ${seed} != 0 ]; then
