From 438e1bd34fbf5e1c1181559b4f19301657d6b4c7 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Wed, 15 Sep 2021 11:03:18 +0000
Subject: [PATCH] add benchmark scripts

---
Note: the file contents below were corrected during review. The blob ids on
the "index" lines are those of the original commit and were not regenerated.

 tests/benchmark/.gitignore       |   2 +
 tests/benchmark/README.md        |  12 +++
 tests/benchmark/run_all.sh       |  32 ++++++
 tests/benchmark/run_benchmark.sh |  56 ++++++++++
 utils/pd_env_collect.sh          | 179 +++++++++++++++++++++++++++++++
 5 files changed, 281 insertions(+)
 create mode 100644 tests/benchmark/.gitignore
 create mode 100644 tests/benchmark/README.md
 create mode 100644 tests/benchmark/run_all.sh
 create mode 100644 tests/benchmark/run_benchmark.sh
 create mode 100644 utils/pd_env_collect.sh

diff --git a/tests/benchmark/.gitignore b/tests/benchmark/.gitignore
new file mode 100644
index 000000000..7d166b066
--- /dev/null
+++ b/tests/benchmark/.gitignore
@@ -0,0 +1,2 @@
+old-pd_env.txt
+pd_env.txt
diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md
new file mode 100644
index 000000000..8ec43f89e
--- /dev/null
+++ b/tests/benchmark/README.md
@@ -0,0 +1,12 @@
+# Benchmark Test
+
+## Data
+
+* Aishell
+
+## Docker
+
+```
+registry.baidubce.com/paddlepaddle/paddle 2.1.1-gpu-cuda10.2-cudnn7 59d5ec1de486
+```
+
diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh
new file mode 100644
index 000000000..7564174b4
--- /dev/null
+++ b/tests/benchmark/run_all.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+# collect env info
+bash ../../utils/pd_env_collect.sh
+
+
+
+# Provide scripts that reproduce performance stably; by default they run with py37 inside the standard docker environment: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
+# Working directory: to be specified
+# TODO: cd into the directory to run from (this was the unfilled template placeholder `cd **`)
+# 1 Install the dependencies this model needs (note here if an optimization strategy must be enabled)
+# TODO: install the dependencies (this was the unfilled template placeholder `pip install ...`)
+# 2 Copy the data and pretrained models this model needs
+# 3 Batch run (if batching is inconvenient, steps 1 and 2 must go into each single model)
+
+model_mode_list=(MobileNetv1 MobileNetv2)
+fp_item_list=(fp32 fp16)
+bs_list=(32 64 96)
+for model_mode in "${model_mode_list[@]}"; do
+  for fp_item in "${fp_item_list[@]}"; do
+    for bs_item in "${bs_list[@]}"; do
+      echo "index is speed, 1gpus, begin, ${model_mode}"
+      run_mode=sp
+      CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh "${run_mode}" "${bs_item}" "${fp_item}" 500 "${model_mode}"     # (5min)
+      sleep 60
+      echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}"
+      run_mode=mp
+      CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh "${run_mode}" "${bs_item}" "${fp_item}" 500 "${model_mode}"
+      sleep 60
+    done
+  done
+done
diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh
new file mode 100644
index 000000000..2b9cf70fd
--- /dev/null
+++ b/tests/benchmark/run_benchmark.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+set -xe
+# Usage example: CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
+# Parameter description
+function _set_params(){
+    run_mode=${1:-"sp"}          # single GPU: sp | multiple GPUs: mp
+    batch_size=${2:-"64"}
+    fp_item=${3:-"fp32"}        # fp32|fp16
+    max_iter=${4:-"500"}       # optional; use it if the code has to be changed to stop early
+    model_name=${5:-"model_name"}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR will be set by QA later
+
+#   No need to modify anything below
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})
+    num_gpu_devices=${#arr[*]}
+    log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
+}
+function _train(){
+    echo "Train on ${num_gpu_devices} GPUs"
+    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
+
+    # Build the command as an array so each argument stays a separate word.
+    train_args=(--model_name="${model_name}"
+                --batch_size="${batch_size}"
+                --fp="${fp_item}"
+                --max_iter="${max_iter}")
+    case ${run_mode} in
+    sp) train_cmd=(python -u tools/train.py "${train_args[@]}") ;;
+    mp)
+        train_cmd=(python -m paddle.distributed.launch --log_dir=./mylog --gpus="$CUDA_VISIBLE_DEVICES" tools/train.py "${train_args[@]}")
+        log_parse_file="mylog/workerlog.0" ;;
+    *) echo "choose run_mode(sp or mp)"; exit 1 ;;
+    esac
+#   No need to modify anything below
+    # Test the command directly: under `set -e` a separate `$?` check is never reached on failure.
+    if timeout 15m "${train_cmd[@]}" > "${log_file}" 2>&1; then
+        echo -e "${model_name}, SUCCESS"
+        export job_fail_flag=0
+    else
+        echo -e "${model_name}, FAIL"
+        export job_fail_flag=1
+    fi
+    # Kill leftover training processes only; `|| true` keeps `set -e` from aborting when none remain.
+    pkill -9 -f 'tools/train.py' || true
+
+    if [[ "${run_mode}" == "mp" && -d mylog ]]; then
+        rm -f "${log_file}"
+        cp mylog/workerlog.0 "${log_file}"
+    fi
+}
+
+_set_params "$@"
+_train
+
diff --git a/utils/pd_env_collect.sh b/utils/pd_env_collect.sh
new file mode 100644
index 000000000..64ff8886c
--- /dev/null
+++ b/utils/pd_env_collect.sh
@@ -0,0 +1,179 @@
+#!/usr/bin/env bash
+
+unset GREP_OPTIONS
+
+set -u  # Check for undefined variables
+
+die() {
+  # Print a message to stderr and exit with code 1.
+  #
+  # Usage: die <error_message>
+  #   e.g., die "Something bad happened."
+
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+echo "Collecting system information..."
+
+OUTPUT_FILE=pd_env.txt
+# `die` must run in this shell: inside $(...) it would only leave the subshell.
+python_bin_path=$(command -v python || command -v python3) || die "Cannot find Python binary"
+
+# Private scratch directory (removed on exit) instead of fixed file names in /tmp.
+TMP_DIR=$(mktemp -d) || die "Cannot create temporary directory"
+trap 'rm -rf -- "${TMP_DIR}"' EXIT
+
+{
+echo
+echo '== check python ==================================================='
+} >> "${OUTPUT_FILE}"
+
+cat <<'EOF' > "${TMP_DIR}/check_python.py"
+import platform
+print("""python version: %s
+python branch: %s
+python build version: %s
+python compiler version: %s
+python implementation: %s
+""" % (
+platform.python_version(),
+platform.python_branch(),
+platform.python_build(),
+platform.python_compiler(),
+platform.python_implementation(),
+))
+EOF
+"${python_bin_path}" "${TMP_DIR}/check_python.py" >> "${OUTPUT_FILE}" 2>&1
+
+{
+echo
+echo '== check os platform ==============================================='
+} >> "${OUTPUT_FILE}"
+
+cat <<'EOF' > "${TMP_DIR}/check_os.py"
+import platform
+
+def optional(name):
+    # platform.linux_distribution() and platform.dist() were removed in Python 3.8.
+    func = getattr(platform, name, None)
+    return func() if func is not None else "unavailable"
+
+print("""os: %s
+os kernel version: %s
+os release version: %s
+os platform: %s
+linux distribution: %s
+linux os distribution: %s
+mac version: %s
+uname: %s
+architecture: %s
+machine: %s
+""" % (
+platform.system(),
+platform.version(),
+platform.release(),
+platform.platform(),
+optional("linux_distribution"),
+optional("dist"),
+platform.mac_ver(),
+platform.uname(),
+platform.architecture(),
+platform.machine(),
+))
+EOF
+"${python_bin_path}" "${TMP_DIR}/check_os.py" >> "${OUTPUT_FILE}" 2>&1
+
+{
+  echo
+  echo '== are we in docker ============================================='
+  # `grep -q` also covers a missing /proc/1/cgroup, which then reports "No".
+  if grep -q docker /proc/1/cgroup 2>/dev/null; then
+    echo "Yes"
+  else
+    echo "No"
+  fi
+
+  echo
+  echo '== compiler ====================================================='
+  c++ --version 2>&1
+
+  echo
+  echo '== check pips ==================================================='
+  pip list 2>&1 | grep "proto\|numpy\|paddlepaddle"
+
+
+  echo
+  echo '== check for virtualenv ========================================='
+  "${python_bin_path}" -c "import sys;print(hasattr(sys, \"real_prefix\"))"
+
+  echo
+  echo '== paddlepaddle import ============================================'
+} >> "${OUTPUT_FILE}"
+
+cat <<'EOF' > "${TMP_DIR}/check_pd.py"
+import paddle as pd;
+pd.set_device('cpu')
+print("pd.version.full_version = %s" % pd.version.full_version)
+print("pd.version.commit = %s" % pd.version.commit)
+print("pd.__version__ = %s" % pd.__version__)
+print("Sanity check: %r" % pd.zeros([1,2,3])[:1])
+EOF
+"${python_bin_path}" "${TMP_DIR}/check_pd.py" >> "${OUTPUT_FILE}" 2>&1
+
+# The dynamic loader writes LD_DEBUG output to stderr, so that is the stream to capture.
+LD_DEBUG=libs "${python_bin_path}" -c "import paddle" > /dev/null 2> "${TMP_DIR}/loadedlibs"
+
+{
+  grep libcudnn.so "${TMP_DIR}/loadedlibs"
+  echo
+  echo '== env =========================================================='
+  if [[ -z "${LD_LIBRARY_PATH+x}" ]]; then
+    echo "LD_LIBRARY_PATH is unset";
+  else
+    echo LD_LIBRARY_PATH "${LD_LIBRARY_PATH}" ;
+  fi
+  if [[ -z "${DYLD_LIBRARY_PATH+x}" ]]; then
+    echo "DYLD_LIBRARY_PATH is unset";
+  else
+    echo DYLD_LIBRARY_PATH "${DYLD_LIBRARY_PATH}" ;
+  fi
+
+
+  echo
+  echo '== nvidia-smi ==================================================='
+  nvidia-smi 2>&1
+
+  echo
+  echo '== cuda libs ==================================================='
+} >> "${OUTPUT_FILE}"
+
+find /usr/local -type f -name 'libcudart*' 2>/dev/null | grep cuda | grep -v "\\.cache" >> "${OUTPUT_FILE}"
+find /usr/local -type f -name 'libcudnn*' 2>/dev/null | grep cuda | grep -v "\\.cache" >> "${OUTPUT_FILE}"
+
+{
+  echo
+  echo '== paddlepaddle installed from info =================='
+  pip show paddlepaddle-gpu
+
+  echo
+  echo '== python version =============================================='
+  echo '(major, minor, micro, releaselevel, serial)'
+  "${python_bin_path}" -c 'import sys; print(sys.version_info[:])'
+
+  echo
+  echo '== bazel version ==============================================='
+  bazel version
+  echo '== cmake version ==============================================='
+  cmake --version
+} >> "${OUTPUT_FILE}"
+
+# Remove any lines containing google.
+mv "${OUTPUT_FILE}" "old-${OUTPUT_FILE}"
+grep -v -i google "old-${OUTPUT_FILE}" > "${OUTPUT_FILE}"
+
+echo "Wrote environment to ${OUTPUT_FILE}. You can review the contents of that file."
+echo "and use it to populate the fields in the github issue template."
+echo
+echo "cat ${OUTPUT_FILE}"
+echo