diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md
index fdfc6328f..411f89dad 100644
--- a/tests/benchmark/README.md
+++ b/tests/benchmark/README.md
@@ -1,6 +1,38 @@
 ### Prepare the environment
 Please follow the instructions shown in [here](https://github.com/PaddlePaddle/DeepSpeech/blob/develop/docs/src/install.md) to install the Deepspeech first.
+### File list
+└── benchmark                            # model name
+    ├── README.md                        # usage documentation
+    ├── analysis.py                      # log-parsing script; keep it as uniform as possible across frameworks, see [Paddle's analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py)
+    ├── recoder_mp_bs16_fp32_ngpu1.txt   # single-GPU data
+    ├── recoder_mp_bs16_fp32_ngpu8.txt   # 8-GPU data
+    ├── prepare.sh                       # environment setup for the competing PyTorch implementation
+    ├── run_benchmark.sh                 # run script (covers speed and convergence)
+    ├── run_analysis_mp.sh               # analysis script for the 8-GPU run
+    ├── run_analysis_sp.sh               # analysis script for the single-GPU run
+    ├── log
+    │   ├── log_sp.out                   # single-GPU results
+    │   └── log_mp.out                   # 8-GPU results
+    └── run.sh                           # full run script
+
+
+### The physical environment
+- Single node (1 GPU, 8 GPUs)
+  - OS: Ubuntu 16.04.6 LTS
+  - GPU: Tesla V100-SXM2-16GB * 8
+  - CPU: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz * 96
+  - Driver Version: 440.64.00
+  - Memory: 440 GB
+  - CUDA/cuDNN version: cuda10.2-cudnn7
+- Multi-node (32 GPUs): TODO
+
+### Docker image
+
+- **Image version**: `registry.baidubce.com/paddlepaddle/paddle:2.1.0-gpu-cuda10.2-cudnn7`
+- **CUDA version**: `10.2`
+- **cuDNN version**: `7`
+
 
 ### Prepare the benchmark environment
 ```
 bash prepare.sh
@@ -20,3 +52,20 @@ bash run_analysis_sp.sh
 ```
 bash run_analysis_mp.sh
 ```
+
+### The log
+```
+{"log_file": "recoder_sp_bs16_fp32_ngpu1.txt",
+  "model_name": "Conformer",
+  "mission_name": "one gpu",
+  "direction_id": 1,
+  "run_mode": "sp",
+  "index": 1,
+  "gpu_num": 1,
+  "FINAL_RESULT": 23.228,
+  "JOB_FAIL_FLAG": 0,
+  "log_with_profiler": null,
+  "profiler_path": null,
+  "UNIT": "sent./sec"
+}
+```
diff --git a/tests/benchmark/prepare.sh b/tests/benchmark/prepare.sh
index c934f487b..3e86b13d6 100644
--- a/tests/benchmark/prepare.sh
+++ b/tests/benchmark/prepare.sh
@@ -1,7 +1,7 @@
-source ../tools/venv/bin/activate
+source ../../tools/venv/bin/activate
 
 #Enter the example dir
-pushd ../examples/aishell/s1
+pushd ../../examples/aishell/s1
 
 #Prepare the data
 bash run.sh --stage 0 --stop_stage 0
diff --git a/tests/benchmark/run.sh b/tests/benchmark/run.sh
index 4a23b0edb..a77e43fe5 100644
--- a/tests/benchmark/run.sh
+++ b/tests/benchmark/run.sh
@@ -2,9 +2,9 @@
 # Script that reproduces performance stably; by default it runs with py37 inside the standard docker image: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37
 # Working directory: to be documented
 CUR_DIR=${PWD}
-source ../tools/venv/bin/activate
+source ../../tools/venv/bin/activate
 #cd **
-pushd ../examples/aishell/s1
+pushd ../../examples/aishell/s1
 
 # 1. Install the dependencies this model needs (note any optimization strategies that are enabled)
 # 2. Copy the data and pretrained models this model needs
@@ -28,7 +28,13 @@ for fp_item in ${fp_item_list[@]}; do
       echo "index is speed, 1gpus, begin, conformer"
       run_mode=sp
       ngpu=1
-      CUDA_VISIBLE_DEVICES=7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${config_path} ${output} ${seed} ${ngpu} ${profiler_options} ${batch_size} ${fp_item} ${CUR_DIR}
+      CUDA_VISIBLE_DEVICES=0 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${config_path} ${output} ${seed} ${ngpu} ${profiler_options} ${batch_size} ${fp_item} ${CUR_DIR}
     done
 done
+popd
+
+mkdir -p log
+bash run_analysis_sp.sh > log/log_sp.out
+bash run_analysis_mp.sh > log/log_mp.out
+
 
diff --git a/tests/benchmark/run_analysis_mp.sh b/tests/benchmark/run_analysis_mp.sh
index 24a3b383f..f68dd1e45 100644
--- a/tests/benchmark/run_analysis_mp.sh
+++ b/tests/benchmark/run_analysis_mp.sh
@@ -9,4 +9,4 @@ python analysis.py \
     --gpu_num 8 \
     --use_num 480 \
     --separator " " \
-
+    --direction_id "1"
diff --git a/tests/benchmark/run_analysis_sp.sh b/tests/benchmark/run_analysis_sp.sh
index 2d3c8e733..e521db88d 100644
--- a/tests/benchmark/run_analysis_sp.sh
+++ b/tests/benchmark/run_analysis_sp.sh
@@ -9,4 +9,4 @@ python analysis.py \
    --gpu_num 1 \
    --use_num 60 \
    --separator " " \
-
+    --direction_id "1"
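
For context, below is a minimal sketch (not part of the patch) of the end-to-end flow the patched README describes: prepare the environment, run the benchmarks, then sanity-check the analysis output. It assumes it is launched from `tests/benchmark` after the patch is applied, and that the analysis logs follow the JSON layout shown in the README's "The log" section; the field names (`FINAL_RESULT`, `JOB_FAIL_FLAG`, `UNIT`) are taken from that example.

```
#!/bin/bash
# Minimal driver sketch (assumption: run from tests/benchmark with the patch above applied).
set -e

bash prepare.sh        # set up the venv, data, and benchmark environment
bash run.sh            # runs the sp/mp benchmarks and writes log/log_sp.out and log/log_mp.out

# Sanity-check the analysis output; the field names follow the JSON example
# in the README ("FINAL_RESULT", "JOB_FAIL_FLAG", "UNIT").
for f in log/log_sp.out log/log_mp.out; do
    echo "== ${f} =="
    grep -E '"(FINAL_RESULT|JOB_FAIL_FLAG|UNIT)"' "${f}" || echo "no result fields found in ${f}"
done
```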