From 58fc5bf1556744d762858202bcfb13084fe25eb3 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Fri, 8 Oct 2021 10:59:31 +0000 Subject: [PATCH 1/3] add bentchmark --- benchmark/README.md | 14 ++ benchmark/analysis.py | 345 +++++++++++++++++++++++++++++++++++ benchmark/prepare.sh | 7 + benchmark/run.sh | 34 ++++ benchmark/run_analysis_mp.sh | 12 ++ benchmark/run_analysis_sp.sh | 12 ++ benchmark/run_benchmark.sh | 65 +++++++ 7 files changed, 489 insertions(+) create mode 100644 benchmark/README.md create mode 100644 benchmark/analysis.py create mode 100644 benchmark/prepare.sh create mode 100644 benchmark/run.sh create mode 100644 benchmark/run_analysis_mp.sh create mode 100644 benchmark/run_analysis_sp.sh create mode 100644 benchmark/run_benchmark.sh diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 000000000..2d1760a34 --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,14 @@ +### Prepare the environment +Please follow the instructions shown in [here](https://github.com/PaddlePaddle/DeepSpeech/blob/develop/docs/src/install.md) to install the Deepspeech first. + +### Prepare the benchmark environment +bash prepare.sh + +### Start benchmarking +bash run.sh + +### Analyse the sp +bash run_analysis_sp.sh + +### Analyse the mp +bash run_analysis_mp.sh diff --git a/benchmark/analysis.py b/benchmark/analysis.py new file mode 100644 index 000000000..610791c8c --- /dev/null +++ b/benchmark/analysis.py @@ -0,0 +1,345 @@ +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import print_function + +import argparse +import json +import re +import traceback + + +def parse_args(): + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "--filename", type=str, help="The name of log which need to analysis.") + parser.add_argument( + "--log_with_profiler", + type=str, + help="The path of train log with profiler") + parser.add_argument( + "--profiler_path", type=str, help="The path of profiler timeline log.") + parser.add_argument( + "--keyword", type=str, help="Keyword to specify analysis data") + parser.add_argument( + "--separator", + type=str, + default=None, + help="Separator of different field in log") + parser.add_argument( + '--position', type=int, default=None, help='The position of data field') + parser.add_argument( + '--range', + type=str, + default="", + help='The range of data field to intercept') + parser.add_argument( + '--base_batch_size', type=int, help='base_batch size on gpu') + parser.add_argument( + '--skip_steps', + type=int, + default=0, + help='The number of steps to be skipped') + parser.add_argument( + '--model_mode', + type=int, + default=-1, + help='Analysis mode, default value is -1') + parser.add_argument('--ips_unit', type=str, default=None, help='IPS unit') + parser.add_argument( + '--model_name', + type=str, + default=0, + help='training model_name, transformer_base') + parser.add_argument( + '--mission_name', type=str, default=0, help='training mission name') + parser.add_argument( + '--direction_id', type=int, default=0, help='training direction_id') + parser.add_argument( + '--run_mode', + type=str, + default="sp", + help='multi process or single process') + parser.add_argument( + '--index', + type=int, + default=1, + help='{1: speed, 2:mem, 3:profiler, 6:max_batch_size}') + parser.add_argument( + '--gpu_num', type=int, default=1, help='nums of training gpus') + parser.add_argument( + '--use_num', type=int, default=1, help='nums of used recoders') + args = parser.parse_args() + args.separator = None if args.separator == "None" else args.separator + return args + + +def _is_number(num): + pattern = re.compile(r'^[-+]?[-0-9]\d*\.\d*|[-+]?\.?[0-9]\d*$') + result = pattern.match(num) + if result: + return True + else: + return False + + +class TimeAnalyzer(object): + def __init__(self, + filename, + keyword=None, + separator=None, + position=None, + range="-1"): + if filename is None: + raise Exception("Please specify the filename!") + + if keyword is None: + raise Exception("Please specify the keyword!") + + self.filename = filename + self.keyword = keyword + self.separator = separator + self.position = position + self.range = range + self.records = None + self._distil() + + def _distil(self): + self.records = [] + with open(self.filename, "r") as f_object: + lines = f_object.readlines() + for line in lines: + if self.keyword not in line: + continue + try: + result = None + + # Distil the string from a line. + line = line.strip() + line_words = line.split( + self.separator) if self.separator else line.split() + print("line_words", line_words) + if args.position: + result = line_words[self.position] + else: + # Distil the string following the keyword. + for i in range(len(line_words) - 1): + if line_words[i] == self.keyword: + result = line_words[i + 1] + break + + # Distil the result from the picked string. + if not self.range: + result = result[0:] + elif _is_number(self.range): + result = result[0:int(self.range)] + else: + result = result[int(self.range.split(":")[0]):int( + self.range.split(":")[1])] + self.records.append(float(result)) + except Exception as exc: + pass + #print("line is: {}; separator={}; position={}".format(line, self.separator, self.position)) + self.records.sort() + self.records = self.records[:args.use_num] + print("records", self.records) + print("Extract {} records: separator={}; position={}".format( + len(self.records), self.separator, self.position)) + + def _get_fps(self, + mode, + batch_size, + gpu_num, + avg_of_records, + run_mode, + unit=None): + if mode == -1 and run_mode == 'sp': + assert unit, "Please set the unit when mode is -1." + fps = gpu_num * avg_of_records + elif mode == -1 and run_mode == 'mp': + assert unit, "Please set the unit when mode is -1." + fps = gpu_num * avg_of_records #temporarily, not used now + print("------------this is mp") + elif mode == 0: + # s/step -> samples/s + fps = (batch_size * gpu_num) / avg_of_records + unit = "samples/s" + elif mode == 1: + # steps/s -> steps/s + fps = avg_of_records + unit = "steps/s" + elif mode == 2: + # s/step -> steps/s + fps = 1 / avg_of_records + unit = "steps/s" + elif mode == 3: + # steps/s -> samples/s + fps = batch_size * gpu_num * avg_of_records + unit = "samples/s" + elif mode == 4: + # s/epoch -> s/epoch + fps = avg_of_records + unit = "s/epoch" + else: + ValueError("Unsupported analysis mode.") + + return fps, unit + + def analysis(self, + batch_size, + gpu_num=1, + skip_steps=0, + mode=-1, + run_mode='sp', + unit=None): + if batch_size <= 0: + print("base_batch_size should larger than 0.") + return 0, '' + + if len( + self.records + ) <= skip_steps: # to address the condition which item of log equals to skip_steps + print("no records") + return 0, '' + + sum_of_records = 0 + sum_of_records_skipped = 0 + skip_min = self.records[skip_steps] + skip_max = self.records[skip_steps] + + count = len(self.records) + for i in range(count): + sum_of_records += self.records[i] + if i >= skip_steps: + sum_of_records_skipped += self.records[i] + if self.records[i] < skip_min: + skip_min = self.records[i] + if self.records[i] > skip_max: + skip_max = self.records[i] + + avg_of_records = sum_of_records / float(count) + avg_of_records_skipped = sum_of_records_skipped / float(count - + skip_steps) + + fps, fps_unit = self._get_fps(mode, batch_size, gpu_num, avg_of_records, + run_mode, unit) + fps_skipped, _ = self._get_fps(mode, batch_size, gpu_num, + avg_of_records_skipped, run_mode, unit) + if mode == -1: + print("average ips of %d steps, skip 0 step:" % count) + print("\tAvg: %.3f %s" % (avg_of_records, fps_unit)) + print("\tFPS: %.3f %s" % (fps, fps_unit)) + if skip_steps > 0: + print("average ips of %d steps, skip %d steps:" % + (count, skip_steps)) + print("\tAvg: %.3f %s" % (avg_of_records_skipped, fps_unit)) + print("\tMin: %.3f %s" % (skip_min, fps_unit)) + print("\tMax: %.3f %s" % (skip_max, fps_unit)) + print("\tFPS: %.3f %s" % (fps_skipped, fps_unit)) + elif mode == 1 or mode == 3: + print("average latency of %d steps, skip 0 step:" % count) + print("\tAvg: %.3f steps/s" % avg_of_records) + print("\tFPS: %.3f %s" % (fps, fps_unit)) + if skip_steps > 0: + print("average latency of %d steps, skip %d steps:" % + (count, skip_steps)) + print("\tAvg: %.3f steps/s" % avg_of_records_skipped) + print("\tMin: %.3f steps/s" % skip_min) + print("\tMax: %.3f steps/s" % skip_max) + print("\tFPS: %.3f %s" % (fps_skipped, fps_unit)) + elif mode == 0 or mode == 2: + print("average latency of %d steps, skip 0 step:" % count) + print("\tAvg: %.3f s/step" % avg_of_records) + print("\tFPS: %.3f %s" % (fps, fps_unit)) + if skip_steps > 0: + print("average latency of %d steps, skip %d steps:" % + (count, skip_steps)) + print("\tAvg: %.3f s/step" % avg_of_records_skipped) + print("\tMin: %.3f s/step" % skip_min) + print("\tMax: %.3f s/step" % skip_max) + print("\tFPS: %.3f %s" % (fps_skipped, fps_unit)) + + return round(fps_skipped, 3), fps_unit + + +if __name__ == "__main__": + args = parse_args() + run_info = dict() + run_info["log_file"] = args.filename + run_info["model_name"] = args.model_name + run_info["mission_name"] = args.mission_name + run_info["direction_id"] = args.direction_id + run_info["run_mode"] = args.run_mode + run_info["index"] = args.index + run_info["gpu_num"] = args.gpu_num + run_info["FINAL_RESULT"] = 0 + run_info["JOB_FAIL_FLAG"] = 0 + + try: + if args.index == 1: + if args.gpu_num == 1: + run_info["log_with_profiler"] = args.log_with_profiler + run_info["profiler_path"] = args.profiler_path + analyzer = TimeAnalyzer(args.filename, args.keyword, args.separator, + args.position, args.range) + run_info["FINAL_RESULT"], run_info["UNIT"] = analyzer.analysis( + batch_size=args.base_batch_size, + gpu_num=args.gpu_num, + skip_steps=args.skip_steps, + mode=args.model_mode, + run_mode=args.run_mode, + unit=args.ips_unit) + # if int(os.getenv('job_fail_flag')) == 1 or int(run_info["FINAL_RESULT"]) == 0: + # run_info["JOB_FAIL_FLAG"] = 1 + elif args.index == 3: + run_info["FINAL_RESULT"] = {} + records_fo_total = TimeAnalyzer(args.filename, 'Framework overhead', + None, 3, '').records + records_fo_ratio = TimeAnalyzer(args.filename, 'Framework overhead', + None, 5).records + records_ct_total = TimeAnalyzer(args.filename, 'Computation time', + None, 3, '').records + records_gm_total = TimeAnalyzer(args.filename, + 'GpuMemcpy Calls', + None, 4, '').records + records_gm_ratio = TimeAnalyzer(args.filename, + 'GpuMemcpy Calls', + None, 6).records + records_gmas_total = TimeAnalyzer(args.filename, + 'GpuMemcpyAsync Calls', + None, 4, '').records + records_gms_total = TimeAnalyzer(args.filename, + 'GpuMemcpySync Calls', + None, 4, '').records + run_info["FINAL_RESULT"]["Framework_Total"] = records_fo_total[ + 0] if records_fo_total else 0 + run_info["FINAL_RESULT"]["Framework_Ratio"] = records_fo_ratio[ + 0] if records_fo_ratio else 0 + run_info["FINAL_RESULT"][ + "ComputationTime_Total"] = records_ct_total[ + 0] if records_ct_total else 0 + run_info["FINAL_RESULT"]["GpuMemcpy_Total"] = records_gm_total[ + 0] if records_gm_total else 0 + run_info["FINAL_RESULT"]["GpuMemcpy_Ratio"] = records_gm_ratio[ + 0] if records_gm_ratio else 0 + run_info["FINAL_RESULT"][ + "GpuMemcpyAsync_Total"] = records_gmas_total[ + 0] if records_gmas_total else 0 + run_info["FINAL_RESULT"]["GpuMemcpySync_Total"] = records_gms_total[ + 0] if records_gms_total else 0 + else: + print("Not support!") + except Exception: + traceback.print_exc() + print("{}".format(json.dumps(run_info)) + ) # it's required, for the log file path insert to the database diff --git a/benchmark/prepare.sh b/benchmark/prepare.sh new file mode 100644 index 000000000..483caaab7 --- /dev/null +++ b/benchmark/prepare.sh @@ -0,0 +1,7 @@ +source ../tools/venv/bin/activate + +#进入执行目录 +pushd ../examples/aishell/s1 + +#准备数据 +bash run.sh --stage 0 --stop_stage 0 diff --git a/benchmark/run.sh b/benchmark/run.sh new file mode 100644 index 000000000..4a23b0edb --- /dev/null +++ b/benchmark/run.sh @@ -0,0 +1,34 @@ + +# 鎻愪緵鍙ǔ瀹氬鐜版ц兘鐨勮剼鏈紝榛樿鍦ㄦ爣鍑哾ocker鐜鍐卲y37鎵ц锛 paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7 paddle=2.1.2 py=37 +# 鎵ц鐩綍锛氶渶璇存槑 +CUR_DIR=${PWD} +source ../tools/venv/bin/activate +#cd ** +pushd ../examples/aishell/s1 +# 1 瀹夎璇ユā鍨嬮渶瑕佺殑渚濊禆 (濡傞渶寮鍚紭鍖栫瓥鐣ヨ娉ㄦ槑) +# 2 鎷疯礉璇ユā鍨嬮渶瑕佹暟鎹侀璁粌妯″瀷 + + +source path.sh +fp_item_list=(fp32) +bs_item=(16) +config_path=conf/conformer.yaml +seed=0 +output=exp/conformer +profiler_options=None +for fp_item in ${fp_item_list[@]}; do + for batch_size in ${bs_item[@]} + do + rm exp -rf + echo "index is speed, 8gpus, run_mode is multi_process, begin, conformer" + run_mode=mp + ngpu=8 + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${config_path} ${output} ${seed} ${ngpu} ${profiler_options} ${batch_size} ${fp_item} ${CUR_DIR} + rm exp -rf + echo "index is speed, 1gpus, begin, conformer" + run_mode=sp + ngpu=1 + CUDA_VISIBLE_DEVICES=7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${config_path} ${output} ${seed} ${ngpu} ${profiler_options} ${batch_size} ${fp_item} ${CUR_DIR} + done +done + diff --git a/benchmark/run_analysis_mp.sh b/benchmark/run_analysis_mp.sh new file mode 100644 index 000000000..24a3b383f --- /dev/null +++ b/benchmark/run_analysis_mp.sh @@ -0,0 +1,12 @@ +python analysis.py \ + --filename "recoder_mp_bs16_fp32_ngpu8.txt" \ + --keyword "ips[sent./sec]:" \ + --base_batch_size 16 \ + --model_name "Conformer" \ + --mission_name "eight gpu" \ + --run_mode "mp" \ + --ips_unit "sent./sec" \ + --gpu_num 8 \ + --use_num 480 \ + --separator " " \ + diff --git a/benchmark/run_analysis_sp.sh b/benchmark/run_analysis_sp.sh new file mode 100644 index 000000000..2d3c8e733 --- /dev/null +++ b/benchmark/run_analysis_sp.sh @@ -0,0 +1,12 @@ +python analysis.py \ + --filename "recoder_sp_bs16_fp32_ngpu1.txt" \ + --keyword "ips[sent./sec]:" \ + --base_batch_size 16 \ + --model_name "Conformer" \ + --mission_name "one gpu" \ + --run_mode "sp" \ + --ips_unit "sent./sec" \ + --gpu_num 1 \ + --use_num 60 \ + --separator " " \ + diff --git a/benchmark/run_benchmark.sh b/benchmark/run_benchmark.sh new file mode 100644 index 000000000..c745aaaf3 --- /dev/null +++ b/benchmark/run_benchmark.sh @@ -0,0 +1,65 @@ +#!/usr/bin/env bash +set -xe +# 杩愯绀轰緥锛欳UDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} +# 鍙傛暟璇存槑 +function _set_params(){ + + run_mode=${1:-"sp"} # 鍗曞崱sp|澶氬崱mp + config_path=${2:-"conf/conformer.yaml"} + output=${3:-"exp/conformer"} + seed=${4:-"0"} + ngpu=${5:-"1"} + profiler_options=${6:-"None"} + batch_size=${7:-"32"} + fp_item=${8:-"fp32"} + TRAIN_LOG_DIR=${9:-$(pwd)} + + benchmark_max_step=0 + + run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 鍚庣画QA璁剧疆璇ュ弬鏁 + +# 浠ヤ笅涓嶇敤淇敼 + device=${CUDA_VISIBLE_DEVICES//,/ } + arr=(${device}) + num_gpu_devices=${#arr[*]} + log_file=${run_log_path}/recoder_${run_mode}_bs${batch_size}_${fp_item}_ngpu${ngpu}.txt +} + +function _train(){ + echo "Train on ${num_gpu_devices} GPUs" + echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" + train_cmd="--config=${config_path} + --output=${output} + --seed=${seed} + --nproc=${ngpu} + --profiler-options "${profiler_options}" + --benchmark-batch-size ${batch_size} + --benchmark-max-step ${benchmark_max_step} " + + echo "run_mode "${run_mode} + + case ${run_mode} in + sp) train_cmd="python3 -u ${BIN_DIR}/train.py "${train_cmd} ;; + mp) train_cmd="python3 -u ${BIN_DIR}/train.py "${train_cmd} ;; + *) echo "choose run_mode(sp or mp)"; exit 1; + esac + echo ${train_cmd} +# 浠ヤ笅涓嶇敤淇敼 + timeout 15m ${train_cmd} > ${log_file} 2>&1 + if [ $? -ne 0 ];then + echo -e "${model_name}, FAIL" + export job_fail_flag=1 + else + echo -e "${model_name}, SUCCESS" + export job_fail_flag=0 + fi + kill -9 `ps -ef|grep 'python'|awk '{print $2}'` + + if [ $run_mode = "mp" -a -d mylog ]; then + rm ${log_file} + cp mylog/workerlog.0 ${log_file} + fi +} + +_set_params $@ +_train From c1b4366834f4c46ddc0f44c2d0bf4e8e70252fe9 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Sat, 9 Oct 2021 02:52:41 +0000 Subject: [PATCH 2/3] update the installing script for openfst --- benchmark/README.md | 14 ---- benchmark/run_benchmark.sh | 65 ------------------- deepspeech/decoders/swig/setup.sh | 2 +- tests/benchmark/.gitignore | 2 - tests/benchmark/README.md | 21 ++++-- {benchmark => tests/benchmark}/analysis.py | 0 {benchmark => tests/benchmark}/prepare.sh | 4 +- {benchmark => tests/benchmark}/run.sh | 0 tests/benchmark/run_all.sh | 49 -------------- .../benchmark}/run_analysis_mp.sh | 0 .../benchmark}/run_analysis_sp.sh | 0 tests/benchmark/run_benchmark.sh | 46 +++++++------ 12 files changed, 46 insertions(+), 157 deletions(-) delete mode 100644 benchmark/README.md delete mode 100644 benchmark/run_benchmark.sh delete mode 100644 tests/benchmark/.gitignore rename {benchmark => tests/benchmark}/analysis.py (100%) rename {benchmark => tests/benchmark}/prepare.sh (71%) rename {benchmark => tests/benchmark}/run.sh (100%) delete mode 100755 tests/benchmark/run_all.sh rename {benchmark => tests/benchmark}/run_analysis_mp.sh (100%) rename {benchmark => tests/benchmark}/run_analysis_sp.sh (100%) mode change 100755 => 100644 tests/benchmark/run_benchmark.sh diff --git a/benchmark/README.md b/benchmark/README.md deleted file mode 100644 index 2d1760a34..000000000 --- a/benchmark/README.md +++ /dev/null @@ -1,14 +0,0 @@ -### Prepare the environment -Please follow the instructions shown in [here](https://github.com/PaddlePaddle/DeepSpeech/blob/develop/docs/src/install.md) to install the Deepspeech first. - -### Prepare the benchmark environment -bash prepare.sh - -### Start benchmarking -bash run.sh - -### Analyse the sp -bash run_analysis_sp.sh - -### Analyse the mp -bash run_analysis_mp.sh diff --git a/benchmark/run_benchmark.sh b/benchmark/run_benchmark.sh deleted file mode 100644 index c745aaaf3..000000000 --- a/benchmark/run_benchmark.sh +++ /dev/null @@ -1,65 +0,0 @@ -#!/usr/bin/env bash -set -xe -# 杩愯绀轰緥锛欳UDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} -# 鍙傛暟璇存槑 -function _set_params(){ - - run_mode=${1:-"sp"} # 鍗曞崱sp|澶氬崱mp - config_path=${2:-"conf/conformer.yaml"} - output=${3:-"exp/conformer"} - seed=${4:-"0"} - ngpu=${5:-"1"} - profiler_options=${6:-"None"} - batch_size=${7:-"32"} - fp_item=${8:-"fp32"} - TRAIN_LOG_DIR=${9:-$(pwd)} - - benchmark_max_step=0 - - run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 鍚庣画QA璁剧疆璇ュ弬鏁 - -# 浠ヤ笅涓嶇敤淇敼 - device=${CUDA_VISIBLE_DEVICES//,/ } - arr=(${device}) - num_gpu_devices=${#arr[*]} - log_file=${run_log_path}/recoder_${run_mode}_bs${batch_size}_${fp_item}_ngpu${ngpu}.txt -} - -function _train(){ - echo "Train on ${num_gpu_devices} GPUs" - echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" - train_cmd="--config=${config_path} - --output=${output} - --seed=${seed} - --nproc=${ngpu} - --profiler-options "${profiler_options}" - --benchmark-batch-size ${batch_size} - --benchmark-max-step ${benchmark_max_step} " - - echo "run_mode "${run_mode} - - case ${run_mode} in - sp) train_cmd="python3 -u ${BIN_DIR}/train.py "${train_cmd} ;; - mp) train_cmd="python3 -u ${BIN_DIR}/train.py "${train_cmd} ;; - *) echo "choose run_mode(sp or mp)"; exit 1; - esac - echo ${train_cmd} -# 浠ヤ笅涓嶇敤淇敼 - timeout 15m ${train_cmd} > ${log_file} 2>&1 - if [ $? -ne 0 ];then - echo -e "${model_name}, FAIL" - export job_fail_flag=1 - else - echo -e "${model_name}, SUCCESS" - export job_fail_flag=0 - fi - kill -9 `ps -ef|grep 'python'|awk '{print $2}'` - - if [ $run_mode = "mp" -a -d mylog ]; then - rm ${log_file} - cp mylog/workerlog.0 ${log_file} - fi -} - -_set_params $@ -_train diff --git a/deepspeech/decoders/swig/setup.sh b/deepspeech/decoders/swig/setup.sh index 73fa7aea7..302c55502 100755 --- a/deepspeech/decoders/swig/setup.sh +++ b/deepspeech/decoders/swig/setup.sh @@ -10,7 +10,7 @@ fi if [ ! -d openfst-1.6.3 ]; then echo "Download and extract openfst ..." - wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz + wget http://www.openfst.org/twiki/pub/FST/FstDownload/openfst-1.6.3.tar.gz --no-check-certificate tar -xzvf openfst-1.6.3.tar.gz echo -e "\n" fi diff --git a/tests/benchmark/.gitignore b/tests/benchmark/.gitignore deleted file mode 100644 index 7d166b066..000000000 --- a/tests/benchmark/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -old-pd_env.txt -pd_env.txt diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md index d21999ab3..fdfc6328f 100644 --- a/tests/benchmark/README.md +++ b/tests/benchmark/README.md @@ -1,11 +1,22 @@ -# Benchmark Test +### Prepare the environment +Please follow the instructions shown in [here](https://github.com/PaddlePaddle/DeepSpeech/blob/develop/docs/src/install.md) to install the Deepspeech first. -## Data +### Prepare the benchmark environment +``` +bash prepare.sh +``` -* Aishell +### Start benchmarking +``` +bash run.sh +``` -## Docker +### Analyse the sp +``` +bash run_analysis_sp.sh +``` +### Analyse the mp ``` -registry.baidubce.com/paddlepaddle/paddle 2.1.1-gpu-cuda10.2-cudnn7 59d5ec1de486 +bash run_analysis_mp.sh ``` diff --git a/benchmark/analysis.py b/tests/benchmark/analysis.py similarity index 100% rename from benchmark/analysis.py rename to tests/benchmark/analysis.py diff --git a/benchmark/prepare.sh b/tests/benchmark/prepare.sh similarity index 71% rename from benchmark/prepare.sh rename to tests/benchmark/prepare.sh index 483caaab7..c934f487b 100644 --- a/benchmark/prepare.sh +++ b/tests/benchmark/prepare.sh @@ -1,7 +1,7 @@ source ../tools/venv/bin/activate -#进入执行目录 +#Enter the example dir pushd ../examples/aishell/s1 -#准备数据 +#Prepare the data bash run.sh --stage 0 --stop_stage 0 diff --git a/benchmark/run.sh b/tests/benchmark/run.sh similarity index 100% rename from benchmark/run.sh rename to tests/benchmark/run.sh diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh deleted file mode 100755 index 6f707cdcb..000000000 --- a/tests/benchmark/run_all.sh +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash - -CUR_DIR=${PWD} -ROOT_DIR=../../ - -# 鎻愪緵鍙ǔ瀹氬鐜版ц兘鐨勮剼鏈紝榛樿鍦ㄦ爣鍑哾ocker鐜鍐卲y37鎵ц锛 -# collect env info -bash ${ROOT_DIR}/utils/pd_env_collect.sh -#cat pd_env.txt - - -# 1 瀹夎璇ユā鍨嬮渶瑕佺殑渚濊禆 (濡傞渶寮鍚紭鍖栫瓥鐣ヨ娉ㄦ槑) -#pushd ${ROOT_DIR}/tools; make; popd -#source ${ROOT_DIR}/tools/venv/bin/activate -#pushd ${ROOT_DIR}; bash setup.sh; popd - - -# 2 鎷疯礉璇ユā鍨嬮渶瑕佹暟鎹侀璁粌妯″瀷 - -# 鎵ц鐩綍锛氶渶璇存槑 -#pushd ${ROOT_DIR}/examples/aishell/s1 -pushd ${ROOT_DIR}/examples/tiny/s1 - -mkdir -p exp/log -. path.sh -#bash local/data.sh &> exp/log/data.log - -# 3 鎵归噺杩愯锛堝涓嶆柟渚挎壒閲忥紝1锛2闇鏀惧埌鍗曚釜妯″瀷涓級 - -model_mode_list=(conformer transformer) -fp_item_list=(fp32) -bs_item_list=(32 64 96) -for model_mode in ${model_mode_list[@]}; do - for fp_item in ${fp_item_list[@]}; do - for bs_item in ${bs_item_list[@]} - do - echo "index is speed, 1gpus, begin, ${model_name}" - run_mode=sp - CUDA_VISIBLE_DEVICES=0 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # (5min) - sleep 60 - echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_name}" - run_mode=mp - CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ${CUR_DIR}/run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} - sleep 60 - done - done -done - -popd # aishell/s1 diff --git a/benchmark/run_analysis_mp.sh b/tests/benchmark/run_analysis_mp.sh similarity index 100% rename from benchmark/run_analysis_mp.sh rename to tests/benchmark/run_analysis_mp.sh diff --git a/benchmark/run_analysis_sp.sh b/tests/benchmark/run_analysis_sp.sh similarity index 100% rename from benchmark/run_analysis_sp.sh rename to tests/benchmark/run_analysis_sp.sh diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh old mode 100755 new mode 100644 index bd4655d19..c745aaaf3 --- a/tests/benchmark/run_benchmark.sh +++ b/tests/benchmark/run_benchmark.sh @@ -1,40 +1,50 @@ -#!/bin/bash - +#!/usr/bin/env bash set -xe - # 杩愯绀轰緥锛欳UDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode} # 鍙傛暟璇存槑 function _set_params(){ + run_mode=${1:-"sp"} # 鍗曞崱sp|澶氬崱mp - batch_size=${2:-"64"} - fp_item=${3:-"fp32"} # fp32|fp16 - max_iter=${4:-"500"} # 鍙夛紝濡傛灉闇瑕佷慨鏀逛唬鐮佹彁鍓嶄腑鏂 - model_name=${5:-"model_name"} + config_path=${2:-"conf/conformer.yaml"} + output=${3:-"exp/conformer"} + seed=${4:-"0"} + ngpu=${5:-"1"} + profiler_options=${6:-"None"} + batch_size=${7:-"32"} + fp_item=${8:-"fp32"} + TRAIN_LOG_DIR=${9:-$(pwd)} + + benchmark_max_step=0 + run_log_path=${TRAIN_LOG_DIR:-$(pwd)} # TRAIN_LOG_DIR 鍚庣画QA璁剧疆璇ュ弬鏁 # 浠ヤ笅涓嶇敤淇敼 device=${CUDA_VISIBLE_DEVICES//,/ } arr=(${device}) num_gpu_devices=${#arr[*]} - log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices} + log_file=${run_log_path}/recoder_${run_mode}_bs${batch_size}_${fp_item}_ngpu${ngpu}.txt } function _train(){ echo "Train on ${num_gpu_devices} GPUs" echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" + train_cmd="--config=${config_path} + --output=${output} + --seed=${seed} + --nproc=${ngpu} + --profiler-options "${profiler_options}" + --benchmark-batch-size ${batch_size} + --benchmark-max-step ${benchmark_max_step} " - train_cmd="--benchmark-batch-size ${batch_size} - --benchmark-max-step ${max_iter} - conf/${model_name}.yaml ${model_name}" + echo "run_mode "${run_mode} case ${run_mode} in - sp) train_cmd="bash local/train.sh "${train_cmd}"" ;; - mp) - train_cmd="bash local/train.sh "${train_cmd}"" ;; + sp) train_cmd="python3 -u ${BIN_DIR}/train.py "${train_cmd} ;; + mp) train_cmd="python3 -u ${BIN_DIR}/train.py "${train_cmd} ;; *) echo "choose run_mode(sp or mp)"; exit 1; esac - - # 浠ヤ笅涓嶇敤淇敼 + echo ${train_cmd} +# 浠ヤ笅涓嶇敤淇敼 timeout 15m ${train_cmd} > ${log_file} 2>&1 if [ $? -ne 0 ];then echo -e "${model_name}, FAIL" @@ -43,8 +53,7 @@ function _train(){ echo -e "${model_name}, SUCCESS" export job_fail_flag=0 fi - - trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM + kill -9 `ps -ef|grep 'python'|awk '{print $2}'` if [ $run_mode = "mp" -a -d mylog ]; then rm ${log_file} @@ -54,4 +63,3 @@ function _train(){ _set_params $@ _train - From 984cd2dcd3f963cb022a72d2203260c4ab66f5e8 Mon Sep 17 00:00:00 2001 From: huangyuxin Date: Sat, 9 Oct 2021 09:38:03 +0000 Subject: [PATCH 3/3] revise the run_benchmark.sh --- tests/benchmark/run_benchmark.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh index c745aaaf3..c03a08f3b 100644 --- a/tests/benchmark/run_benchmark.sh +++ b/tests/benchmark/run_benchmark.sh @@ -53,7 +53,7 @@ function _train(){ echo -e "${model_name}, SUCCESS" export job_fail_flag=0 fi - kill -9 `ps -ef|grep 'python'|awk '{print $2}'` + trap 'for pid in $(jobs -pr); do kill -KILL $pid; done' INT QUIT TERM if [ $run_mode = "mp" -a -d mylog ]; then rm ${log_file}