parent c66166e0fd    commit aefe9e93a7
@@ -0,0 +1,258 @@
#!/bin/bash
source test_tipc/common_func.sh

# set env
python=python
export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
export model_commit=$(git log|head -n1|awk '{print $2}')
export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
export frame_version=${str_tmp%%.post*}
export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)

# run benchmark sh
# Usage:
# bash test_tipc/benchmark_train.sh config.txt params
# or
# bash test_tipc/benchmark_train.sh config.txt

function func_parser_params(){
    strs=$1
    IFS="="
    array=(${strs})
    tmp=${array[1]}
    echo ${tmp}
}

function func_sed_params(){
    filename=$1
    line=$2
    param_value=$3
    params=`sed -n "${line}p" $filename`
    IFS=":"
    array=(${params})
    key=${array[0]}
    value=${array[1]}
    if [[ $value =~ 'benchmark_train' ]];then
        IFS='='
        _val=(${value})
        param_value="${_val[0]}=${param_value}"
    fi
    new_params="${key}:${param_value}"
    IFS=";"
    cmd="sed -i '${line}s/.*/${new_params}/' '${filename}'"
    eval $cmd
}
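
# Added illustration: func_sed_params rewrites one line of the copied config in place.
# For example, if line 9 of benchmark_train.txt reads
#     --benchmark-batch-size:16
# then `func_sed_params "$FILENAME" 9 "benchmark_train=16"` turns it into
#     --benchmark-batch-size:benchmark_train=16
# and if the current value already carries a "benchmark_train=..." tag, that tag is kept
# in front of the new value.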

function set_gpu_id(){
    string=$1
    _str=${string:1:6}
    IFS="C"
    arr=(${_str})
    M=${arr[0]}
    P=${arr[1]}
    gn=`expr $P - 1`
    gpu_num=`expr $gn / $M`
    seq=`seq -s "," 0 $gpu_num`
    echo $seq
}
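
# Added illustration: set_gpu_id expands a device descriptor of the form N<nodes>C<cards>
# into the per-node GPU id list, e.g.
#     set_gpu_id N1C1   ->  0
#     set_gpu_id N1C4   ->  0,1,2,3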

function get_repo_name(){
    IFS=";"
    cur_dir=$(pwd)
    IFS="/"
    arr=(${cur_dir})
    echo ${arr[-1]}
}

FILENAME=$1
# copy FILENAME as new
new_filename="./test_tipc/benchmark_train.txt"
cmd=`yes|cp $FILENAME $new_filename`
FILENAME=$new_filename
# MODE must be one of ['benchmark_train']
MODE=$2
PARAMS=$3
# bash test_tipc/benchmark_train.sh test_tipc/configs/det_mv3_db_v2_0/train_benchmark.txt benchmark_train dynamic_bs8_null_DP_N1C1
IFS=$'\n'
# parse params from train_benchmark.txt
dataline=`cat $FILENAME`
# parse params
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")

# get the line number of the train_benchmark_params section
line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
# for train log parser
batch_size=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
fp_items=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
epoch=$(func_parser_value "${lines[line_num]}")

line_num=`expr $line_num + 1`
profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"

line_num=`expr $line_num + 1`
flags_value=$(func_parser_value "${lines[line_num]}")
# set flags
IFS=";"
flags_list=(${flags_value})
for _flag in ${flags_list[*]}; do
    cmd="export ${_flag}"
    eval $cmd
done

# set log_name
repo_name=$(get_repo_name)
SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)}  # */benchmark_log
mkdir -p "${SAVE_LOG}/benchmark_log/"
status_log="${SAVE_LOG}/benchmark_log/results.log"

# line numbers in the config file where training params will be replaced
line_python=3
line_gpuid=4
line_precision=6
line_epoch=7
line_batchsize=9
line_profile=13
line_eval_py=24
line_export_py=30
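# Added note: for the conformer train_benchmark.txt added in this PR, these line numbers
# point at the following entries:
#   3 -> python:python3.7            4 -> gpu_list:0|0,1
#   6 -> null:null (precision)       7 -> --benchmark-max-step:50
#   9 -> --benchmark-batch-size:16   13 -> null:null (profiler options)
#   24 -> eval:null                  30 -> norm_export: null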

func_sed_params "$FILENAME" "${line_eval_py}" "null"
func_sed_params "$FILENAME" "${line_export_py}" "null"
func_sed_params "$FILENAME" "${line_python}" "$python"

# parse PARAMS if given, otherwise use the defaults from the config
if [ ! -n "$PARAMS" ] ;then
    # PARAMS is empty: run the default benchmark settings
    IFS="|"
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
else
    # parse params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    params_list=(${PARAMS})
    model_type=${params_list[0]}
    batch_size=${params_list[1]}
    batch_size=`echo ${batch_size} | tr -cd "[0-9]" `
    precision=${params_list[2]}
    # run_process_type=${params_list[3]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    IFS=";"

    if [ ${precision} = "null" ];then
        precision="fp32"
    fi

    fp_items_list=($precision)
    batch_size_list=($batch_size)
    device_num_list=($device_num)
fi
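# Added example: with PARAMS="dynamic_bs8_null_DP_N1C1" (the invocation shown above),
# the parsed fields are model_type=dynamic, batch_size=8, precision=null (mapped to fp32),
# run_mode=DP and device_num=N1C1.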

IFS="|"
for batch_size in ${batch_size_list[*]}; do
    for precision in ${fp_items_list[*]}; do
        for device_num in ${device_num_list[*]}; do
            # sed batchsize and precision
            func_sed_params "$FILENAME" "${line_precision}" "$precision"
            func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
            func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
            gpu_id=$(set_gpu_id $device_num)

            if [ ${#gpu_id} -le 1 ];then
                run_process_type="SingleP"
                log_path="$SAVE_LOG/profiling_log"
                mkdir -p $log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # set the gpu_id to use
                # set profile_option params
                tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`

                # run test_train_inference_python.sh
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                eval $cmd
                eval "cat ${log_path}/${log_name}"

                # run again without profiling
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p $log_path
                mkdir -p $speed_log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_profile}" "null"  # set the profiler option to null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                job_bt=`date '+%Y%m%d%H%M%S'`
                eval $cmd
                job_et=`date '+%Y%m%d%H%M%S'`
                export model_run_time=$((${job_et}-${job_bt}))
                eval "cat ${log_path}/${log_name}"

                # parse log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
                        --model_name ${_model_name} \
                        --base_batch_size ${batch_size} \
                        --run_mode ${run_mode} \
                        --run_process_type ${run_process_type} \
                        --fp_item ${precision} \
                        --keyword ips: \
                        --skip_steps 2 \
                        --device_num ${device_num} \
                        --speed_unit samples/s \
                        --convergence_key loss: "
                echo $cmd
                eval $cmd
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            else
                IFS=";"
                unset_env=`unset CUDA_VISIBLE_DEVICES`
                run_process_type="MultiP"
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p $log_path
                mkdir -p $speed_log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # set the gpu_id to use
                func_sed_params "$FILENAME" "${line_profile}" "null"  # set --profiler-options to null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                job_bt=`date '+%Y%m%d%H%M%S'`
                eval $cmd
                job_et=`date '+%Y%m%d%H%M%S'`
                export model_run_time=$((${job_et}-${job_bt}))
                eval "cat ${log_path}/${log_name}"
                # parse log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"

                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
                        --model_name ${_model_name} \
                        --base_batch_size ${batch_size} \
                        --run_mode ${run_mode} \
                        --run_process_type ${run_process_type} \
                        --fp_item ${precision} \
                        --keyword ips: \
                        --skip_steps 2 \
                        --device_num ${device_num} \
                        --speed_unit images/s \
                        --convergence_key loss: "
                echo $cmd
                eval $cmd
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            fi
        done
    done
done
@@ -0,0 +1,57 @@
===========================train_params===========================
model_name:conformer
python:python3.7
gpu_list:0|0,1
null:null
null:null
--benchmark-max-step:50
null:null
--benchmark-batch-size:16
null:null
null:null
null:null
null:null
##
trainer:norm_train
norm_train: ../paddlespeech/s2t/exps/u2/bin/train.py --config test_tipc/conformer/benchmark_train/conf/conformer.yaml --output test_tipc/conformer/benchmark_train/outputs --seed 1024
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
null:null
null:null
norm_export: null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
null:null
infer_model:null
infer_export:null
infer_quant:null
inference:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
===========================train_benchmark_params==========================
batch_size:16|30
fp_items:fp32
iteration:50
--profiler-options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile"
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
@@ -0,0 +1,159 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Aishell mandarin dataset

Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from pathlib import Path

import soundfile

from utils.utility import download
from utils.utility import unpack

DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')

URL_ROOT_TAG
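# Note (added comment): URL_ROOT_TAG above is a placeholder, not valid Python on its own.
# The TIPC prepare script in this PR rewrites it via sed into `URL_ROOT = '<url>'`
# before this file is used, which defines the URL_ROOT referenced below.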
DATA_URL = URL_ROOT + '/data_aishell_tiny.tgz'
MD5_DATA = '337b1b1ea016761d4fd3225c5b8799b4'
RESOURCE_URL = URL_ROOT + '/resource_aishell.tgz'
MD5_RESOURCE = '957d480a0fcac85fc18e550756f624e5'

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/Aishell",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    print("Creating manifest %s ..." % manifest_path_prefix)
    json_lines = []
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    for line in codecs.open(transcript_path, 'r', 'utf-8'):
        line = line.strip()
        if line == '':
            continue
        audio_id, text = line.split(' ', 1)
        # remove whitespace characters from the transcript text
        text = ''.join(text.split())
        transcript_dict[audio_id] = text

    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        del json_lines[:]
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = os.path.join(data_dir, 'wav', dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                audio_id = os.path.basename(fname)[:-4]
                # skip audio files that have no transcription
                if audio_id not in transcript_dict:
                    continue

                utt2spk = Path(audio_path).parent.name
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                text = transcript_dict[audio_id]
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': str(utt2spk),
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': text
                        },
                        ensure_ascii=False))

                total_sec += duration
                total_text += len(text)
                total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype) + '.meta'
        with open(meta_path, 'w') as f:
            print(f"{dtype}:", file=f)
            print(f"{total_num} utts", file=f)
            print(f"{total_sec / (60*60)} h", file=f)
            print(f"{total_text} text", file=f)
            print(f"{total_text / total_sec} text/sec", file=f)
            print(f"{total_sec / total_num} sec/utt", file=f)
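
# Illustrative note (added): each line written to the manifest is one JSON object like
#   {"utt": "<audio id>", "utt2spk": "<wav file's parent directory name>",
#    "feat": "/absolute/path/to/xxx.wav", "feat_shape": [<duration in seconds>],
#    "text": "<transcript>"}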


def prepare_dataset(url, md5sum, target_dir, manifest_path=None):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'data_aishell_tiny')
    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        # unpack all audio tar files
        audio_dir = os.path.join(data_dir, 'wav')
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for ftar in filelist:
                unpack(os.path.join(subfolder, ftar), subfolder, True)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)

    if manifest_path:
        create_manifest(data_dir, manifest_path)


def main():
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    prepare_dataset(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=args.target_dir,
        manifest_path=args.manifest_prefix)

    prepare_dataset(
        url=RESOURCE_URL,
        md5sum=MD5_RESOURCE,
        target_dir=args.target_dir,
        manifest_path=None)

    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()
@@ -0,0 +1,61 @@
#!/bin/bash
source test_tipc/common_func.sh

FILENAME=$1

# MODE must be one of ['benchmark_train_lite_infer' 'benchmark_train_whole_infer' 'whole_train_whole_infer',
#                      'whole_infer', 'klquant_whole_infer',
#                      'cpp_infer', 'serving_infer', 'benchmark_train']


MODE=$2
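
# Example invocation (added for illustration; both paths below are assumptions, adjust
# them to where this prepare script and the conformer train_benchmark.txt actually live):
#     bash test_tipc/prepare.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train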

dataline=$(cat ${FILENAME})

# parse params
IFS=$'\n'
lines=(${dataline})

# The training params
model_name=$(func_parser_value "${lines[1]}")

echo "model_name:"${model_name}
trainer_list=$(func_parser_value "${lines[14]}")

if [ ${MODE} = "benchmark_train" ];then
    curPath=$(readlink -f "$(dirname "$0")")
    echo "curPath:"${curPath}
    cd ${curPath}/../..
    pip install .
    cd -
    if [ ${model_name} == "conformer" ]; then
        # set the URL for the aishell_tiny dataset
        URL='None'
        echo "URL:"${URL}
        if [ ${URL} == 'None' ];then
            echo "Please contact the author to get the URL."
            exit 1
        fi
        sed -i "s#^URL_ROOT_TAG#URL_ROOT = '${URL}'#g" ${curPath}/conformer/scripts/aishell_tiny.py
        cp ${curPath}/conformer/scripts/aishell_tiny.py ${curPath}/../../dataset/aishell/
        cd ${curPath}/../../examples/aishell/asr1
        source path.sh
        # download audio data
        sed -i "s#aishell.py#aishell_tiny.py#g" ./local/data.sh
        bash ./local/data.sh || exit 1
        if [ $? -ne 0 ]; then
            exit 1
        fi
        mkdir -p ${curPath}/conformer/benchmark_train/
        cp -rf conf ${curPath}/conformer/benchmark_train/
        cp -rf data ${curPath}/conformer/benchmark_train/
        cd ${curPath}

        sed -i "s#accum_grad: 2#accum_grad: 1#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
        sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
        sed -i "s#conf/#test_tipc/conformer/benchmark_train/conf/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
        sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/tuning/decode.yaml
        sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/preprocess.yaml

    fi
fi
@@ -0,0 +1,385 @@
#!/bin/bash
source test_tipc/common_func.sh

FILENAME=$1
# MODE must be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
MODE=$2

dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)

# parse params
IFS=$'\n'
lines=(${dataline})

# The training params
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_params "${lines[8]}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
train_model_name=$(func_parser_value "${lines[10]}")
train_infer_img_dir=$(func_parser_value "${lines[11]}")
train_param_key1=$(func_parser_key "${lines[12]}")
train_param_value1=$(func_parser_value "${lines[12]}")

trainer_list=$(func_parser_value "${lines[14]}")
trainer_norm=$(func_parser_key "${lines[15]}")
norm_trainer=$(func_parser_value "${lines[15]}")
pact_key=$(func_parser_key "${lines[16]}")
pact_trainer=$(func_parser_value "${lines[16]}")
fpgm_key=$(func_parser_key "${lines[17]}")
fpgm_trainer=$(func_parser_value "${lines[17]}")
distill_key=$(func_parser_key "${lines[18]}")
distill_trainer=$(func_parser_value "${lines[18]}")
trainer_key1=$(func_parser_key "${lines[19]}")
trainer_value1=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")

eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")

save_infer_key=$(func_parser_key "${lines[27]}")
save_infer_value=$(func_parser_value "${lines[27]}")
export_weight=$(func_parser_key "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")
pact_export=$(func_parser_value "${lines[30]}")
fpgm_export=$(func_parser_value "${lines[31]}")
distill_export=$(func_parser_value "${lines[32]}")
export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
inference_dir=$(func_parser_value "${lines[35]}")

# parse inference model params
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_list=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")
# parse inference params
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
benchmark_key=$(func_parser_key "${lines[49]}")
benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")

# parse klquant_infer params
if [ ${MODE} = "klquant_whole_infer" ]; then
    dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
    lines=(${dataline})
    model_name=$(func_parser_value "${lines[1]}")
    python=$(func_parser_value "${lines[2]}")
    # parse inference model params
    infer_model_dir_list=$(func_parser_value "${lines[3]}")
    infer_export_list=$(func_parser_value "${lines[4]}")
    infer_is_quant=$(func_parser_value "${lines[5]}")
    # parse inference params
    inference_py=$(func_parser_value "${lines[6]}")
    use_gpu_key=$(func_parser_key "${lines[7]}")
    use_gpu_list=$(func_parser_value "${lines[7]}")
    use_mkldnn_key=$(func_parser_key "${lines[8]}")
    use_mkldnn_list=$(func_parser_value "${lines[8]}")
    cpu_threads_key=$(func_parser_key "${lines[9]}")
    cpu_threads_list=$(func_parser_value "${lines[9]}")
    batch_size_key=$(func_parser_key "${lines[10]}")
    batch_size_list=$(func_parser_value "${lines[10]}")
    use_trt_key=$(func_parser_key "${lines[11]}")
    use_trt_list=$(func_parser_value "${lines[11]}")
    precision_key=$(func_parser_key "${lines[12]}")
    precision_list=$(func_parser_value "${lines[12]}")
    infer_model_key=$(func_parser_key "${lines[13]}")
    image_dir_key=$(func_parser_key "${lines[14]}")
    infer_img_dir=$(func_parser_value "${lines[14]}")
    save_log_key=$(func_parser_key "${lines[15]}")
    benchmark_key=$(func_parser_key "${lines[16]}")
    benchmark_value=$(func_parser_value "${lines[16]}")
    infer_key1=$(func_parser_key "${lines[17]}")
    infer_value1=$(func_parser_value "${lines[17]}")
fi

save_model_value=$(func_parser_value "${lines[7]}")
if [[ ${save_model_value} = " " ]] || [[ ${save_model_value} = "null" ]] || [[ ${save_model_value} = "" ]];then
    LOG_PATH="./test_tipc/output"
else
    LOG_PATH=${save_model_value}
fi
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"


function func_inference(){
    IFS='|'
    _python=$1
    _script=$2
    _model_dir=$3
    _log_path=$4
    _img_dir=$5
    _flag_quant=$6
    # inference
    for use_gpu in ${use_gpu_list[*]}; do
        if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
            for use_mkldnn in ${use_mkldnn_list[*]}; do
                if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
                    continue
                fi
                for threads in ${cpu_threads_list[*]}; do
                    for batch_size in ${batch_size_list[*]}; do
                        for precision in ${precision_list[*]}; do
                            if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
                                continue
                            fi  # skip when fp16 is requested but mkldnn is disabled
                            if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
                                continue
                            fi  # skip quant model inference when precision is not int8
                            set_precision=$(func_set_params "${precision_key}" "${precision}")

                            _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                            set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                            set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                            set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                            set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
                            set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                            set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                            command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                            eval $command
                            last_status=${PIPESTATUS[0]}
                            eval "cat ${_save_log_path}"
                            status_check $last_status "${command}" "${status_log}"
                        done
                    done
                done
            done
        elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
            for use_trt in ${use_trt_list[*]}; do
                for precision in ${precision_list[*]}; do
                    if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
                        continue
                    fi
                    if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
                        continue
                    fi
                    if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
                        continue
                    fi
                    for batch_size in ${batch_size_list[*]}; do
                        _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
                        set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                        set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                        set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                        set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
                        set_precision=$(func_set_params "${precision_key}" "${precision}")
                        set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                        set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                        eval $command
                        last_status=${PIPESTATUS[0]}
                        eval "cat ${_save_log_path}"
                        status_check $last_status "${command}" "${status_log}"

                    done
                done
            done
        else
            echo "Currently only CPU and GPU inference are supported!"
        fi
    done
}

if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
    GPUID=$3
    if [ ${#GPUID} -le 0 ];then
        env=" "
    else
        env="export CUDA_VISIBLE_DEVICES=${GPUID}"
    fi
    # set CUDA_VISIBLE_DEVICES
    eval $env
    export Count=0
    IFS="|"
    infer_run_exports=(${infer_export_list})
    infer_quant_flag=(${infer_is_quant})
    for infer_model in ${infer_model_dir_list[*]}; do
        # run export
        if [ ${infer_run_exports[Count]} != "null" ];then
            save_infer_dir=$(dirname $infer_model)
            set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
            set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
            # For lac, which needs the `data_dir` arg
            set_export1_key=$(func_set_params "${export_key1}" "${export_value1}")
            export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key} ${set_export1_key}"
            echo ${infer_run_exports[Count]}
            echo $export_cmd
            eval $export_cmd
            status_export=$?
            status_check $status_export "${export_cmd}" "${status_log}"
        else
            save_infer_dir=${infer_model}
        fi
        # run inference
        is_quant=${infer_quant_flag[Count]}
        if [ ${MODE} = "klquant_whole_infer" ]; then
            is_quant="True"
        fi
        func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
        Count=$(($Count + 1))
    done
else
    IFS="|"
    export Count=0
    USE_GPU_KEY=(${train_use_gpu_value})
    for gpu in ${gpu_list[*]}; do
        train_use_gpu=${USE_GPU_KEY[Count]}
        Count=$(($Count + 1))
        ips=""
        if [ ${gpu} = "-1" ];then
            env=""
        elif [ ${#gpu} -le 1 ];then
            env="export CUDA_VISIBLE_DEVICES=${gpu}"
            eval ${env}
        elif [ ${#gpu} -le 15 ];then
            IFS=","
            array=(${gpu})
            env="export CUDA_VISIBLE_DEVICES=${array[0]}"
            IFS="|"
        else
            IFS=";"
            array=(${gpu})
            ips=${array[0]}
            gpu=${array[1]}
            IFS="|"
            env=" "
        fi
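        # Added note on gpu_list values, matching the branches above:
        #   "-1"           -> no CUDA_VISIBLE_DEVICES is exported (CPU run)
        #   "0"            -> a single GPU id
        #   "0,1"          -> several GPUs on one machine
        #   "ip1,ip2;0,1"  -> multi-machine: IPs before ";", GPU ids after it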
        for autocast in ${autocast_list[*]}; do
            if [ ${autocast} = "amp" ]; then
                set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
            else
                set_amp_config=" "
            fi
            for trainer in ${trainer_list[*]}; do
                flag_quant=False
                if [ ${trainer} = ${pact_key} ]; then
                    run_train=${pact_trainer}
                    run_export=${pact_export}
                    flag_quant=True
                elif [ ${trainer} = "${fpgm_key}" ]; then
                    run_train=${fpgm_trainer}
                    run_export=${fpgm_export}
                elif [ ${trainer} = "${distill_key}" ]; then
                    run_train=${distill_trainer}
                    run_export=${distill_export}
                elif [ ${trainer} = ${trainer_key1} ]; then
                    run_train=${trainer_value1}
                    run_export=${export_value1}
                elif [[ ${trainer} = ${trainer_key2} ]]; then
                    run_train=${trainer_value2}
                    run_export=${export_value2}
                else
                    run_train=${norm_trainer}
                    run_export=${norm_export}
                fi

                if [ ${run_train} = "null" ]; then
                    continue
                fi
                set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
                set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
                set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
                set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
                set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
                set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
                if [ ${#ips} -le 26 ];then
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
                    nodes=1
                else
                    IFS=","
                    ips_array=(${ips})
                    IFS="|"
                    nodes=${#ips_array[@]}
                    save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
                fi

                # load pretrain from norm training if the current trainer is the pact or fpgm trainer
                if ([ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]) && [ ${nodes} -le 1 ]; then
                    set_pretrain="${load_norm_train_model}"
                fi

                set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
                if [ ${#gpu} -le 2 ];then  # train with cpu or single gpu
                    cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
                elif [ ${#ips} -le 26 ];then  # train with multi-gpu
                    cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                else  # train with multi-machine
                    cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
                fi
                # run train
                eval "unset CUDA_VISIBLE_DEVICES"
                eval $cmd
                status_check $? "${cmd}" "${status_log}"

                set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
                # save the norm-trained model as the pretrained model for pact and fpgm training
                if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1 ]; then
                    load_norm_train_model=${set_eval_pretrain}
                fi
                # run eval
                if [ ${eval_py} != "null" ]; then
                    set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
                    eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
                    eval $eval_cmd
                    status_check $? "${eval_cmd}" "${status_log}"
                fi
                # run export model
                if [ ${run_export} != "null" ]; then
                    # run export model
                    save_infer_path="${save_infer_value}"
                    set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
                    set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
                    # For lac, which needs the `data_dir` arg
                    set_export1_key=$(func_set_params "${export_key1}" "${export_value1}")
                    export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} ${set_export1_key}"
                    eval $export_cmd
                    status_check $? "${export_cmd}" "${status_log}"

                    # run inference
                    eval $env
                    save_infer_path="${save_infer_value}"

                    if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
                        infer_model_dir="${save_infer_path}/${inference_dir}"
                    else
                        infer_model_dir=${save_infer_path}
                    fi
                    func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"

                    eval "unset CUDA_VISIBLE_DEVICES"
                fi
            done  # done with:    for trainer in ${trainer_list[*]}; do
        done  # done with:    for autocast in ${autocast_list[*]}; do
    done  # done with:    for gpu in ${gpu_list[*]}; do
fi  # done with: if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]