Add TIPC benchmark of conformer

pull/1439/head
huangyuxin 3 years ago
parent c66166e0fd
commit aefe9e93a7

@@ -175,7 +175,7 @@ class U2Trainer(Trainer):
                 observation['batch_cost'] = observation[
                     'reader_cost'] + observation['step_cost']
                 observation['samples'] = observation['batch_size']
-                observation['ips,sent./sec'] = observation[
+                observation['ips,samples/s'] = observation[
                     'batch_size'] / observation['batch_cost']
                 for k, v in observation.items():
                     msg += f" {k.split(',')[0]}: "

@@ -252,8 +252,7 @@ class Trainer():
             if self.args.benchmark_max_step and self.iteration > self.args.benchmark_max_step:
                 logger.info(
                     f"Reach benchmark-max-step: {self.args.benchmark_max_step}")
-                sys.exit(
-                    f"Reach benchmark-max-step: {self.args.benchmark_max_step}")
+                sys.exit(0)

     def do_train(self):
         """The training process control by epoch."""
@@ -282,7 +281,7 @@ class Trainer():
                 observation['batch_cost'] = observation[
                     'reader_cost'] + observation['step_cost']
                 observation['samples'] = observation['batch_size']
-                observation['ips[sent./sec]'] = observation[
+                observation['ips samples/s'] = observation[
                     'batch_size'] / observation['batch_cost']
                 for k, v in observation.items():
                     msg += f" {k}: "

@@ -0,0 +1,258 @@
#!/bin/bash
source test_tipc/common_func.sh
# set env
python=python
export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
export model_commit=$(git log|head -n1|awk '{print $2}')
export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
export frame_version=${str_tmp%%.post*}
export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
# run benchmark sh
# Usage:
#   bash test_tipc/benchmark_train.sh config.txt benchmark_train [params]
function func_parser_params(){
    # return the value part of a "key=value" string
    strs=$1
    IFS="="
    array=(${strs})
    tmp=${array[1]}
    echo ${tmp}
}

function func_sed_params(){
    # replace the value of line ${line} in ${filename} with ${param_value};
    # if the current value has a "benchmark_train=" style prefix, keep the prefix
    # and only replace the part after "="
    filename=$1
    line=$2
    param_value=$3
    params=`sed -n "${line}p" $filename`
    IFS=":"
    array=(${params})
    key=${array[0]}
    value=${array[1]}

    if [[ $value =~ 'benchmark_train' ]];then
        IFS='='
        _val=(${value})
        param_value="${_val[0]}=${param_value}"
    fi
    new_params="${key}:${param_value}"
    IFS=";"
    cmd="sed -i '${line}s/.*/${new_params}/' '${filename}'"
    eval $cmd
}

function set_gpu_id(){
    # map a device descriptor such as N1C4 to a CUDA device list, e.g. "0,1,2,3"
    string=$1
    _str=${string:1:6}
    IFS="C"
    arr=(${_str})
    M=${arr[0]}
    P=${arr[1]}
    gn=`expr $P - 1`
    gpu_num=`expr $gn / $M`
    seq=`seq -s "," 0 $gpu_num`
    echo $seq
}

function get_repo_name(){
    # name of the current repo directory, used as the log-name prefix
    IFS=";"
    cur_dir=$(pwd)
    IFS="/"
    arr=(${cur_dir})
    echo ${arr[-1]}
}
FILENAME=$1
# copy FILENAME as new
new_filename="./test_tipc/benchmark_train.txt"
cmd=`yes|cp $FILENAME $new_filename`
FILENAME=$new_filename
# MODE must be one of ['benchmark_train']
MODE=$2
PARAMS=$3
# e.g. bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train dynamic_bs16_fp32_DP_N1C1
# parse params from train_benchmark.txt
IFS=$'\n'
dataline=`cat $FILENAME`
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
# get the line number of the train_benchmark_params section
line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
# for train log parser
batch_size=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
fp_items=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
epoch=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"
line_num=`expr $line_num + 1`
flags_value=$(func_parser_value "${lines[line_num]}")
# set flags
IFS=";"
flags_list=(${flags_value})
for _flag in ${flags_list[*]}; do
cmd="export ${_flag}"
eval $cmd
done
# set log_name
repo_name=$(get_repo_name )
SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
mkdir -p "${SAVE_LOG}/benchmark_log/"
status_log="${SAVE_LOG}/benchmark_log/results.log"
# line numbers (in the config copy) where training params will be replaced
line_python=3
line_gpuid=4
line_precision=6
line_epoch=7
line_batchsize=9
line_profile=13
line_eval_py=24
line_export_py=30
func_sed_params "$FILENAME" "${line_eval_py}" "null"
func_sed_params "$FILENAME" "${line_export_py}" "null"
func_sed_params "$FILENAME" "${line_python}" "$python"
# if no PARAMS given, sweep the default search space from the config
if [ -z "$PARAMS" ] ;then
    # PARAMS is empty: use the batch_size / fp_items lists parsed above
    IFS="|"
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
else
    # parse params from input: ${modeltype}_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    params_list=(${PARAMS})
    model_type=${params_list[0]}
    batch_size=${params_list[1]}
    batch_size=`echo ${batch_size} | tr -cd "[0-9]" `
    precision=${params_list[2]}
    # run_process_type=${params_list[3]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    IFS=";"

    if [ ${precision} = "null" ];then
        precision="fp32"
    fi

    fp_items_list=($precision)
    batch_size_list=($batch_size)
    device_num_list=($device_num)
fi
IFS="|"
for batch_size in ${batch_size_list[*]}; do
    for precision in ${fp_items_list[*]}; do
        for device_num in ${device_num_list[*]}; do
            # sed batch size and precision into the config copy
            func_sed_params "$FILENAME" "${line_precision}" "$precision"
            func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
            func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
            gpu_id=$(set_gpu_id $device_num)

            if [ ${#gpu_id} -le 1 ];then
                run_process_type="SingleP"
                log_path="$SAVE_LOG/profiling_log"
                mkdir -p $log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
                # set profile_option params
                tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`

                # run test_train_inference_python.sh with profiling enabled
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                eval $cmd
                eval "cat ${log_path}/${log_name}"

                # run again without profiling
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p $log_path
                mkdir -p $speed_log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_profile}" "null"  # set profile option to null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                job_bt=`date '+%Y%m%d%H%M%S'`
                eval $cmd
                job_et=`date '+%Y%m%d%H%M%S'`
                export model_run_time=$((${job_et}-${job_bt}))
                eval "cat ${log_path}/${log_name}"

                # parse the training log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
                        --model_name ${_model_name} \
                        --base_batch_size ${batch_size} \
                        --run_mode ${run_mode} \
                        --run_process_type ${run_process_type} \
                        --fp_item ${precision} \
                        --keyword ips: \
                        --skip_steps 2 \
                        --device_num ${device_num} \
                        --speed_unit samples/s \
                        --convergence_key loss: "
                echo $cmd
                eval $cmd
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            else
                IFS=";"
                unset_env=`unset CUDA_VISIBLE_DEVICES`
                run_process_type="MultiP"
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p $log_path
                mkdir -p $speed_log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id
                func_sed_params "$FILENAME" "${line_profile}" "null"  # set --profiler-options to null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                job_bt=`date '+%Y%m%d%H%M%S'`
                eval $cmd
                job_et=`date '+%Y%m%d%H%M%S'`
                export model_run_time=$((${job_et}-${job_bt}))
                eval "cat ${log_path}/${log_name}"

                # parse the training log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                        --speed_log_file '${speed_log_path}/${speed_log_name}' \
                        --model_name ${_model_name} \
                        --base_batch_size ${batch_size} \
                        --run_mode ${run_mode} \
                        --run_process_type ${run_process_type} \
                        --fp_item ${precision} \
                        --keyword ips: \
                        --skip_steps 2 \
                        --device_num ${device_num} \
                        --speed_unit samples/s \
                        --convergence_key loss: "
                echo $cmd
                eval $cmd
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            fi
        done
    done
done
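
For reference, the two ways the script above is meant to be invoked (the config path follows the conformer config added later in this commit; the PARAMS value is illustrative):

```shell
# Sweep the default search space from the config
# (batch_size and fp_items lists, device_num N1C4, run_mode DP):
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train

# Run a single configuration; the third argument is
# ${modeltype}_bs${batch_size}_${fp_item}_${run_mode}_${device_num}, e.g.
#   dynamic -> model type, bs16 -> batch size, fp32 -> precision,
#   DP -> run mode, N1C1 -> 1 node x 1 GPU
#   (set_gpu_id maps N1C4 to CUDA devices "0,1,2,3")
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train dynamic_bs16_fp32_DP_N1C1
```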

@@ -0,0 +1,57 @@
===========================train_params===========================
model_name:conformer
python:python3.7
gpu_list:0|0,1
null:null
null:null
--benchmark-max-step:50
null:null
--benchmark-batch-size:16
null:null
null:null
null:null
null:null
##
trainer:norm_train
norm_train: ../paddlespeech/s2t/exps/u2/bin/train.py --config test_tipc/conformer/benchmark_train/conf/conformer.yaml --output test_tipc/conformer/benchmark_train/outputs --seed 1024
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
null:null
null:null
norm_export:null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
null:null
infer_model:null
infer_export:null
infer_quant:null
inference:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
===========================train_benchmark_params==========================
batch_size:16|30
fp_items:fp32
iteration:50
--profiler-options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile"
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
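
A note on how `benchmark_train.sh` consumes this file: every entry is a `key:value` line (`null:null` marks unused slots), list-valued entries such as `batch_size:16|30` are split on `|`, and the script rewrites fixed line numbers of its working copy (the `line_*` variables above). A minimal sketch of reading one entry with plain shell, assuming the layout shown here:

```shell
# Read line 9 ("--benchmark-batch-size:16") and split it into key and value,
# roughly what func_parser_key/func_parser_value do.
cfg=test_tipc/configs/conformer/train_benchmark.txt
entry=$(sed -n '9p' "$cfg")   # --benchmark-batch-size:16
key=${entry%%:*}              # --benchmark-batch-size
value=${entry#*:}             # 16

# List-valued entries are '|'-separated candidates:
IFS='|' read -ra bs_list <<< "16|30"
echo "${key} -> ${value}; batch sizes: ${bs_list[*]}"
```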

@@ -0,0 +1,159 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Aishell mandarin dataset
Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from pathlib import Path
import soundfile
from utils.utility import download
from utils.utility import unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT_TAG  # NOTE: test_tipc/prepare.sh replaces this tag with URL_ROOT = '<dataset url>'
DATA_URL = URL_ROOT + '/data_aishell_tiny.tgz'
MD5_DATA = '337b1b1ea016761d4fd3225c5b8799b4'
RESOURCE_URL = URL_ROOT + '/resource_aishell.tgz'
MD5_RESOURCE = '957d480a0fcac85fc18e550756f624e5'

parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/Aishell",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()


def create_manifest(data_dir, manifest_path_prefix):
    print("Creating manifest %s ..." % manifest_path_prefix)
    json_lines = []
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    for line in codecs.open(transcript_path, 'r', 'utf-8'):
        line = line.strip()
        if line == '':
            continue
        audio_id, text = line.split(' ', 1)
        # remove whitespace between characters
        text = ''.join(text.split())
        transcript_dict[audio_id] = text

    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        del json_lines[:]
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = os.path.join(data_dir, 'wav', dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                audio_id = os.path.basename(fname)[:-4]
                # skip audio files that have no transcription
                if audio_id not in transcript_dict:
                    continue

                utt2spk = Path(audio_path).parent.name
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                text = transcript_dict[audio_id]
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': str(utt2spk),
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': text
                        },
                        ensure_ascii=False))

                total_sec += duration
                total_text += len(text)
                total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype) + '.meta'
        with open(meta_path, 'w') as f:
            print(f"{dtype}:", file=f)
            print(f"{total_num} utts", file=f)
            print(f"{total_sec / (60*60)} h", file=f)
            print(f"{total_text} text", file=f)
            print(f"{total_text / total_sec} text/sec", file=f)
            print(f"{total_sec / total_num} sec/utt", file=f)


def prepare_dataset(url, md5sum, target_dir, manifest_path=None):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'data_aishell_tiny')
    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        # unpack all audio tar files
        audio_dir = os.path.join(data_dir, 'wav')
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for ftar in filelist:
                unpack(os.path.join(subfolder, ftar), subfolder, True)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)

    if manifest_path:
        create_manifest(data_dir, manifest_path)


def main():
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    prepare_dataset(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=args.target_dir,
        manifest_path=args.manifest_prefix)
    prepare_dataset(
        url=RESOURCE_URL,
        md5sum=MD5_RESOURCE,
        target_dir=args.target_dir,
        manifest_path=None)
    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()
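
If the data-prep script above is run on its own (prepare.sh normally drives it through examples/aishell/asr1/local/data.sh), it takes only the two flags defined by its argparse parser; the paths below are an example, not fixed values:

```shell
# Hypothetical standalone run after prepare.sh has copied the script into
# dataset/aishell/ and filled in URL_ROOT.
python dataset/aishell/aishell_tiny.py \
    --target_dir="${HOME}/.cache/paddle/dataset/speech/Aishell" \
    --manifest_prefix="data/manifest"
# Writes data/manifest.{train,dev,test} plus data/{train,dev,test}.meta files.
```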

@@ -0,0 +1,53 @@
# TIPC Benchmark Test on Linux

This document describes the TIPC benchmark test. The main entry point is `benchmark_train.sh`, which is used to monitor the training performance of the models.

## 1. Test workflow

### 1.1 Prepare data and environment

Run the commands below from `<repo root>/tests`.

Run `test_tipc/prepare.sh` to prepare the training data and install the required environment.

```shell
# usage: bash test_tipc/prepare.sh train_benchmark.txt mode
bash test_tipc/prepare.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train
```

### 1.2 Run the test

Run `test_tipc/benchmark_train.sh` to train the model and parse the training logs.

```shell
# usage: bash test_tipc/benchmark_train.sh train_benchmark.txt mode
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train
```

`test_tipc/benchmark_train.sh` also accepts a third argument to run a single training configuration, for example:

```shell
# usage: bash test_tipc/benchmark_train.sh train_benchmark.txt mode params
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train dynamic_bs16_fp32_DP_N1C1
```

`dynamic_bs16_fp32_DP_N1C1` is the parameter passed to `test_tipc/benchmark_train.sh`, in the format
`${modeltype}_${batch_size}_${fp_item}_${run_mode}_${device_num}`.
It encodes the model type, the batch size, the training precision (e.g. fp32, fp16), the distributed run mode, and the machines/GPUs used for distributed training (e.g. N1C1 for a single machine with a single GPU).

## 2. Log output

After the run, the training logs and the parsed results are saved. With the `test_tipc/configs/conformer/train_benchmark.txt` config file, a parsed training-log entry looks like:

```
{"model_branch": "dygaph", "model_commit": "", "model_name": "conformer_bs16_fp32_SingleP_DP", "batch_size": 16, "fp_item": "fp32", "run_process_type": "SingleP", "run_mode": "DP", "convergence_value": "", "convergence_key": "loss:", "ips": , "speed_unit": "samples/s", "device_num": "N1C1", "model_run_time": "0", "frame_commit": "", "frame_version": "0.0.0"}
```

The training logs and the parsed results are saved under the `test` directory, organized as follows:

```
test/
├── index
│   ├── tests_conformer_bs16_fp32_SingleP_DP_N1C1_speed
│   └── tests_conformer_bs16_fp32_SingleP_DP_N1C8_speed
├── profiling_log
│   └── tests_conformer_bs16_fp32_SingleP_DP_N1C1_profiling
└── train_log
    ├── tests_conformer_bs16_fp32_SingleP_DP_N1C1_log
    └── tests_conformer_bs16_fp32_SingleP_DP_N1C8_log
```
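
The output location can be redirected: `benchmark_train.sh` writes under `${BENCHMARK_LOG_DIR}` when that variable is set, otherwise under the current directory. For example (the export path is a placeholder):

```shell
export BENCHMARK_LOG_DIR=/path/to/benchmark_output   # optional, example path
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train dynamic_bs16_fp32_DP_N1C1

# parsed speed results (one JSON line per run, as in the sample above)
cat "${BENCHMARK_LOG_DIR}/index/"*_speed
# status of each executed command
cat "${BENCHMARK_LOG_DIR}/benchmark_log/results.log"
```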

@@ -0,0 +1,61 @@
#!/bin/bash
source test_tipc/common_func.sh
FILENAME=$1
# MODE must be one of ['benchmark_train_lite_infer' 'benchmark_train_whole_infer' 'whole_train_whole_infer',
# 'whole_infer', 'klquant_whole_infer',
# 'cpp_infer', 'serving_infer', 'benchmark_train']
MODE=$2
dataline=$(cat ${FILENAME})
# parser params
IFS=$'\n'
lines=(${dataline})
# The training params
model_name=$(func_parser_value "${lines[1]}")
echo "model_name:"${model_name}
trainer_list=$(func_parser_value "${lines[14]}")
if [ ${MODE} = "benchmark_train" ];then
    curPath=$(readlink -f "$(dirname "$0")")
    echo "curPath:"${curPath}
    cd ${curPath}/../..
    pip install .
    cd -
    if [ ${model_name} == "conformer" ]; then
        # set the URL for the aishell_tiny dataset
        URL='None'
        echo "URL:"${URL}
        if [ ${URL} == 'None' ];then
            echo "Please contact the author to get the URL."
            exit
        fi
        sed -i "s#^URL_ROOT_TAG#URL_ROOT = '${URL}'#g" ${curPath}/conformer/scripts/aishell_tiny.py
        cp ${curPath}/conformer/scripts/aishell_tiny.py ${curPath}/../../dataset/aishell/
        cd ${curPath}/../../examples/aishell/asr1
        source path.sh
        # download audio data
        sed -i "s#aishell.py#aishell_tiny.py#g" ./local/data.sh
        bash ./local/data.sh || exit -1
        if [ $? -ne 0 ]; then
            exit 1
        fi
        mkdir -p ${curPath}/conformer/benchmark_train/
        cp -rf conf ${curPath}/conformer/benchmark_train/
        cp -rf data ${curPath}/conformer/benchmark_train/
        cd ${curPath}
        sed -i "s#accum_grad: 2#accum_grad: 1#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
        sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
        sed -i "s#conf/#test_tipc/conformer/benchmark_train/conf/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
        sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/tuning/decode.yaml
        sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/preprocess.yaml
    fi
fi
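
As described in the benchmark doc above, data preparation is run from the `tests` directory; note that the aishell_tiny download URL must be filled in first (it defaults to 'None' in this script, which aborts with a hint):

```shell
# Run from <repo root>/tests: pip-installs the repo, patches local/data.sh to
# use aishell_tiny.py, and copies conf/ and data/ into
# test_tipc/conformer/benchmark_train/.
bash test_tipc/prepare.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train
```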

@@ -0,0 +1,385 @@
#!/bin/bash
source test_tipc/common_func.sh
FILENAME=$1
# MODE must be one of ['lite_train_lite_infer' 'lite_train_whole_infer' 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
MODE=$2
dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
# parser params
IFS=$'\n'
lines=(${dataline})
# The training params
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_params "${lines[8]}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
train_model_name=$(func_parser_value "${lines[10]}")
train_infer_img_dir=$(func_parser_value "${lines[11]}")
train_param_key1=$(func_parser_key "${lines[12]}")
train_param_value1=$(func_parser_value "${lines[12]}")
trainer_list=$(func_parser_value "${lines[14]}")
trainer_norm=$(func_parser_key "${lines[15]}")
norm_trainer=$(func_parser_value "${lines[15]}")
pact_key=$(func_parser_key "${lines[16]}")
pact_trainer=$(func_parser_value "${lines[16]}")
fpgm_key=$(func_parser_key "${lines[17]}")
fpgm_trainer=$(func_parser_value "${lines[17]}")
distill_key=$(func_parser_key "${lines[18]}")
distill_trainer=$(func_parser_value "${lines[18]}")
trainer_key1=$(func_parser_key "${lines[19]}")
trainer_value1=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")
eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")
save_infer_key=$(func_parser_key "${lines[27]}")
save_infer_value=$(func_parser_value "${lines[27]}")
export_weight=$(func_parser_key "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")
pact_export=$(func_parser_value "${lines[30]}")
fpgm_export=$(func_parser_value "${lines[31]}")
distill_export=$(func_parser_value "${lines[32]}")
export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
inference_dir=$(func_parser_value "${lines[35]}")
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_list=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")
# parser inference
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
benchmark_key=$(func_parser_key "${lines[49]}")
benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")
# parser klquant_infer
if [ ${MODE} = "klquant_whole_infer" ]; then
dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
# parser inference model
infer_model_dir_list=$(func_parser_value "${lines[3]}")
infer_export_list=$(func_parser_value "${lines[4]}")
infer_is_quant=$(func_parser_value "${lines[5]}")
# parser inference
inference_py=$(func_parser_value "${lines[6]}")
use_gpu_key=$(func_parser_key "${lines[7]}")
use_gpu_list=$(func_parser_value "${lines[7]}")
use_mkldnn_key=$(func_parser_key "${lines[8]}")
use_mkldnn_list=$(func_parser_value "${lines[8]}")
cpu_threads_key=$(func_parser_key "${lines[9]}")
cpu_threads_list=$(func_parser_value "${lines[9]}")
batch_size_key=$(func_parser_key "${lines[10]}")
batch_size_list=$(func_parser_value "${lines[10]}")
use_trt_key=$(func_parser_key "${lines[11]}")
use_trt_list=$(func_parser_value "${lines[11]}")
precision_key=$(func_parser_key "${lines[12]}")
precision_list=$(func_parser_value "${lines[12]}")
infer_model_key=$(func_parser_key "${lines[13]}")
image_dir_key=$(func_parser_key "${lines[14]}")
infer_img_dir=$(func_parser_value "${lines[14]}")
save_log_key=$(func_parser_key "${lines[15]}")
benchmark_key=$(func_parser_key "${lines[16]}")
benchmark_value=$(func_parser_value "${lines[16]}")
infer_key1=$(func_parser_key "${lines[17]}")
infer_value1=$(func_parser_value "${lines[17]}")
fi
save_model_value=$(func_parser_value "${lines[7]}")
if [[ ${save_model_value} = " " ]] || [[ ${save_model_value} = "null" ]] || [[ ${save_model_value} = "" ]];then
LOG_PATH="./test_tipc/output"
else
LOG_PATH=${save_model_value}
fi
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
function func_inference(){
IFS='|'
_python=$1
_script=$2
_model_dir=$3
_log_path=$4
_img_dir=$5
_flag_quant=$6
# inference
for use_gpu in ${use_gpu_list[*]}; do
if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
for use_mkldnn in ${use_mkldnn_list[*]}; do
if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
continue
fi
for threads in ${cpu_threads_list[*]}; do
for batch_size in ${batch_size_list[*]}; do
for precision in ${precision_list[*]}; do
if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
continue
fi # skip when enable fp16 but disable mkldnn
if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
continue
fi # skip when quant model inference but precision is not int8
set_precision=$(func_set_params "${precision_key}" "${precision}")
_save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
done
elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
for use_trt in ${use_trt_list[*]}; do
for precision in ${precision_list[*]}; do
if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
continue
fi
if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
continue
fi
if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
continue
fi
for batch_size in ${batch_size_list[*]}; do
_save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
set_precision=$(func_set_params "${precision_key}" "${precision}")
set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
eval $command
last_status=${PIPESTATUS[0]}
eval "cat ${_save_log_path}"
status_check $last_status "${command}" "${status_log}"
done
done
done
else
echo "Does not support hardware other than CPU and GPU Currently!"
fi
done
}
if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
IFS="|"
infer_run_exports=(${infer_export_list})
infer_quant_flag=(${infer_is_quant})
for infer_model in ${infer_model_dir_list[*]}; do
# run export
if [ ${infer_run_exports[Count]} != "null" ];then
save_infer_dir=$(dirname $infer_model)
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
# For lac which needs the `data_dir` args
set_export1_key=$(func_set_params "${export_key1}" "${export_value1}")
export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key} ${set_export1_key}"
echo ${infer_run_exports[Count]}
echo $export_cmd
eval $export_cmd
status_export=$?
status_check $status_export "${export_cmd}" "${status_log}"
else
save_infer_dir=${infer_model}
fi
#run inference
is_quant=${infer_quant_flag[Count]}
if [ ${MODE} = "klquant_infer" ]; then
is_quant="True"
fi
func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
else
IFS="|"
export Count=0
USE_GPU_KEY=(${train_use_gpu_value})
for gpu in ${gpu_list[*]}; do
train_use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
ips=""
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
IFS=","
array=(${gpu})
env="export CUDA_VISIBLE_DEVICES=${array[0]}"
IFS="|"
else
IFS=";"
array=(${gpu})
ips=${array[0]}
gpu=${array[1]}
IFS="|"
env=" "
fi
for autocast in ${autocast_list[*]}; do
if [ ${autocast} = "amp" ]; then
set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
else
set_amp_config=" "
fi
for trainer in ${trainer_list[*]}; do
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
run_export=${pact_export}
flag_quant=True
elif [ ${trainer} = "${fpgm_key}" ]; then
run_train=${fpgm_trainer}
run_export=${fpgm_export}
elif [ ${trainer} = "${distill_key}" ]; then
run_train=${distill_trainer}
run_export=${distill_export}
elif [ ${trainer} = ${trainer_key1} ]; then
run_train=${trainer_value1}
run_export=${export_value1}
elif [[ ${trainer} = ${trainer_key2} ]]; then
run_train=${trainer_value2}
run_export=${export_value2}
else
run_train=${norm_trainer}
run_export=${norm_export}
fi
if [ ${run_train} = "null" ]; then
continue
fi
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
if [ ${#ips} -le 26 ];then
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
nodes=1
else
IFS=","
ips_array=(${ips})
IFS="|"
nodes=${#ips_array[@]}
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
fi
# load pretrain from norm training if current trainer is pact or fpgm trainer
if ([ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]) && [ ${nodes} -le 1 ]; then
set_pretrain="${load_norm_train_model}"
fi
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
elif [ ${#ips} -le 26 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
eval $cmd
status_check $? "${cmd}" "${status_log}"
set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
# save norm trained models to set pretrain for pact training and fpgm training
if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1 ]; then
load_norm_train_model=${set_eval_pretrain}
fi
# run eval
if [ ${eval_py} != "null" ]; then
set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
eval $eval_cmd
status_check $? "${eval_cmd}" "${status_log}"
fi
# run export model
if [ ${run_export} != "null" ]; then
# run export model
save_infer_path="${save_infer_value}"
set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
# For lac which needs the `data_dir` args
set_export1_key=$(func_set_params "${export_key1}" "${export_value1}")
export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} ${set_export1_key}"
eval $export_cmd
status_check $? "${export_cmd}" "${status_log}"
#run inference
eval $env
save_infer_path="${save_infer_value}"
if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
infer_model_dir="${save_infer_path}/${inference_dir}"
else
infer_model_dir=${save_infer_path}
fi
func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do
done # done with: for autocast in ${autocast_list[*]}; do
done # done with: for gpu in ${gpu_list[*]}; do
fi  # end of: if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]
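
`benchmark_train.sh` calls this script with its rewritten config copy; it can also be invoked directly with the conformer config (without the batch-size/max-step overrides that `benchmark_train.sh` injects):

```shell
# Train according to the config; the conformer config leaves the eval, export
# and inference entries as null, so only the training branch above runs.
bash test_tipc/test_train_inference_python.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train
```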