add tipc benchmark of conformer

pull/1439/head
huangyuxin 3 years ago
parent c66166e0fd
commit aefe9e93a7

@@ -175,7 +175,7 @@ class U2Trainer(Trainer):
                 observation['batch_cost'] = observation[
                     'reader_cost'] + observation['step_cost']
                 observation['samples'] = observation['batch_size']
-                observation['ips,sent./sec'] = observation[
+                observation['ips,samples/s'] = observation[
                     'batch_size'] / observation['batch_cost']
                 for k, v in observation.items():
                     msg += f" {k.split(',')[0]}: "
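A minimal sketch (values invented) of the convention the renamed key relies on: the logging loop keeps only the part of the key before the first comma, so `'ips,samples/s'` is printed as `ips:`, which matches the `--keyword ips:` that the analysis script below greps for.

```python
# Sketch of the logging convention above; the observation values are made up.
observation = {'batch_size': 16, 'batch_cost': 0.5}
observation['ips,samples/s'] = observation['batch_size'] / observation['batch_cost']

msg = ""
for k, v in observation.items():
    # 'ips,samples/s' -> 'ips'; the unit stays in the key for human readers,
    # while the log line keeps the bare 'ips:' keyword for the parser.
    msg += f" {k.split(',')[0]}: {v}"
print(msg)  # ... ips: 32.0
```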

@@ -252,8 +252,7 @@ class Trainer():
             if self.args.benchmark_max_step and self.iteration > self.args.benchmark_max_step:
                 logger.info(
                     f"Reach benchmark-max-step: {self.args.benchmark_max_step}")
-                sys.exit(
-                    f"Reach benchmark-max-step: {self.args.benchmark_max_step}")
+                sys.exit(0)
 
     def do_train(self):
         """The training process control by epoch."""
@@ -282,7 +281,7 @@ class Trainer():
                 observation['batch_cost'] = observation[
                     'reader_cost'] + observation['step_cost']
                 observation['samples'] = observation['batch_size']
-                observation['ips[sent./sec]'] = observation[
+                observation['ips samples/s'] = observation[
                     'batch_size'] / observation['batch_cost']
                 for k, v in observation.items():
                     msg += f" {k}: "

@@ -0,0 +1,258 @@
#!/bin/bash
source test_tipc/common_func.sh
# set env
python=python
export model_branch=`git symbolic-ref HEAD 2>/dev/null | cut -d"/" -f 3`
export model_commit=$(git log|head -n1|awk '{print $2}')
export str_tmp=$(echo `pip list|grep paddlepaddle-gpu|awk -F ' ' '{print $2}'`)
export frame_version=${str_tmp%%.post*}
export frame_commit=$(echo `${python} -c "import paddle;print(paddle.version.commit)"`)
# run benchmark sh
# Usage:
# bash run_benchmark_train.sh config.txt params
# or
# bash run_benchmark_train.sh config.txt
function func_parser_params(){
    strs=$1
    IFS="="
    array=(${strs})
    tmp=${array[1]}
    echo ${tmp}
}
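# Illustration: func_parser_params splits its argument on "=" and echoes the
# right-hand side, e.g. func_parser_params "epoch:benchmark_train=10" -> "10".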
function func_sed_params(){
    filename=$1
    line=$2
    param_value=$3
    params=`sed -n "${line}p" $filename`
    IFS=":"
    array=(${params})
    key=${array[0]}
    value=${array[1]}
    if [[ $value =~ 'benchmark_train' ]];then
        IFS='='
        _val=(${value})
        param_value="${_val[0]}=${param_value}"
    fi
    new_params="${key}:${param_value}"
    IFS=";"
    cmd="sed -i '${line}s/.*/${new_params}/' '${filename}'"
    eval $cmd
}
function set_gpu_id(){
    string=$1
    _str=${string:1:6}
    IFS="C"
    arr=(${_str})
    M=${arr[0]}
    P=${arr[1]}
    gn=`expr $P - 1`
    gpu_num=`expr $gn / $M`
    seq=`seq -s "," 0 $gpu_num`
    echo $seq
}
function get_repo_name(){
    IFS=";"
    cur_dir=$(pwd)
    IFS="/"
    arr=(${cur_dir})
    echo ${arr[-1]}
}
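# Illustration (device_num is assumed to look like N<nodes>C<total_gpus>):
#   set_gpu_id "N1C1" -> "0"
#   set_gpu_id "N1C4" -> "0,1,2,3"
# get_repo_name echoes the last component of $(pwd), e.g. "tests" when run
# from <repo_root>/tests, which is why the log names in the docs start with "tests_".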
FILENAME=$1
# copy FILENAME as new
new_filename="./test_tipc/benchmark_train.txt"
cmd=`yes|cp $FILENAME $new_filename`
FILENAME=$new_filename
# MODE must be one of ['benchmark_train']
MODE=$2
PARAMS=$3
# bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train dynamic_bs16_fp32_DP_N1C1
# parse params from train_benchmark.txt
dataline=`cat $FILENAME`
IFS=$'\n'
lines=(${dataline})
model_name=$(func_parser_value "${lines[1]}")
# get the line number of the train_benchmark_params section
line_num=`grep -n "train_benchmark_params" $FILENAME | cut -d ":" -f 1`
# for train log parser
batch_size=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
fp_items=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
epoch=$(func_parser_value "${lines[line_num]}")
line_num=`expr $line_num + 1`
profile_option_key=$(func_parser_key "${lines[line_num]}")
profile_option_params=$(func_parser_value "${lines[line_num]}")
profile_option="${profile_option_key}:${profile_option_params}"
line_num=`expr $line_num + 1`
flags_value=$(func_parser_value "${lines[line_num]}")
# set flags
IFS=";"
flags_list=(${flags_value})
for _flag in ${flags_list[*]}; do
    cmd="export ${_flag}"
    eval $cmd
done
# set log_name
repo_name=$(get_repo_name )
SAVE_LOG=${BENCHMARK_LOG_DIR:-$(pwd)} # */benchmark_log
mkdir -p "${SAVE_LOG}/benchmark_log/"
status_log="${SAVE_LOG}/benchmark_log/results.log"
# Line numbers in the copied config file where train params will be replaced.
line_python=3
line_gpuid=4
line_precision=6
line_epoch=7
line_batchsize=9
line_profile=13
line_eval_py=24
line_export_py=30
func_sed_params "$FILENAME" "${line_eval_py}" "null"
func_sed_params "$FILENAME" "${line_export_py}" "null"
func_sed_params "$FILENAME" "${line_python}" "$python"
# If PARAMS is empty, run the default benchmark matrix.
if [ -z "$PARAMS" ] ;then
    IFS="|"
    batch_size_list=(${batch_size})
    fp_items_list=(${fp_items})
    device_num_list=(N1C4)
    run_mode="DP"
else
    # parse params from input: modeltype_bs${bs_item}_${fp_item}_${run_mode}_${device_num}
    IFS="_"
    params_list=(${PARAMS})
    model_type=${params_list[0]}
    batch_size=${params_list[1]}
    batch_size=`echo ${batch_size} | tr -cd "[0-9]" `
    precision=${params_list[2]}
    # run_process_type=${params_list[3]}
    run_mode=${params_list[3]}
    device_num=${params_list[4]}
    IFS=";"

    if [ ${precision} = "null" ];then
        precision="fp32"
    fi

    fp_items_list=($precision)
    batch_size_list=($batch_size)
    device_num_list=($device_num)
fi
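# Example (illustrative): PARAMS="dynamic_bs16_fp32_DP_N1C1" parses to
#   model_type=dynamic  batch_size=16  precision=fp32  run_mode=DP  device_num=N1C1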
IFS="|"
for batch_size in ${batch_size_list[*]}; do
    for precision in ${fp_items_list[*]}; do
        for device_num in ${device_num_list[*]}; do
            # sed batchsize and precision
            func_sed_params "$FILENAME" "${line_precision}" "$precision"
            func_sed_params "$FILENAME" "${line_batchsize}" "$MODE=$batch_size"
            func_sed_params "$FILENAME" "${line_epoch}" "$MODE=$epoch"
            gpu_id=$(set_gpu_id $device_num)

            if [ ${#gpu_id} -le 1 ];then
                run_process_type="SingleP"
                log_path="$SAVE_LOG/profiling_log"
                mkdir -p $log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_profiling"
                func_sed_params "$FILENAME" "${line_gpuid}" "0"  # sed used gpu_id
                # set profile_option params
                tmp=`sed -i "${line_profile}s/.*/${profile_option}/" "${FILENAME}"`
                # run test_train_inference_python.sh
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                eval $cmd
                eval "cat ${log_path}/${log_name}"

                # without profile
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p $log_path
                mkdir -p $speed_log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_profile}" "null"  # set profile option to null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                job_bt=`date '+%Y%m%d%H%M%S'`
                eval $cmd
                job_et=`date '+%Y%m%d%H%M%S'`
                export model_run_time=$((${job_et}-${job_bt}))
                eval "cat ${log_path}/${log_name}"

                # parse log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                    --speed_log_file '${speed_log_path}/${speed_log_name}' \
                    --model_name ${_model_name} \
                    --base_batch_size ${batch_size} \
                    --run_mode ${run_mode} \
                    --run_process_type ${run_process_type} \
                    --fp_item ${precision} \
                    --keyword ips: \
                    --skip_steps 2 \
                    --device_num ${device_num} \
                    --speed_unit samples/s \
                    --convergence_key loss: "
                echo $cmd
                eval $cmd
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            else
                IFS=";"
                unset CUDA_VISIBLE_DEVICES
                run_process_type="MultiP"
                log_path="$SAVE_LOG/train_log"
                speed_log_path="$SAVE_LOG/index"
                mkdir -p $log_path
                mkdir -p $speed_log_path
                log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_log"
                speed_log_name="${repo_name}_${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}_${device_num}_speed"
                func_sed_params "$FILENAME" "${line_gpuid}" "$gpu_id"  # sed used gpu_id
                func_sed_params "$FILENAME" "${line_profile}" "null"  # sed --profile_option as null
                cmd="bash test_tipc/test_train_inference_python.sh ${FILENAME} benchmark_train > ${log_path}/${log_name} 2>&1 "
                echo $cmd
                job_bt=`date '+%Y%m%d%H%M%S'`
                eval $cmd
                job_et=`date '+%Y%m%d%H%M%S'`
                export model_run_time=$((${job_et}-${job_bt}))
                eval "cat ${log_path}/${log_name}"

                # parse log
                _model_name="${model_name}_bs${batch_size}_${precision}_${run_process_type}_${run_mode}"
                cmd="${python} ${BENCHMARK_ROOT}/scripts/analysis.py --filename ${log_path}/${log_name} \
                    --speed_log_file '${speed_log_path}/${speed_log_name}' \
                    --model_name ${_model_name} \
                    --base_batch_size ${batch_size} \
                    --run_mode ${run_mode} \
                    --run_process_type ${run_process_type} \
                    --fp_item ${precision} \
                    --keyword ips: \
                    --skip_steps 2 \
                    --device_num ${device_num} \
                    --speed_unit samples/s \
                    --convergence_key loss: "
                echo $cmd
                eval $cmd
                last_status=${PIPESTATUS[0]}
                status_check $last_status "${cmd}" "${status_log}"
            fi
        done
    done
done

@@ -0,0 +1,57 @@
===========================train_params===========================
model_name:conformer
python:python3.7
gpu_list:0|0,1
null:null
null:null
--benchmark-max-step:50
null:null
--benchmark-batch-size:16
null:null
null:null
null:null
null:null
##
trainer:norm_train
norm_train: ../paddlespeech/s2t/exps/u2/bin/train.py --config test_tipc/conformer/benchmark_train/conf/conformer.yaml --output test_tipc/conformer/benchmark_train/outputs --seed 1024
pact_train:null
fpgm_train:null
distill_train:null
null:null
null:null
##
===========================eval_params===========================
eval:null
null:null
##
===========================infer_params===========================
null:null
null:null
norm_export:null
quant_export:null
fpgm_export:null
distill_export:null
export1:null
export2:null
null:null
infer_model:null
infer_export:null
infer_quant:null
inference:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
null:null
===========================train_benchmark_params==========================
batch_size:16|30
fp_items:fp32
iteration:50
--profiler-options:"batch_range=[10,35];state=GPU;tracer_option=Default;profile_path=model.profile"
flags:FLAGS_eager_delete_tensor_gb=0.0;FLAGS_fraction_of_gpu_memory_to_use=0.98;FLAGS_conv_workspace_size_limit=4096
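Taken together with the scripts above, this config drives roughly the following command for the bs16/fp32/N1C1 case. This is a sketch of the assembled invocation, not a literal transcript; the exact flags are put together by `func_set_params` in `test_train_inference_python.sh`:

```shell
# Sketch (assumed): the single-GPU bs16/fp32 benchmark run reduces to roughly
python3.7 ../paddlespeech/s2t/exps/u2/bin/train.py \
    --config test_tipc/conformer/benchmark_train/conf/conformer.yaml \
    --output test_tipc/conformer/benchmark_train/outputs \
    --seed 1024 \
    --benchmark-max-step=50 \
    --benchmark-batch-size=16
```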

@@ -0,0 +1,159 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Prepare Aishell mandarin dataset
Download, unpack and create manifest files.
Manifest file is a json-format file with each line containing the
meta data (i.e. audio filepath, transcript and audio duration)
of each audio file in the data set.
"""
import argparse
import codecs
import json
import os
from pathlib import Path
import soundfile
from utils.utility import download
from utils.utility import unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT_TAG
DATA_URL = URL_ROOT + '/data_aishell_tiny.tgz'
MD5_DATA = '337b1b1ea016761d4fd3225c5b8799b4'
RESOURCE_URL = URL_ROOT + '/resource_aishell.tgz'
MD5_RESOURCE = '957d480a0fcac85fc18e550756f624e5'
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
    "--target_dir",
    default=DATA_HOME + "/Aishell",
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
args = parser.parse_args()
def create_manifest(data_dir, manifest_path_prefix):
    print("Creating manifest %s ..." % manifest_path_prefix)
    json_lines = []
    transcript_path = os.path.join(data_dir, 'transcript',
                                   'aishell_transcript_v0.8.txt')
    transcript_dict = {}
    for line in codecs.open(transcript_path, 'r', 'utf-8'):
        line = line.strip()
        if line == '':
            continue
        audio_id, text = line.split(' ', 1)
        # remove whitespace from the character-level transcript
        text = ''.join(text.split())
        transcript_dict[audio_id] = text

    data_types = ['train', 'dev', 'test']
    for dtype in data_types:
        del json_lines[:]
        total_sec = 0.0
        total_text = 0.0
        total_num = 0

        audio_dir = os.path.join(data_dir, 'wav', dtype)
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for fname in filelist:
                audio_path = os.path.abspath(os.path.join(subfolder, fname))
                audio_id = os.path.basename(fname)[:-4]
                # skip audio files that have no transcription
                if audio_id not in transcript_dict:
                    continue

                utt2spk = Path(audio_path).parent.name
                audio_data, samplerate = soundfile.read(audio_path)
                duration = float(len(audio_data) / samplerate)
                text = transcript_dict[audio_id]
                json_lines.append(
                    json.dumps(
                        {
                            'utt': audio_id,
                            'utt2spk': str(utt2spk),
                            'feat': audio_path,
                            'feat_shape': (duration, ),  # second
                            'text': text
                        },
                        ensure_ascii=False))

                total_sec += duration
                total_text += len(text)
                total_num += 1

        manifest_path = manifest_path_prefix + '.' + dtype
        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
            for line in json_lines:
                fout.write(line + '\n')

        manifest_dir = os.path.dirname(manifest_path_prefix)
        meta_path = os.path.join(manifest_dir, dtype) + '.meta'
        with open(meta_path, 'w') as f:
            print(f"{dtype}:", file=f)
            print(f"{total_num} utts", file=f)
            print(f"{total_sec / (60*60)} h", file=f)
            print(f"{total_text} text", file=f)
            print(f"{total_text / total_sec} text/sec", file=f)
            print(f"{total_sec / total_num} sec/utt", file=f)
def prepare_dataset(url, md5sum, target_dir, manifest_path=None):
    """Download, unpack and create manifest file."""
    data_dir = os.path.join(target_dir, 'data_aishell_tiny')
    if not os.path.exists(data_dir):
        filepath = download(url, md5sum, target_dir)
        unpack(filepath, target_dir)
        # unpack all audio tar files
        audio_dir = os.path.join(data_dir, 'wav')
        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
            for ftar in filelist:
                unpack(os.path.join(subfolder, ftar), subfolder, True)
    else:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)

    if manifest_path:
        create_manifest(data_dir, manifest_path)
def main():
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)

    prepare_dataset(
        url=DATA_URL,
        md5sum=MD5_DATA,
        target_dir=args.target_dir,
        manifest_path=args.manifest_prefix)
    prepare_dataset(
        url=RESOURCE_URL,
        md5sum=MD5_RESOURCE,
        target_dir=args.target_dir,
        manifest_path=None)
    print("Data download and manifest prepare done!")


if __name__ == '__main__':
    main()
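For reference, each line of the manifest written by `create_manifest` is one JSON object of the following shape; the utterance ID, speaker, and path below are invented for illustration:

```python
# Hypothetical manifest line (all values invented); json.dumps serializes the
# (duration,) tuple as a one-element list.
example_line = {
    'utt': 'BAC009S0002W0122',   # audio file basename without .wav
    'utt2spk': 'S0002',          # parent directory name, used as speaker ID
    'feat': '/abs/path/wav/train/S0002/BAC009S0002W0122.wav',
    'feat_shape': (3.07, ),      # duration in seconds
    'text': '...'                # whitespace-stripped transcript
}
```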

@@ -0,0 +1,53 @@
# TIPC Linux Benchmark Test Guide

This document describes the benchmark test. The main program of the benchmark test is `benchmark_train.sh`, which is used to verify and monitor the training performance of a model.

# 1. Test Workflow

## 1.1 Prepare the Data and Environment

Run the commands below under `tests` in the repo root.

Run `test_tipc/prepare.sh` to prepare the training data and install the environment.

```shell
# Usage: bash test_tipc/prepare.sh train_benchmark.txt mode
bash test_tipc/prepare.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train
```

## 1.2 Run the Test

Run `test_tipc/benchmark_train.sh` to train the model and parse the logs.

```shell
# Usage: bash test_tipc/benchmark_train.sh train_benchmark.txt mode
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train
```

`test_tipc/benchmark_train.sh` also accepts a third argument so that only one training configuration is run, as follows:

```shell
# Usage: bash test_tipc/benchmark_train.sh train_benchmark.txt mode params
bash test_tipc/benchmark_train.sh test_tipc/configs/conformer/train_benchmark.txt benchmark_train dynamic_bs16_fp32_DP_N1C1
```

`dynamic_bs16_fp32_DP_N1C1` is the parameter passed to `test_tipc/benchmark_train.sh`, in the format

`${modeltype}_${batch_size}_${fp_item}_${run_mode}_${device_num}`

which encodes the model type, the batch size, the training precision (e.g. fp32 or fp16), the distributed run mode, and the machine configuration used for distributed training (e.g. N1C1 for one machine with one GPU).

# 2. Log Output

The run saves the training logs and the parsed logs. With the `test_tipc/configs/conformer/train_benchmark.txt` parameter file, the parsed training log looks like:
```
{"model_branch": "dygaph", "model_commit": "", "model_name": "conformer_bs16_fp32_SingleP_DP", "batch_size": 16, "fp_item": "fp32", "run_process_type": "SingleP", "run_mode": "DP", "convergence_value": "", "convergence_key": "loss:", "ips": , "speed_unit": "samples/s", "device_num": "N1C1", "model_run_time": "0", "frame_commit": "", "frame_version": "0.0.0"}
```
The training logs and the parsed results are saved under the `test` directory, organized as follows:
```
test/
├── index
│   ├── tests_conformer_bs16_fp32_SingleP_DP_N1C1_speed
│   └── tests_conformer_bs16_fp32_SingleP_DP_N1C8_speed
├── profiling_log
│   └── tests_conformer_bs16_fp32_SingleP_DP_N1C1_profiling
└── train_log
├── tests_conformer_bs16_fp32_SingleP_DP_N1C1_log
└── tests_conformer_bs16_fp32_SingleP_DP_N1C8_log
```

@@ -0,0 +1,61 @@
#!/bin/bash
source test_tipc/common_func.sh
FILENAME=$1
# MODE must be one of ['benchmark_train_lite_infer', 'benchmark_train_whole_infer', 'whole_train_whole_infer',
#                      'whole_infer', 'klquant_whole_infer',
#                      'cpp_infer', 'serving_infer', 'benchmark_train']
MODE=$2
dataline=$(cat ${FILENAME})
# parse params
IFS=$'\n'
lines=(${dataline})
# The training params
model_name=$(func_parser_value "${lines[1]}")
echo "model_name:"${model_name}
trainer_list=$(func_parser_value "${lines[14]}")
if [ ${MODE} = "benchmark_train" ];then
curPath=$(readlink -f "$(dirname "$0")")
echo "curPath:"${curPath}
cd ${curPath}/../..
pip install .
cd -
if [ ${model_name} == "conformer" ]; then
# set the URL for aishell_tiny dataset
URL='None'
echo "URL:"${URL}
if [ ${URL} == 'None' ];then
echo "please contact author to get the URL.\n"
exit
fi
sed -i "s#^URL_ROOT_TAG#URL_ROOT = '${URL}'#g" ${curPath}/conformer/scripts/aishell_tiny.py
cp ${curPath}/conformer/scripts/aishell_tiny.py ${curPath}/../../dataset/aishell/
cd ${curPath}/../../examples/aishell/asr1
source path.sh
# download audio data
sed -i "s#aishell.py#aishell_tiny.py#g" ./local/data.sh
bash ./local/data.sh || exit -1
if [ $? -ne 0 ]; then
exit 1
fi
mkdir -p ${curPath}/conformer/benchmark_train/
cp -rf conf ${curPath}/conformer/benchmark_train/
cp -rf data ${curPath}/conformer/benchmark_train/
cd ${curPath}
sed -i "s#accum_grad: 2#accum_grad: 1#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
sed -i "s#conf/#test_tipc/conformer/benchmark_train/conf/#g" ${curPath}/conformer/benchmark_train/conf/conformer.yaml
sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/tuning/decode.yaml
sed -i "s#data/#test_tipc/conformer/benchmark_train/data/#g" ${curPath}/conformer/benchmark_train/conf/preprocess.yaml
fi
fi

@@ -0,0 +1,385 @@
#!/bin/bash
source test_tipc/common_func.sh
FILENAME=$1
# MODE must be one of ['lite_train_lite_infer', 'lite_train_whole_infer', 'whole_train_whole_infer', 'whole_infer', 'klquant_whole_infer']
MODE=$2
dataline=$(awk 'NR==1, NR==51{print}' $FILENAME)
# parse params
IFS=$'\n'
lines=(${dataline})
# The training params
model_name=$(func_parser_value "${lines[1]}")
python=$(func_parser_value "${lines[2]}")
gpu_list=$(func_parser_value "${lines[3]}")
train_use_gpu_key=$(func_parser_key "${lines[4]}")
train_use_gpu_value=$(func_parser_value "${lines[4]}")
autocast_list=$(func_parser_value "${lines[5]}")
autocast_key=$(func_parser_key "${lines[5]}")
epoch_key=$(func_parser_key "${lines[6]}")
epoch_num=$(func_parser_params "${lines[6]}")
save_model_key=$(func_parser_key "${lines[7]}")
train_batch_key=$(func_parser_key "${lines[8]}")
train_batch_value=$(func_parser_params "${lines[8]}")
pretrain_model_key=$(func_parser_key "${lines[9]}")
pretrain_model_value=$(func_parser_value "${lines[9]}")
train_model_name=$(func_parser_value "${lines[10]}")
train_infer_img_dir=$(func_parser_value "${lines[11]}")
train_param_key1=$(func_parser_key "${lines[12]}")
train_param_value1=$(func_parser_value "${lines[12]}")
trainer_list=$(func_parser_value "${lines[14]}")
trainer_norm=$(func_parser_key "${lines[15]}")
norm_trainer=$(func_parser_value "${lines[15]}")
pact_key=$(func_parser_key "${lines[16]}")
pact_trainer=$(func_parser_value "${lines[16]}")
fpgm_key=$(func_parser_key "${lines[17]}")
fpgm_trainer=$(func_parser_value "${lines[17]}")
distill_key=$(func_parser_key "${lines[18]}")
distill_trainer=$(func_parser_value "${lines[18]}")
trainer_key1=$(func_parser_key "${lines[19]}")
trainer_value1=$(func_parser_value "${lines[19]}")
trainer_key2=$(func_parser_key "${lines[20]}")
trainer_value2=$(func_parser_value "${lines[20]}")
eval_py=$(func_parser_value "${lines[23]}")
eval_key1=$(func_parser_key "${lines[24]}")
eval_value1=$(func_parser_value "${lines[24]}")
save_infer_key=$(func_parser_key "${lines[27]}")
save_infer_value=$(func_parser_value "${lines[27]}")
export_weight=$(func_parser_key "${lines[28]}")
norm_export=$(func_parser_value "${lines[29]}")
pact_export=$(func_parser_value "${lines[30]}")
fpgm_export=$(func_parser_value "${lines[31]}")
distill_export=$(func_parser_value "${lines[32]}")
export_key1=$(func_parser_key "${lines[33]}")
export_value1=$(func_parser_value "${lines[33]}")
export_key2=$(func_parser_key "${lines[34]}")
export_value2=$(func_parser_value "${lines[34]}")
inference_dir=$(func_parser_value "${lines[35]}")
# parse inference model params
infer_model_dir_list=$(func_parser_value "${lines[36]}")
infer_export_list=$(func_parser_value "${lines[37]}")
infer_is_quant=$(func_parser_value "${lines[38]}")
# parse inference params
inference_py=$(func_parser_value "${lines[39]}")
use_gpu_key=$(func_parser_key "${lines[40]}")
use_gpu_list=$(func_parser_value "${lines[40]}")
use_mkldnn_key=$(func_parser_key "${lines[41]}")
use_mkldnn_list=$(func_parser_value "${lines[41]}")
cpu_threads_key=$(func_parser_key "${lines[42]}")
cpu_threads_list=$(func_parser_value "${lines[42]}")
batch_size_key=$(func_parser_key "${lines[43]}")
batch_size_list=$(func_parser_value "${lines[43]}")
use_trt_key=$(func_parser_key "${lines[44]}")
use_trt_list=$(func_parser_value "${lines[44]}")
precision_key=$(func_parser_key "${lines[45]}")
precision_list=$(func_parser_value "${lines[45]}")
infer_model_key=$(func_parser_key "${lines[46]}")
image_dir_key=$(func_parser_key "${lines[47]}")
infer_img_dir=$(func_parser_value "${lines[47]}")
save_log_key=$(func_parser_key "${lines[48]}")
benchmark_key=$(func_parser_key "${lines[49]}")
benchmark_value=$(func_parser_value "${lines[49]}")
infer_key1=$(func_parser_key "${lines[50]}")
infer_value1=$(func_parser_value "${lines[50]}")
# parse klquant_infer params
if [ ${MODE} = "klquant_whole_infer" ]; then
    dataline=$(awk 'NR==1, NR==17{print}' $FILENAME)
    lines=(${dataline})
    model_name=$(func_parser_value "${lines[1]}")
    python=$(func_parser_value "${lines[2]}")
    # parse inference model params
    infer_model_dir_list=$(func_parser_value "${lines[3]}")
    infer_export_list=$(func_parser_value "${lines[4]}")
    infer_is_quant=$(func_parser_value "${lines[5]}")
    # parse inference params
    inference_py=$(func_parser_value "${lines[6]}")
    use_gpu_key=$(func_parser_key "${lines[7]}")
    use_gpu_list=$(func_parser_value "${lines[7]}")
    use_mkldnn_key=$(func_parser_key "${lines[8]}")
    use_mkldnn_list=$(func_parser_value "${lines[8]}")
    cpu_threads_key=$(func_parser_key "${lines[9]}")
    cpu_threads_list=$(func_parser_value "${lines[9]}")
    batch_size_key=$(func_parser_key "${lines[10]}")
    batch_size_list=$(func_parser_value "${lines[10]}")
    use_trt_key=$(func_parser_key "${lines[11]}")
    use_trt_list=$(func_parser_value "${lines[11]}")
    precision_key=$(func_parser_key "${lines[12]}")
    precision_list=$(func_parser_value "${lines[12]}")
    infer_model_key=$(func_parser_key "${lines[13]}")
    image_dir_key=$(func_parser_key "${lines[14]}")
    infer_img_dir=$(func_parser_value "${lines[14]}")
    save_log_key=$(func_parser_key "${lines[15]}")
    benchmark_key=$(func_parser_key "${lines[16]}")
    benchmark_value=$(func_parser_value "${lines[16]}")
    infer_key1=$(func_parser_key "${lines[17]}")
    infer_value1=$(func_parser_value "${lines[17]}")
fi
save_model_value=$(func_parser_value "${lines[7]}")
if [[ ${save_model_value} = " " ]] || [[ ${save_model_value} = "null" ]] || [[ ${save_model_value} = "" ]];then
LOG_PATH="./test_tipc/output"
else
LOG_PATH=${save_model_value}
fi
mkdir -p ${LOG_PATH}
status_log="${LOG_PATH}/results_python.log"
function func_inference(){
    IFS='|'
    _python=$1
    _script=$2
    _model_dir=$3
    _log_path=$4
    _img_dir=$5
    _flag_quant=$6
    # inference
    for use_gpu in ${use_gpu_list[*]}; do
        if [ ${use_gpu} = "False" ] || [ ${use_gpu} = "cpu" ]; then
            for use_mkldnn in ${use_mkldnn_list[*]}; do
                if [ ${use_mkldnn} = "False" ] && [ ${_flag_quant} = "True" ]; then
                    continue
                fi
                for threads in ${cpu_threads_list[*]}; do
                    for batch_size in ${batch_size_list[*]}; do
                        for precision in ${precision_list[*]}; do
                            if [ ${use_mkldnn} = "False" ] && [ ${precision} = "fp16" ]; then
                                continue  # skip when fp16 is enabled but mkldnn is disabled
                            fi
                            if [ ${_flag_quant} = "True" ] && [ ${precision} != "int8" ]; then
                                continue  # skip quant model inference when precision is not int8
                            fi
                            set_precision=$(func_set_params "${precision_key}" "${precision}")
                            _save_log_path="${_log_path}/python_infer_cpu_usemkldnn_${use_mkldnn}_threads_${threads}_precision_${precision}_batchsize_${batch_size}.log"
                            set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                            set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                            set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                            set_cpu_threads=$(func_set_params "${cpu_threads_key}" "${threads}")
                            set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                            set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                            command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${use_mkldnn_key}=${use_mkldnn} ${set_cpu_threads} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_precision} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                            eval $command
                            last_status=${PIPESTATUS[0]}
                            eval "cat ${_save_log_path}"
                            status_check $last_status "${command}" "${status_log}"
                        done
                    done
                done
            done
        elif [ ${use_gpu} = "True" ] || [ ${use_gpu} = "gpu" ]; then
            for use_trt in ${use_trt_list[*]}; do
                for precision in ${precision_list[*]}; do
                    if [[ ${_flag_quant} = "False" ]] && [[ ${precision} =~ "int8" ]]; then
                        continue
                    fi
                    if [[ ${precision} =~ "fp16" || ${precision} =~ "int8" ]] && [ ${use_trt} = "False" ]; then
                        continue
                    fi
                    if [[ ${use_trt} = "False" || ${precision} =~ "int8" ]] && [ ${_flag_quant} = "True" ]; then
                        continue
                    fi
                    for batch_size in ${batch_size_list[*]}; do
                        _save_log_path="${_log_path}/python_infer_gpu_usetrt_${use_trt}_precision_${precision}_batchsize_${batch_size}.log"
                        set_infer_data=$(func_set_params "${image_dir_key}" "${_img_dir}")
                        set_benchmark=$(func_set_params "${benchmark_key}" "${benchmark_value}")
                        set_batchsize=$(func_set_params "${batch_size_key}" "${batch_size}")
                        set_tensorrt=$(func_set_params "${use_trt_key}" "${use_trt}")
                        set_precision=$(func_set_params "${precision_key}" "${precision}")
                        set_model_dir=$(func_set_params "${infer_model_key}" "${_model_dir}")
                        set_infer_params1=$(func_set_params "${infer_key1}" "${infer_value1}")
                        command="${_python} ${_script} ${use_gpu_key}=${use_gpu} ${set_tensorrt} ${set_precision} ${set_model_dir} ${set_batchsize} ${set_infer_data} ${set_benchmark} ${set_infer_params1} > ${_save_log_path} 2>&1 "
                        eval $command
                        last_status=${PIPESTATUS[0]}
                        eval "cat ${_save_log_path}"
                        status_check $last_status "${command}" "${status_log}"
                    done
                done
            done
        else
            echo "Currently, hardware other than CPU and GPU is not supported!"
        fi
    done
}
if [ ${MODE} = "whole_infer" ] || [ ${MODE} = "klquant_whole_infer" ]; then
GPUID=$3
if [ ${#GPUID} -le 0 ];then
env=" "
else
env="export CUDA_VISIBLE_DEVICES=${GPUID}"
fi
# set CUDA_VISIBLE_DEVICES
eval $env
export Count=0
IFS="|"
infer_run_exports=(${infer_export_list})
infer_quant_flag=(${infer_is_quant})
for infer_model in ${infer_model_dir_list[*]}; do
# run export
if [ ${infer_run_exports[Count]} != "null" ];then
save_infer_dir=$(dirname $infer_model)
set_export_weight=$(func_set_params "${export_weight}" "${infer_model}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_dir}")
# For lac which needs the `data_dir` args
set_export1_key=$(func_set_params "${export_key1}" "${export_value1}")
export_cmd="${python} ${infer_run_exports[Count]} ${set_export_weight} ${set_save_infer_key} ${set_export1_key}"
echo ${infer_run_exports[Count]}
echo $export_cmd
eval $export_cmd
status_export=$?
status_check $status_export "${export_cmd}" "${status_log}"
else
save_infer_dir=${infer_model}
fi
#run inference
is_quant=${infer_quant_flag[Count]}
if [ ${MODE} = "klquant_infer" ]; then
is_quant="True"
fi
func_inference "${python}" "${inference_py}" "${save_infer_dir}" "${LOG_PATH}" "${infer_img_dir}" ${is_quant}
Count=$(($Count + 1))
done
else
IFS="|"
export Count=0
USE_GPU_KEY=(${train_use_gpu_value})
for gpu in ${gpu_list[*]}; do
train_use_gpu=${USE_GPU_KEY[Count]}
Count=$(($Count + 1))
ips=""
if [ ${gpu} = "-1" ];then
env=""
elif [ ${#gpu} -le 1 ];then
env="export CUDA_VISIBLE_DEVICES=${gpu}"
eval ${env}
elif [ ${#gpu} -le 15 ];then
IFS=","
array=(${gpu})
env="export CUDA_VISIBLE_DEVICES=${array[0]}"
IFS="|"
else
IFS=";"
array=(${gpu})
ips=${array[0]}
gpu=${array[1]}
IFS="|"
env=" "
fi
for autocast in ${autocast_list[*]}; do
if [ ${autocast} = "amp" ]; then
set_amp_config="Global.use_amp=True Global.scale_loss=1024.0 Global.use_dynamic_loss_scaling=True"
else
set_amp_config=" "
fi
for trainer in ${trainer_list[*]}; do
flag_quant=False
if [ ${trainer} = ${pact_key} ]; then
run_train=${pact_trainer}
run_export=${pact_export}
flag_quant=True
elif [ ${trainer} = "${fpgm_key}" ]; then
run_train=${fpgm_trainer}
run_export=${fpgm_export}
elif [ ${trainer} = "${distill_key}" ]; then
run_train=${distill_trainer}
run_export=${distill_export}
elif [ ${trainer} = ${trainer_key1} ]; then
run_train=${trainer_value1}
run_export=${export_value1}
elif [[ ${trainer} = ${trainer_key2} ]]; then
run_train=${trainer_value2}
run_export=${export_value2}
else
run_train=${norm_trainer}
run_export=${norm_export}
fi
if [ ${run_train} = "null" ]; then
continue
fi
set_autocast=$(func_set_params "${autocast_key}" "${autocast}")
set_epoch=$(func_set_params "${epoch_key}" "${epoch_num}")
set_pretrain=$(func_set_params "${pretrain_model_key}" "${pretrain_model_value}")
set_batchsize=$(func_set_params "${train_batch_key}" "${train_batch_value}")
set_train_params1=$(func_set_params "${train_param_key1}" "${train_param_value1}")
set_use_gpu=$(func_set_params "${train_use_gpu_key}" "${train_use_gpu}")
if [ ${#ips} -le 26 ];then
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}"
nodes=1
else
IFS=","
ips_array=(${ips})
IFS="|"
nodes=${#ips_array[@]}
save_log="${LOG_PATH}/${trainer}_gpus_${gpu}_autocast_${autocast}_nodes_${nodes}"
fi
# load pretrain from norm training if current trainer is pact or fpgm trainer
if ([ ${trainer} = ${pact_key} ] || [ ${trainer} = ${fpgm_key} ]) && [ ${nodes} -le 1 ]; then
set_pretrain="${load_norm_train_model}"
fi
set_save_model=$(func_set_params "${save_model_key}" "${save_log}")
if [ ${#gpu} -le 2 ];then # train with cpu or single gpu
cmd="${python} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config} "
elif [ ${#ips} -le 26 ];then # train with multi-gpu
cmd="${python} -m paddle.distributed.launch --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_epoch} ${set_pretrain} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
else # train with multi-machine
cmd="${python} -m paddle.distributed.launch --ips=${ips} --gpus=${gpu} ${run_train} ${set_use_gpu} ${set_save_model} ${set_pretrain} ${set_epoch} ${set_autocast} ${set_batchsize} ${set_train_params1} ${set_amp_config}"
fi
# run train
eval "unset CUDA_VISIBLE_DEVICES"
eval $cmd
status_check $? "${cmd}" "${status_log}"
set_eval_pretrain=$(func_set_params "${pretrain_model_key}" "${save_log}/${train_model_name}")
# save norm trained models to set pretrain for pact training and fpgm training
if [ ${trainer} = ${trainer_norm} ] && [ ${nodes} -le 1 ]; then
load_norm_train_model=${set_eval_pretrain}
fi
# run eval
if [ ${eval_py} != "null" ]; then
set_eval_params1=$(func_set_params "${eval_key1}" "${eval_value1}")
eval_cmd="${python} ${eval_py} ${set_eval_pretrain} ${set_use_gpu} ${set_eval_params1}"
eval $eval_cmd
status_check $? "${eval_cmd}" "${status_log}"
fi
# run export model
if [ ${run_export} != "null" ]; then
# run export model
save_infer_path="${save_infer_value}"
set_export_weight=$(func_set_params "${export_weight}" "${save_log}/${train_model_name}")
set_save_infer_key=$(func_set_params "${save_infer_key}" "${save_infer_path}")
# For lac which needs the `data_dir` args
set_export1_key=$(func_set_params "${export_key1}" "${export_value1}")
export_cmd="${python} ${run_export} ${set_export_weight} ${set_save_infer_key} ${set_export1_key}"
eval $export_cmd
status_check $? "${export_cmd}" "${status_log}"
#run inference
eval $env
save_infer_path="${save_infer_value}"
if [ ${inference_dir} != "null" ] && [ ${inference_dir} != '##' ]; then
infer_model_dir="${save_infer_path}/${inference_dir}"
else
infer_model_dir=${save_infer_path}
fi
func_inference "${python}" "${inference_py}" "${infer_model_dir}" "${LOG_PATH}" "${train_infer_img_dir}" "${flag_quant}"
eval "unset CUDA_VISIBLE_DEVICES"
fi
done # done with: for trainer in ${trainer_list[*]}; do
done # done with: for autocast in ${autocast_list[*]}; do
done # done with: for gpu in ${gpu_list[*]}; do
fi # end if [ ${MODE} = "infer" ]; then