From 438e1bd34fbf5e1c1181559b4f19301657d6b4c7 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 15 Sep 2021 11:03:18 +0000
Subject: [PATCH 1/6] add benchmark scripts

---
 tests/benchmark/.gitignore       |   2 +
 tests/benchmark/README.md        |  12 +++
 tests/benchmark/run_all.sh       |  33 ++++++
 tests/benchmark/run_benchmark.sh |  54 ++++++++++
 utils/pd_env_collect.sh          | 167 +++++++++++++++++++++++++++++++
 5 files changed, 268 insertions(+)
 create mode 100644 tests/benchmark/.gitignore
 create mode 100644 tests/benchmark/README.md
 create mode 100644 tests/benchmark/run_all.sh
 create mode 100644 tests/benchmark/run_benchmark.sh
 create mode 100644 utils/pd_env_collect.sh

diff --git a/tests/benchmark/.gitignore b/tests/benchmark/.gitignore
new file mode 100644
index 000000000..7d166b066
--- /dev/null
+++ b/tests/benchmark/.gitignore
@@ -0,0 +1,2 @@
+old-pd_env.txt
+pd_env.txt
diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md
new file mode 100644
index 000000000..8ec43f89e
--- /dev/null
+++ b/tests/benchmark/README.md
@@ -0,0 +1,12 @@
+# Benchmark Test
+
+## Data
+
+* Aishell
+
+## Docker 
+
+```
+registry.baidubce.com/paddlepaddle/paddle   2.1.1-gpu-cuda10.2-cudnn7   59d5ec1de486  
+```
+
diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh
new file mode 100644
index 000000000..7564174b4
--- /dev/null
+++ b/tests/benchmark/run_all.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# collect env info
+bash ../../utils/pd_env_collect.sh
+
+
+
+# 提供可稳定复现性能的脚本，默认在标准docker环境内py37执行： paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7  paddle=2.1.2  py=37
+# 执行目录：需说明
+cd **
+# 1 安装该模型需要的依赖 (如需开启优化策略请注明)
+pip install ...
+# 2 拷贝该模型需要数据、预训练模型
+# 3 批量运行（如不方便批量，1，2需放到单个模型中）
+
+model_mode_list=(MobileNetv1 MobileNetv2)
+fp_item_list=(fp32 fp16)
+bs_item=(32 64 96)
+for model_mode in ${model_mode_list[@]}; do
+      for fp_item in ${fp_item_list[@]}; do
+          for bs in "${bs_item[@]}"   # batch sizes are stored in the bs_item array; bs_list was never defined
+            do
+            echo "index is speed, 1gpus, begin, ${model_mode}"
+            run_mode=sp
+            CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh "${run_mode}" "${bs}" "${fp_item}" 500 "${model_mode}"     #  (5min)
+            sleep 60
+            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${model_mode}"
+            run_mode=mp
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh "${run_mode}" "${bs}" "${fp_item}" 500 "${model_mode}"
+            sleep 60
+            done
+      done
+done
diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh
new file mode 100644
index 000000000..2b9cf70fd
--- /dev/null
+++ b/tests/benchmark/run_benchmark.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+set -xe
+# 运行示例：CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
+# 参数说明
+function _set_params(){
+    run_mode=${1:-"sp"}          # single GPU: sp | multi GPU: mp
+    batch_size=${2:-"64"}        # defaults to 64 when unset or empty
+    fp_item=${3:-"fp32"}        # fp32|fp16
+    max_iter=${4:-"500"}       # optional; if needed, modify the code to stop early
+    model_name=${5:-"model_name"}  # used in the log file name
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}  # TRAIN_LOG_DIR will be set by QA later; defaults to the current directory
+
+#   Nothing below needs to be modified
+    device=${CUDA_VISIBLE_DEVICES//,/ }  # replace every comma with a space
+    arr=(${device})  # unquoted on purpose: word-split into one array element per device id
+    num_gpu_devices=${#arr[*]}
+    log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
+}
+function _train(){
+    echo "Train on ${num_gpu_devices} GPUs"
+    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
+    # Runs one training job (globals come from _set_params), logs to ${log_file}, exports job_fail_flag.
+    train_cmd="--model_name=${model_name}
+               --batch_size=${batch_size}
+               --fp=${fp_item} \
+               --max_iter=${max_iter} "
+    case ${run_mode} in
+    sp) train_cmd="python -u tools/train.py ${train_cmd}" ;;  # keep the args inside ONE quoted string
+    mp)
+        train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py ${train_cmd}"
+        log_parse_file="mylog/workerlog.0" ;;
+    *) echo "choose run_mode(sp or mp)"; exit 1;
+    esac
+    # Capture the status with '|| rv=$?': under 'set -e' a bare failing command would exit before the check.
+    local rv=0; timeout 15m ${train_cmd} > "${log_file}" 2>&1 || rv=$?  # train_cmd is word-split on purpose
+    if [ "${rv}" -ne 0 ];then
+        echo -e "${model_name}, FAIL"
+        export job_fail_flag=1
+    else
+        echo -e "${model_name}, SUCCESS"
+        export job_fail_flag=0
+    fi
+    kill -9 $(ps -ef|grep '[p]ython'|awk '{print $2}') 2>/dev/null || true  # best effort; must not trip set -e
+
+    if [[ "${run_mode}" == "mp" && -d mylog ]]; then
+        rm "${log_file}"
+        cp mylog/workerlog.0 "${log_file}"
+    fi
+}
+
+_set_params "$@"  # quoted so empty or space-containing arguments keep their positions
+_train
+
diff --git a/utils/pd_env_collect.sh b/utils/pd_env_collect.sh
new file mode 100644
index 000000000..64ff8886c
--- /dev/null
+++ b/utils/pd_env_collect.sh
@@ -0,0 +1,167 @@
+#!/usr/bin/env bash
+
+unset GREP_OPTIONS
+
+set -u  # Check for undefined variables
+
+die() {
+  # Print an error message to stderr and exit with code 1.
+  #
+  # Usage: die <error_message>
+  #   e.g., die "Something bad happened."
+
+  printf '%s\n' "$*" >&2
+  exit 1
+}
+
+echo "Collecting system information..."
+# Report file (written to the current directory) and the interpreter used for the Python probes.
+OUTPUT_FILE=pd_env.txt
+python_bin_path=$(command -v python || command -v python3) || die "Cannot find Python binary"  # die runs outside $(...) so it really exits
+
+{
+echo
+echo '== check python ==================================================='
+} >> ${OUTPUT_FILE}
+# Probe script: prints interpreter details obtained from the platform module.
+cat <<'EOF' > /tmp/check_python.py
+import platform
+print("""python version: %s
+python branch: %s
+python build version: %s
+python compiler version: %s
+python implementation: %s
+""" % (
+platform.python_version(),
+platform.python_branch(),
+platform.python_build(),
+platform.python_compiler(),
+platform.python_implementation(),
+))
+EOF
+"${python_bin_path}" /tmp/check_python.py >> ${OUTPUT_FILE} 2>&1  # file first, then 2>&1, so errors are captured too
+
+{
+echo
+echo '== check os platform ==============================================='
+} >> ${OUTPUT_FILE}
+# Probe script: linux_distribution()/dist() are looked up with getattr because Python 3.8 removed them.
+cat <<'EOF' > /tmp/check_os.py
+import platform
+print("""os: %s
+os kernel version: %s
+os release version: %s
+os platform: %s
+linux distribution: %s
+linux os distribution: %s
+mac version: %s
+uname: %s
+architecture: %s
+machine: %s
+""" % (
+platform.system(),
+platform.version(),
+platform.release(),
+platform.platform(),
+getattr(platform, "linux_distribution", lambda: "N/A (removed in Python 3.8)")(),
+getattr(platform, "dist", lambda: "N/A (removed in Python 3.8)")(),
+platform.mac_ver(),
+platform.uname(),
+platform.architecture(),
+platform.machine(),
+))
+EOF
+"${python_bin_path}" /tmp/check_os.py >> ${OUTPUT_FILE} 2>&1  # file first, then 2>&1, so errors are captured too
+
+{
+  echo
+  echo '== are we in docker ============================================='
+  num=$(grep -c docker /proc/1/cgroup 2>/dev/null)  # empty when the file does not exist
+  if [ "${num:-0}" -ge 1 ]; then
+    echo "Yes"
+  else
+    echo "No"
+  fi
+
+  echo
+  echo '== compiler ====================================================='
+  c++ --version 2>&1
+
+  echo
+  echo '== check pips ==================================================='
+  pip list 2>&1 | grep "proto\|numpy\|paddlepaddle"
+
+  # real_prefix is set by the virtualenv tool; venv environments differ in base_prefix vs prefix.
+  echo
+  echo '== check for virtualenv ========================================='
+  "${python_bin_path}" -c 'import sys; print(hasattr(sys, "real_prefix") or getattr(sys, "base_prefix", sys.prefix) != sys.prefix)'
+
+  echo
+  echo '== paddlepaddle import ============================================'
+} >> ${OUTPUT_FILE}
+
+cat <<'EOF' > /tmp/check_pd.py
+import paddle as pd;
+pd.set_device('cpu')
+print("pd.version.full_version = %s" % pd.version.full_version)
+print("pd.version.commit = %s" % pd.version.commit)
+print("pd.__version__ = %s" % pd.__version__)
+print("Sanity check: %r" % pd.zeros([1,2,3])[:1])
+EOF
+"${python_bin_path}" /tmp/check_pd.py >> ${OUTPUT_FILE} 2>&1  # file first, then 2>&1, so import errors are captured
+# LD_DEBUG=libs traces go to stderr: send them to /tmp/loadedlibs, which is grepped for libcudnn.so afterwards.
+LD_DEBUG=libs "${python_bin_path}" -c "import paddle"  2> /tmp/loadedlibs >> ${OUTPUT_FILE}
+
+{
+  grep libcudnn.so /tmp/loadedlibs
+  echo
+  echo '== env =========================================================='
+  if [[ -n "${LD_LIBRARY_PATH+x}" ]]; then
+    echo LD_LIBRARY_PATH ${LD_LIBRARY_PATH}
+  else
+    echo "LD_LIBRARY_PATH is unset"
+  fi
+  if [[ -n "${DYLD_LIBRARY_PATH+x}" ]]; then
+    echo DYLD_LIBRARY_PATH ${DYLD_LIBRARY_PATH}
+  else
+    echo "DYLD_LIBRARY_PATH is unset"
+  fi
+
+  # The ${VAR+x} form above tells "set" (even if empty) from "unset" without tripping set -u.
+  echo
+  echo '== nvidia-smi ==================================================='
+  nvidia-smi 2>&1
+
+  echo
+  echo '== cuda libs  ==================================================='
+} >> ${OUTPUT_FILE}
+
+find /usr/local -type f -name 'libcudart*'  2>/dev/null | grep cuda |  grep -v "\\.cache" >> ${OUTPUT_FILE}
+find /usr/local -type f -name 'libcudnn*'  2>/dev/null | grep cuda |  grep -v "\\.cache" >> ${OUTPUT_FILE}
+
+{
+  echo
+  echo '== paddlepaddle installed from info =================='
+  pip show paddlepaddle-gpu 2>/dev/null || pip show paddlepaddle  # fall back to the CPU-only package name
+
+  echo
+  echo '== python version  =============================================='
+  echo '(major, minor, micro, releaselevel, serial)'
+  "${python_bin_path}" -c 'import sys; print(sys.version_info[:])'  # same interpreter as the other probes
+
+  echo
+  echo '== bazel version  ==============================================='
+  bazel version
+  echo '== cmake version  ==============================================='
+  cmake --version
+} >> ${OUTPUT_FILE}
+
+# Drop every line that mentions "google" (case-insensitive); the unfiltered copy is kept as old-${OUTPUT_FILE}.
+mv "${OUTPUT_FILE}" "old-${OUTPUT_FILE}"
+grep -v -i google "old-${OUTPUT_FILE}" > "${OUTPUT_FILE}"
+
+printf '%s\n' "Wrote environment to ${OUTPUT_FILE}. You can review the contents of that file."
+printf '%s\n' "and use it to populate the fields in the github issue template."
+printf '\n'
+printf '%s\n' "cat ${OUTPUT_FILE}"
+printf '\n'

From 7907319288d58a784d8cee981f6c2e59c609aeab Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 15 Sep 2021 11:26:07 +0000
Subject: [PATCH 2/6] fix profiler

---
 deepspeech/training/trainer.py         | 3 ++-
 deepspeech/utils/profiler.py           | 3 +++
 examples/tiny/s0/conf/deepspeech2.yaml | 2 +-
 examples/tiny/s0/local/train.sh        | 2 +-
 4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py
index bdb68310a..b31ddcad6 100644
--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -185,7 +185,8 @@ class Trainer():
                 batch_sampler.set_epoch(self.epoch)
 
     def after_train_batch(self):
-        profiler.add_profiler_step(self.args.profiler_options)
+        if self.args.profiler_options:
+            profiler.add_profiler_step(self.args.profiler_options)
 
     def train(self):
         """The training process control by epoch."""
diff --git a/deepspeech/utils/profiler.py b/deepspeech/utils/profiler.py
index 5b8389be8..357840a62 100644
--- a/deepspeech/utils/profiler.py
+++ b/deepspeech/utils/profiler.py
@@ -61,6 +61,9 @@ class ProfilerOptions(object):
         self._parse_from_string(options_str)
 
     def _parse_from_string(self, options_str):
+        if not options_str:
+            return
+            
         for kv in options_str.replace(' ', '').split(';'):
             key, value = kv.split('=')
             if key == 'batch_range':
diff --git a/examples/tiny/s0/conf/deepspeech2.yaml b/examples/tiny/s0/conf/deepspeech2.yaml
index 64598b4be..408996557 100644
--- a/examples/tiny/s0/conf/deepspeech2.yaml
+++ b/examples/tiny/s0/conf/deepspeech2.yaml
@@ -48,7 +48,7 @@ training:
   n_epoch: 10
   accum_grad: 1
   lr: 1e-5 
-  lr_decay: 1.0 
+  lr_decay: 0.8 
   weight_decay: 1e-06
   global_grad_clip: 5.0
   log_interval: 1
diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh
index a657ce345..f96508b4f 100755
--- a/examples/tiny/s0/local/train.sh
+++ b/examples/tiny/s0/local/train.sh
@@ -38,7 +38,7 @@ python3 -u ${BIN_DIR}/train.py \
 --config ${config_path} \
 --output exp/${ckpt_name} \
 --model_type ${model_type} \
---profiler_options ${profiler_options} \
+--profiler_options "${profiler_options}" \
 --seed ${seed}
 
 if [ ${seed} != 0  ]; then

From 3a5258f6a00ba0c660bd92d241e3b24cf0554520 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 15 Sep 2021 11:36:35 +0000
Subject: [PATCH 3/6] lr and opt param will restore from ckpt, so we do not set
 lr manually

---
 deepspeech/exps/u2/model.py    | 5 +++--
 deepspeech/training/trainer.py | 4 +++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 67b666ed0..1328a1cb7 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -182,9 +182,10 @@ class U2Trainer(Trainer):
         from_scratch = self.resume_or_scratch()
         if from_scratch:
             # save init model, i.e. 0 epoch
-            self.save(tag='init')
+            self.save(tag='init', infos=None)
 
-        self.lr_scheduler.step(self.iteration)
+        # lr will be restored from the optimizer ckpt
+        # self.lr_scheduler.step(self.iteration)
         if self.parallel and hasattr(self.train_loader, 'batch_sampler'):
             self.train_loader.batch_sampler.set_epoch(self.epoch)
 
diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py
index b31ddcad6..6587f1290 100644
--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -194,7 +194,9 @@ class Trainer():
         if from_scratch:
             # save init model, i.e. 0 epoch
             self.save(tag='init', infos=None)
-        self.lr_scheduler.step(self.epoch)
+
+        # lr will be restored from the optimizer ckpt
+        # self.lr_scheduler.step(self.epoch)
         if self.parallel and hasattr(self.train_loader, "batch_sampler"):
             self.train_loader.batch_sampler.set_epoch(self.epoch)
 

From 16b9c33deb10c3f895286d7c7abe64d96f618e39 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Wed, 15 Sep 2021 11:42:07 +0000
Subject: [PATCH 4/6] format

---
 deepspeech/utils/profiler.py | 2 +-
 tests/benchmark/README.md    | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/deepspeech/utils/profiler.py b/deepspeech/utils/profiler.py
index 357840a62..83b003cad 100644
--- a/deepspeech/utils/profiler.py
+++ b/deepspeech/utils/profiler.py
@@ -63,7 +63,7 @@ class ProfilerOptions(object):
     def _parse_from_string(self, options_str):
         if not options_str:
             return
-            
+
         for kv in options_str.replace(' ', '').split(';'):
             key, value = kv.split('=')
             if key == 'batch_range':
diff --git a/tests/benchmark/README.md b/tests/benchmark/README.md
index 8ec43f89e..d21999ab3 100644
--- a/tests/benchmark/README.md
+++ b/tests/benchmark/README.md
@@ -4,9 +4,8 @@
 
 * Aishell
 
-## Docker 
+## Docker
 
 ```
 registry.baidubce.com/paddlepaddle/paddle   2.1.1-gpu-cuda10.2-cudnn7   59d5ec1de486  
 ```
-

From 0f3e5a3872defc3e7197e01f8ae7e760b22c00bf Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 16 Sep 2021 05:59:16 +0000
Subject: [PATCH 5/6] run_all with aishell/s1

---
 tests/benchmark/run_all.sh       | 27 +++++++++++++++++++--------
 tests/benchmark/run_benchmark.sh |  2 ++
 2 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/tests/benchmark/run_all.sh b/tests/benchmark/run_all.sh
index 7564174b4..7aa11d0f2 100644
--- a/tests/benchmark/run_all.sh
+++ b/tests/benchmark/run_all.sh
@@ -1,20 +1,29 @@
 #!/bin/bash
 
-# collect env info
-bash ../../utils/pd_env_collect.sh
-
+ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)  # absolute, so it stays valid after the later pushd calls
 
+# 提供可稳定复现性能的脚本，默认在标准docker环境内py37执行：
+# collect env info
+bash ${ROOT_DIR}/utils/pd_env_collect.sh
+cat pd_env.txt
 
-# 提供可稳定复现性能的脚本，默认在标准docker环境内py37执行： paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7  paddle=2.1.2  py=37
 # 执行目录：需说明
-cd **
+pushd ${ROOT_DIR}/examples/aishell/s1
+
 # 1 安装该模型需要的依赖 (如需开启优化策略请注明)
-pip install ...
+pushd ${ROOT_DIR}/tools; make; popd
+source ${ROOT_DIR}/tools/venv/bin/activate
+pushd ${ROOT_DIR}; bash setup.sh; popd
+
+
 # 2 拷贝该模型需要数据、预训练模型
+mkdir -p exp/log
+bash local/data.sh &> exp/log/data.log
+
 # 3 批量运行（如不方便批量，1，2需放到单个模型中）
 
-model_mode_list=(MobileNetv1 MobileNetv2)
-fp_item_list=(fp32 fp16)
+model_mode_list=(conformer)
+fp_item_list=(fp32)
 bs_item=(32 64 96)
 for model_mode in ${model_mode_list[@]}; do
       for fp_item in ${fp_item_list[@]}; do
@@ -31,3 +40,5 @@ for model_mode in ${model_mode_list[@]}; do
             done
       done
 done
+
+popd # aishell/s1
diff --git a/tests/benchmark/run_benchmark.sh b/tests/benchmark/run_benchmark.sh
index 2b9cf70fd..625d36160 100644
--- a/tests/benchmark/run_benchmark.sh
+++ b/tests/benchmark/run_benchmark.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 set -xe
+
 # 运行示例：CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh ${run_mode} ${bs_item} ${fp_item} 500 ${model_mode}
 # 参数说明
 function _set_params(){
@@ -17,6 +18,7 @@ function _set_params(){
     num_gpu_devices=${#arr[*]}
     log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
 }
+
 function _train(){
     echo "Train on ${num_gpu_devices} GPUs"
     echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"

From a997b5a61cd3573bb920b3c8b4a880aeee28432f Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 16 Sep 2021 06:00:35 +0000
Subject: [PATCH 6/6] rename ckpt suffix to np

---
 deepspeech/training/extensions/snapshot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepspeech/training/extensions/snapshot.py b/deepspeech/training/extensions/snapshot.py
index 1d3fe70cb..e81eb97fc 100644
--- a/deepspeech/training/extensions/snapshot.py
+++ b/deepspeech/training/extensions/snapshot.py
@@ -101,7 +101,7 @@ class Snapshot(extension.Extension):
         iteration = trainer.updater.state.iteration
         epoch = trainer.updater.state.epoch
         num = epoch if self.trigger[1] == 'epoch' else iteration
-        path = self.checkpoint_dir / f"{num}.pdz"
+        path = self.checkpoint_dir / f"{num}.np"
 
         # add the new one
         trainer.updater.save(path)