From e79e00a6b27504ca4962f56a6e9b71b56a12e9e7 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 22 Nov 2021 09:29:23 +0000
Subject: [PATCH] pack model

---
 examples/wenetspeech/asr1/README.md  |  30 ++---
 examples/wenetspeech/asr1/RESULTS.md |  24 ++++
 examples/wenetspeech/asr1/utils      |   1 +
 utils/pack_model.sh                  | 169 +++++++++++++++++++++++++++
 utils/show_results.sh                |  74 ++++++++++++
 5 files changed, 278 insertions(+), 20 deletions(-)
 create mode 100644 examples/wenetspeech/asr1/RESULTS.md
 create mode 120000 examples/wenetspeech/asr1/utils
 create mode 100755 utils/pack_model.sh
 create mode 100755 utils/show_results.sh

diff --git a/examples/wenetspeech/asr1/README.md b/examples/wenetspeech/asr1/README.md
index 5aff041f..c08b94e2 100644
--- a/examples/wenetspeech/asr1/README.md
+++ b/examples/wenetspeech/asr1/README.md
@@ -1,24 +1,14 @@
-# WenetSpeech
+## Pack Model
 
+Pack the model into a tar.gz archive, e.g.:
 
-## Conformer
+```bash
+./utils/pack_model.sh --preprocess_conf conf/preprocess.yaml --dict data/vocab.txt \
+    conf/conformer.yaml '' data/mean_std.json exp/conformer/checkpoints/wenetspeech.pdparams
 
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | dev | attention |  |  |  
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test net | ctc_greedy_search |  |  |  
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test meeting | ctc_prefix_beam_search |  |  |  
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test net | attention_rescoring |  |  |  
+```
 
-
-
-## Conformer Pretrain Model
-
-Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wenetspeech/20211025_conformer_exp.tar.gz
-
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention | - | 0.048456 |  
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | - | 0.052534 |  
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | - | 0.052915 |  
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention_rescoring | - | 0.047904 |  
\ No newline at end of file
+List the contents of model.tar.gz:
+```
+tar tf model.tar.gz 
+```
diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md
new file mode 100644
index 00000000..5aff041f
--- /dev/null
+++ b/examples/wenetspeech/asr1/RESULTS.md
@@ -0,0 +1,24 @@
+# WenetSpeech
+
+
+## Conformer
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | dev | attention |  |  |  
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test net | ctc_greedy_search |  |  |  
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test meeting | ctc_prefix_beam_search |  |  |  
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | test net | attention_rescoring |  |  |  
+
+
+
+## Conformer Pretrain Model
+
+Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wenetspeech/20211025_conformer_exp.tar.gz
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |  
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention | - | 0.048456 |  
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_greedy_search | - | 0.052534 |  
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | ctc_prefix_beam_search | - | 0.052915 |  
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug  | aishell1 | attention_rescoring | - | 0.047904 |  
\ No newline at end of file
diff --git a/examples/wenetspeech/asr1/utils b/examples/wenetspeech/asr1/utils
new file mode 120000
index 00000000..973afe67
--- /dev/null
+++ b/examples/wenetspeech/asr1/utils
@@ -0,0 +1 @@
+../../../utils
\ No newline at end of file
diff --git a/utils/pack_model.sh b/utils/pack_model.sh
new file mode 100755
index 00000000..5bd40c84
--- /dev/null
+++ b/utils/pack_model.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (Shinji Watanabe)
+#  Apache 2.0  (http://www.apache.org/licenses/LICENSE-2.0)
+
+[ -f ./path.sh ] && . ./path.sh   # optional environment setup, sourced only if present in the current directory
+# Option defaults. The README example passes them as `--<name> <value>`; presumably utils/parse_options.sh maps those flags onto these variables.
+results=""           # whitespace-separated result files to summarize after packing
+# e.g., "exp/tr_it_pytorch_train/decode_dt_it_decode/result.wrd.txt
+#        exp/tr_it_pytorch_train/decode_et_it_decode/result.wrd.txt"
+lm=""                # optional language model file; model.json from the same directory is packed with it
+dict=""              # optional dictionary file
+etc=""               # optional whitespace-separated list of extra files to pack
+outfile="model"      # archive base name: the script writes ${outfile}.tar and gzips it
+preprocess_conf=""   # optional preprocess config file
+
+help_message=$(cat <<EOF
+Usage: $0 [--lm <lm>] [--dict <dict>] [--preprocess_conf <conf>] [--etc "<files>"] [--results "<files>"] [--outfile <name>] <train_conf> <dec_conf> <cmvn> <e2e>, for example:
+<lm>:       exp/train_rnnlm/rnnlm.model.best
+<dict>:     data/lang_char
+<train_conf>:  conf/train.yaml
+<dec_conf>: conf/decode.yaml
+<cmvn>:     data/tr_it/cmvn.ark
+<e2e>:      exp/tr_it_pytorch_train/results/model.last10.avg.best
+EOF
+)
+
+. utils/parse_options.sh
+
+# Exactly four positional arguments must remain after option parsing;
+# otherwise print the usage to stderr and fail.
+if [ $# -ne 4 ]; then
+    echo "${help_message}" 1>&2
+    exit 1
+fi
+
+tr_conf=$1    # training config
+dec_conf=$2   # decoding config; the README example passes '' here
+cmvn=$3
+e2e=$4
+
+# The lines echoed to stdout from here on form a Markdown bullet list describing the archive.
+echo "  - Model files (archived to ${outfile}.tar.gz by \`\$ pack_model.sh\`)"
+echo "    - model link: (put the model link manually.)"
+
+# configs
+if [ -e ${tr_conf} ]; then
+    tar cfh ${outfile}.tar ${tr_conf}
+    echo -n "    - training config file: \`"
+    echo ${tr_conf} | sed -e "s/$/\`/"
+else
+    echo "missing ${tr_conf}"
+    exit 1
+fi
+if [ -e ${dec_conf} ]; then
+    tar rfh ${outfile}.tar ${dec_conf}
+    echo -n "    - decoding config file: \`"
+    echo ${dec_conf} | sed -e "s/$/\`/"
+else
+    echo "missing ${dec_conf}"
+    exit 1
+fi
+# NOTE(kan-bayashi): preprocess conf is optional, but when given it must exist
+if [ -n "${preprocess_conf}" ]; then
+    [ -e "${preprocess_conf}" ] || { echo "missing ${preprocess_conf}"; exit 1; }
+    tar rfh "${outfile}.tar" "${preprocess_conf}" || exit 1
+    echo "    - preprocess config file: \`${preprocess_conf}\`"
+fi
+
+# cmvn
+# Required file. Quoting makes an empty path count as missing and keeps
+# paths that contain spaces intact.
+if [ ! -e "${cmvn}" ]; then
+    echo "missing ${cmvn}"
+    exit 1
+fi
+tar rfh "${outfile}.tar" "${cmvn}" || exit 1
+echo "    - cmvn file: \`${cmvn}\`"
+
+# e2e
+if [ -e ${e2e} ]; then
+    tar rfh ${outfile}.tar ${e2e}
+    echo -n "    - e2e file: \`"
+    echo ${e2e} | sed -e "s/$/\`/"
+
+    e2e_conf=$(dirname ${e2e})/model.json
+    if [ ! -e ${e2e_conf} ]; then
+	echo missing ${e2e_conf}
+	#exit 1
+    else
+	echo -n "    - e2e JSON file: \`"
+	echo ${e2e_conf} | sed -e "s/$/\`/"
+	tar rfh ${outfile}.tar ${e2e_conf}
+    fi
+else
+    echo "missing ${e2e}"
+    exit 1
+fi
+
+# lm
+# Optional: packed only when --lm is given. A given but missing LM file is
+# fatal. In contrast to the e2e model, the model.json next to the LM file is
+# mandatory here.
+if [ -n "${lm}" ]; then
+    if [ ! -e "${lm}" ]; then
+        echo "missing ${lm}"
+        exit 1
+    fi
+    tar rfh "${outfile}.tar" "${lm}" || exit 1
+    echo "    - lm file: \`${lm}\`"
+
+    # model.json is looked up in the directory that holds the LM file.
+    lm_conf="$(dirname "${lm}")/model.json"
+    if [ ! -e "${lm_conf}" ]; then
+        echo "missing ${lm_conf}"
+        exit 1
+    fi
+    echo "    - lm JSON file: \`${lm_conf}\`"
+    tar rfh "${outfile}.tar" "${lm_conf}" || exit 1
+fi
+
+# dict
+# Optional: packed only when --dict is given; a given but missing file is
+# fatal.
+# Nothing but the bullet line is echoed here: stdout is the Markdown listing,
+# so any stray debug output would end up inside it.
+if [ -n "${dict}" ]; then
+    if [ ! -e "${dict}" ]; then
+        echo "missing ${dict}"
+        exit 1
+    fi
+    tar rfh "${outfile}.tar" "${dict}" || exit 1
+    echo "    - dict file: \`${dict}\`"
+fi
+
+# etc
+# ${etc} is expanded unquoted on purpose: it is a whitespace-separated list
+# of extra files, each of which must exist.
+for x in ${etc}; do
+    if [ ! -e "${x}" ]; then
+        echo "missing ${x}"
+        exit 1
+    fi
+    tar rfh "${outfile}.tar" "${x}" || exit 1
+    echo "    - etc file: \`${x}\`"
+done
+
+# finally compress the tar file (-f replaces an existing ${outfile}.tar.gz)
+gzip -f "${outfile}.tar" || exit 1
+
+# results
+# Optional: for each file listed in --results, print its name and the first
+# two lines matching "Avg" or "SPKR", wrapped in a Markdown code fence.
+if [ -n "${results}" ]; then
+    echo "  - Results (paste them by yourself or obtained by \`\$ pack_model.sh --results <results>\`)"
+    echo "\`\`\`"
+    # ${results} is split on whitespace on purpose (it is a list of files).
+    for x in ${results}; do
+        if [ ! -e "${x}" ]; then
+            echo "missing ${x}"
+            exit 1
+        fi
+        echo "${x}"
+        grep -e Avg -e SPKR -m 2 "${x}"
+    done
+    echo "\`\`\`"
+fi
+
+exit 0
diff --git a/utils/show_results.sh b/utils/show_results.sh
new file mode 100755
index 00000000..42f80ee6
--- /dev/null
+++ b/utils/show_results.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+mindepth=0
+maxdepth=1
+
+. utils/parse_options.sh
+
+if [ $# -gt 1 ]; then
+    echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2
+    echo ""
+    echo "Show the system environments and the evaluation results in Markdown format."
+    echo 'The default of <exp> is "exp/".'
+    exit 1
+fi
+
+[ -f ./path.sh ] && . ./path.sh
+set -euo pipefail
+if [ $# -eq 1 ]; then
+    exp=$1
+else
+    exp=exp
+fi
+
+
+cat << EOF  # document header: generator name and the current date in the C locale
+<!-- Generated by $0 -->
+# RESULTS
+## Environments
+- date: \`$(LC_ALL=C date)\`
+EOF
+# Python and paddle versions, reported by the interpreter itself; with `set -e` active a failing python3 aborts the script.
+python3 << EOF
+import sys, paddle
+pyversion = sys.version.replace('\n', ' ')
+
+print(f"""- python version: \`{pyversion}\`
+- paddle version: \`paddle {paddle.__version__}\`""")
+EOF
+# Git hash and commit date of HEAD, as reported by git from the current directory.
+cat << EOF
+- Git hash: \`$(git rev-parse HEAD)\`
+  - Commit date: \`$(git log -1 --format='%cd')\`
+
+EOF
+
+while IFS= read -r expdir; do
+    if ls "${expdir}"/decode_*/result.txt &> /dev/null; then
+        # 1. Show the result table (CER): one row per "Avg" line of each decode_*/result.txt
+        cat << EOF
+## $(basename "${expdir}")
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+EOF
+        # -H keeps the file-name prefix even for a single file; grep status 1 (no "Avg" line) must not abort under pipefail
+        { grep -H -e Avg "${expdir}"/decode_*/result.txt || [ $? -eq 1 ]; } \
+            | sed -e "s#${expdir}/\([^/]*\)/result.txt:#|\1#g" \
+            | sed -e 's#Sum/Avg##g' | tr '|' ' ' | tr -s ' ' '|'
+        echo
+        # 2. Show the result table for WER
+        if ls "${expdir}"/decode_*/result.wrd.txt &> /dev/null; then
+            cat << EOF
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+EOF
+            { grep -H -e Avg "${expdir}"/decode_*/result.wrd.txt || [ $? -eq 1 ]; } \
+                | sed -e "s#${expdir}/\([^/]*\)/result.wrd.txt:#|\1#g" \
+                | sed -e 's#Sum/Avg##g' | tr '|' ' ' | tr -s ' ' '|'
+            echo
+        fi
+    fi
+done < <(find "${exp}" -mindepth "${mindepth}" -maxdepth "${maxdepth}" -type d)