From e79e00a6b27504ca4962f56a6e9b71b56a12e9e7 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Mon, 22 Nov 2021 09:29:23 +0000
Subject: [PATCH] pack model

---
 examples/wenetspeech/asr1/README.md  |  30 ++---
 examples/wenetspeech/asr1/RESULTS.md |  24 ++++
 examples/wenetspeech/asr1/utils      |   1 +
 utils/pack_model.sh                  | 169 +++++++++++++++++++++++++++
 utils/show_results.sh                |  74 ++++++++++++
 5 files changed, 278 insertions(+), 20 deletions(-)
 create mode 100644 examples/wenetspeech/asr1/RESULTS.md
 create mode 120000 examples/wenetspeech/asr1/utils
 create mode 100755 utils/pack_model.sh
 create mode 100755 utils/show_results.sh

diff --git a/examples/wenetspeech/asr1/README.md b/examples/wenetspeech/asr1/README.md
index 5aff041f..c08b94e2 100644
--- a/examples/wenetspeech/asr1/README.md
+++ b/examples/wenetspeech/asr1/README.md
@@ -1,24 +1,14 @@
-# WenetSpeech
+## Pack Model
+pack model to tar.gz, e.g.
 
 
-## Conformer
+```bash
+./utils/pack_model.sh --preprocess_conf conf/preprocess.yaml --dict data/vocab.txt conf/conformer.yaml '' data/mean_std.json exp/conformer/checkpoints/wenetspeec
+h.pdparams
 
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | dev | attention | | |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | test net | ctc_greedy_search | | |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | test meeting | ctc_prefix_beam_search | | |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | test net | attention_rescoring | | |
+```
 
-
-
-## Conformer Pretrain Model
-
-Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wenetspeech/20211025_conformer_exp.tar.gz
-
-| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
-| --- | --- | --- | --- | --- | --- | --- | --- |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention | - | 0.048456 |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_greedy_search | - | 0.052534 |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_prefix_beam_search | - | 0.052915 |
-| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention_rescoring | - | 0.047904 |
\ No newline at end of file
+show model.tar.gz
+```
+tar tf model.tar.gz
+```
diff --git a/examples/wenetspeech/asr1/RESULTS.md b/examples/wenetspeech/asr1/RESULTS.md
new file mode 100644
index 00000000..5aff041f
--- /dev/null
+++ b/examples/wenetspeech/asr1/RESULTS.md
@@ -0,0 +1,24 @@
+# WenetSpeech
+
+
+## Conformer
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | dev | attention | | |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | test net | ctc_greedy_search | | |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | test meeting | ctc_prefix_beam_search | | |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | test net | attention_rescoring | | |
+
+
+
+## Conformer Pretrain Model
+
+Pretrain model from http://mobvoi-speech-public.ufile.ucloud.cn/public/wenet/wenetspeech/20211025_conformer_exp.tar.gz
+
+| Model | Params | Config | Augmentation| Test set | Decode method | Loss | WER |
+| --- | --- | --- | --- | --- | --- | --- | --- |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention | - | 0.048456 |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_greedy_search | - | 0.052534 |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | ctc_prefix_beam_search | - | 0.052915 |
+| conformer | 32.52 M | conf/conformer.yaml | spec_aug | aishell1 | attention_rescoring | - | 0.047904 |
\ No newline at end of file
diff --git a/examples/wenetspeech/asr1/utils b/examples/wenetspeech/asr1/utils
new file mode 120000
index 00000000..973afe67
--- /dev/null
+++ b/examples/wenetspeech/asr1/utils
@@ -0,0 +1 @@
+../../../utils
\ No newline at end of file
diff --git a/utils/pack_model.sh b/utils/pack_model.sh
new file mode 100755
index 00000000..5bd40c84
--- /dev/null
+++ b/utils/pack_model.sh
@@ -0,0 +1,169 @@
+#!/usr/bin/env bash
+
+# Copyright 2019 Johns Hopkins University (Shinji Watanabe)
+# Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
+
+[ -f ./path.sh ] && . ./path.sh
+
+results=""
+# e.g., "exp/tr_it_pytorch_train/decode_dt_it_decode/result.wrd.txt
+# exp/tr_it_pytorch_train/decode_et_it_decode/result.wrd.txt"'
+lm=""
+dict=""
+etc=""
+outfile="model"
+preprocess_conf=""
+
+help_message=$(cat <<EOF
+Usage: $0 --lm <lm> --dict <dict> <tr_conf> <dec_conf> <cmvn> <e2e>, for example:
+<lm>: exp/train_rnnlm/rnnlm.model.best
+<dict>: data/lang_char
+<tr_conf>: conf/train.yaml
+<dec_conf>: conf/decode.yaml
+<cmvn>: data/tr_it/cmvn.ark
+<e2e>: exp/tr_it_pytorch_train/results/model.last10.avg.best
+EOF
+)
+
+. utils/parse_options.sh
+
+echo $PWD
+echo $dict
+
+if [ $# != 4 ]; then
+    echo "${help_message}"
+    exit 1
+fi
+
+tr_conf=$1
+dec_conf=$2
+cmvn=$3
+e2e=$4
+
+echo " - Model files (archived to ${outfile}.tar.gz by \`\$ pack_model.sh\`)"
+echo " - model link: (put the model link manually.)"
+
+# configs
+if [ -e ${tr_conf} ]; then
+    tar cfh ${outfile}.tar ${tr_conf}
+    echo -n " - training config file: \`"
+    echo ${tr_conf} | sed -e "s/$/\`/"
+else
+    echo "missing ${tr_conf}"
+    exit 1
+fi
+if [ -e ${dec_conf} ]; then
+    tar rfh ${outfile}.tar ${dec_conf}
+    echo -n " - decoding config file: \`"
+    echo ${dec_conf} | sed -e "s/$/\`/"
+else
+    echo "missing ${dec_conf}"
+    exit 1
+fi
+# NOTE(kan-bayashi): preprocess conf is optional
+if [ -n "${preprocess_conf}" ]; then
+    tar rfh ${outfile}.tar ${preprocess_conf}
+    echo -n " - preprocess config file: \`"
+    echo ${preprocess_conf} | sed -e "s/$/\`/"
+fi
+
+# cmvn
+if [ -e ${cmvn} ]; then
+    tar rfh ${outfile}.tar ${cmvn}
+    echo -n " - cmvn file: \`"
+    echo ${cmvn} | sed -e "s/$/\`/"
+else
+    echo "missing ${cmvn}"
+    exit 1
+fi
+
+# e2e
+if [ -e ${e2e} ]; then
+    tar rfh ${outfile}.tar ${e2e}
+    echo -n " - e2e file: \`"
+    echo ${e2e} | sed -e "s/$/\`/"
+
+    e2e_conf=$(dirname ${e2e})/model.json
+    if [ ! -e ${e2e_conf} ]; then
+        echo missing ${e2e_conf}
+        #exit 1
+    else
+        echo -n " - e2e JSON file: \`"
+        echo ${e2e_conf} | sed -e "s/$/\`/"
+        tar rfh ${outfile}.tar ${e2e_conf}
+    fi
+else
+    echo "missing ${e2e}"
+    exit 1
+fi
+
+# lm
+if [ -n "${lm}" ]; then
+    if [ -e ${lm} ]; then
+        tar rfh ${outfile}.tar ${lm}
+        echo -n " - lm file: \`"
+        echo ${lm} | sed -e "s/$/\`/"
+
+        lm_conf=$(dirname ${lm})/model.json
+        if [ ! -e ${lm_conf} ]; then
+            echo missing ${lm_conf}
+            exit 1
+        else
+            echo -n " - lm JSON file: \`"
+            echo ${lm_conf} | sed -e "s/$/\`/"
+            tar rfh ${outfile}.tar ${lm_conf}
+        fi
+    else
+        echo "missing ${lm}"
+        exit 1
+    fi
+fi
+
+echo ${dict}
+echo test -e ${dict}
+# dict
+if [ -n "${dict}" ]; then
+    if [ -e ${dict} ]; then
+        tar rfh ${outfile}.tar ${dict}
+        echo -n " - dict file: \`"
+        echo ${dict} | sed -e "s/$/\`/"
+    else
+        echo "missing ${dict}"
+        exit 1
+    fi
+fi
+
+# etc
+for x in ${etc}; do
+    if [ -e ${x} ]; then
+        tar rfh ${outfile}.tar ${x}
+        echo -n " - etc file: \`"
+        echo ${x} | sed -e "s/$/\`/"
+    else
+        echo "missing ${x}"
+        exit 1
+    fi
+done
+
+# finally compress the tar file
+gzip -f ${outfile}.tar
+
+# results
+if [ -n "${results}" ]; then
+    echo " - Results (paste them by yourself or obtained by \`\$ pack_model.sh --results <results>\`)"
+    echo "\`\`\`"
+fi
+for x in ${results}; do
+    if [ -e ${x} ]; then
+        echo "${x}"
+        grep -e Avg -e SPKR -m 2 ${x}
+    else
+        echo "missing ${x}"
+        exit 1
+    fi
+done
+if [ -n "${results}" ]; then
+    echo "\`\`\`"
+fi
+
+exit 0
diff --git a/utils/show_results.sh b/utils/show_results.sh
new file mode 100755
index 00000000..42f80ee6
--- /dev/null
+++ b/utils/show_results.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+mindepth=0
+maxdepth=1
+
+. utils/parse_options.sh
+
+if [ $# -gt 1 ]; then
+    echo "Usage: $0 --mindepth 0 --maxdepth 1 [exp]" 1>&2
+    echo ""
+    echo "Show the system environments and the evaluation results in Markdown format."
+    echo 'The default of <exp> is "exp/".'
+    exit 1
+fi
+
+[ -f ./path.sh ] && . ./path.sh
+set -euo pipefail
+if [ $# -eq 1 ]; then
+    exp=$1
+else
+    exp=exp
+fi
+
+
+cat << EOF
+
+# RESULTS
+## Environments
+- date: \`$(LC_ALL=C date)\`
+EOF
+
+python3 << EOF
+import sys, paddle
+pyversion = sys.version.replace('\n', ' ')
+
+print(f"""- python version: \`{pyversion}\`
+- paddle version: \`paddle {paddle.__version__}\`""")
+EOF
+
+cat << EOF
+- Git hash: \`$(git rev-parse HEAD)\`
+  - Commit date: \`$(git log -1 --format='%cd')\`
+
+EOF
+
+while IFS= read -r expdir; do
+    if ls ${expdir}/decode_*/result.txt &> /dev/null; then
+        # 1. Show the result table
+        cat << EOF
+## $(basename ${expdir})
+### CER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+EOF
+        grep -e Avg ${expdir}/decode_*/result.txt \
+            | sed -e "s#${expdir}/\([^/]*\)/result.txt:#|\1#g" \
+            | sed -e 's#Sum/Avg##g' | tr '|' ' ' | tr -s ' ' '|'
+        echo
+
+        # 2. Show the result table for WER
+        if ls ${expdir}/decode_*/result.wrd.txt &> /dev/null; then
+            cat << EOF
+### WER
+
+|dataset|Snt|Wrd|Corr|Sub|Del|Ins|Err|S.Err|
+|---|---|---|---|---|---|---|---|---|
+EOF
+            grep -e Avg ${expdir}/decode_*/result.wrd.txt \
+                | sed -e "s#${expdir}/\([^/]*\)/result.wrd.txt:#|\1#g" \
+                | sed -e 's#Sum/Avg##g' | tr '|' ' ' | tr -s ' ' '|'
+            echo
+        fi
+    fi
+done < <(find ${exp} -mindepth ${mindepth} -maxdepth ${maxdepth} -type d)