diff --git a/examples/aishell/s0/conf/deepspeech2.yaml b/examples/aishell/s0/conf/deepspeech2.yaml
index 5a386b985..f2edeb964 100644
--- a/examples/aishell/s0/conf/deepspeech2.yaml
+++ b/examples/aishell/s0/conf/deepspeech2.yaml
@@ -22,18 +22,21 @@ data:
   sortagrad: True
   shuffle_method: batch_shuffle
   num_workers: 0
+
 model:
   num_conv_layers: 2
   num_rnn_layers: 3
   rnn_layer_size: 1024
   use_gru: True
   share_rnn_weights: False
+
 training:
   n_epoch: 50
   lr: 2e-3
   lr_decay: 0.83
   weight_decay: 1e-06
   global_grad_clip: 5.0
+
 decoding:
   batch_size: 128
   error_rate_type: cer
diff --git a/examples/aishell/s0/local/infer.sh b/examples/aishell/s0/local/infer.sh
deleted file mode 100644
index 8c6a4dca2..000000000
--- a/examples/aishell/s0/local/infer.sh
+++ /dev/null
@@ -1,27 +0,0 @@
-#! /usr/bin/env bash
-
-if [[ $# != 1 ]]; then
-    echo "usage: $0 ckpt-path"
-    exit -1
-fi
-
-# download language model
-bash local/download_lm_ch.sh
-if [ $? -ne 0 ]; then
-    exit 1
-fi
-
-python3 -u ${BIN_DIR}/infer.py \
---device 'gpu' \
---nproc 1 \
---config conf/deepspeech2.yaml \
---checkpoint_path ${1}
-
-
-if [ $? -ne 0 ]; then
-    echo "Failed in inference!"
-    exit 1
-fi
-
-
-exit 0
diff --git a/examples/aishell/s0/local/test.sh b/examples/aishell/s0/local/test.sh
index 0872ff21e..8c6a4dca2 100644
--- a/examples/aishell/s0/local/test.sh
+++ b/examples/aishell/s0/local/test.sh
@@ -1,19 +1,25 @@
 #! /usr/bin/env bash
 
+if [[ $# != 1 ]]; then
+    echo "usage: $0 ckpt-path"
+    exit -1
+fi
+
 # download language model
 bash local/download_lm_ch.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
 
-python3 -u ${BIN_DIR}/test.py \
+python3 -u ${BIN_DIR}/infer.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
---output ckpt
+--checkpoint_path ${1}
+
 
 if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
+    echo "Failed in inference!"
     exit 1
 fi
 
diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml
index 85e35484f..40b04ed7b 100644
--- a/examples/aishell/s1/conf/conformer.yaml
+++ b/examples/aishell/s1/conf/conformer.yaml
@@ -24,7 +24,7 @@ data:
   n_fft: None
   stride_ms: 10.0
   window_ms: 25.0
-  use_dB_normalization: True
+  use_dB_normalization: False
   target_dB: -20
   random_seed: 0
   keep_transcription_text: False
@@ -74,7 +74,7 @@ model:
 
 
 training:
-  n_epoch: 240
+  n_epoch: 300
   accum_grad: 2
   global_grad_clip: 5.0
   optim: adam
diff --git a/examples/librispeech/s0/local/download_model.sh b/examples/librispeech/s0/local/download_model.sh
deleted file mode 100644
index f13bde0f2..000000000
--- a/examples/librispeech/s0/local/download_model.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#! /usr/bin/env bash
-
-. ${MAIN_ROOT}/utils/utility.sh
-
-DIR=data/pretrain
-mkdir -p ${DIR}
-
-URL='https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz'
-MD5=fafb11fe57c3ecd107147056453f5348
-TARGET=${DIR}/librispeech_model_fluid.tar.gz
-
-
-echo "Download LibriSpeech model ..."
-download $URL $MD5 $TARGET
-if [ $? -ne 0 ]; then
-    echo "Fail to download LibriSpeech model!"
-    exit 1
-fi
-tar -zxvf $TARGET -C ${DIR}
-
-exit 0
diff --git a/examples/librispeech/s0/local/export.sh b/examples/librispeech/s0/local/export.sh
index 1b5533916..1b19d5720 100644
--- a/examples/librispeech/s0/local/export.sh
+++ b/examples/librispeech/s0/local/export.sh
@@ -1,18 +1,32 @@
 #! /usr/bin/env bash
 
-if [ $# != 2 ];then
-    echo "usage: export ckpt_path jit_model_path"
+if [ $# != 3 ];then
+    echo "usage: $0 config_path ckpt_prefix jit_model_path"
     exit -1
 fi
 
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
+config_path=$1
+ckpt_path_prefix=$2
+jit_model_export_path=$3
+
+device=gpu
+if [ "${ngpu}" -eq 0 ];then  # awk prints NF=0 when CUDA_VISIBLE_DEVICES is empty/unset -> use CPU
+    device=cpu
+fi
+
 python3 -u ${BIN_DIR}/export.py \
---config conf/deepspeech2.yaml \
---checkpoint_path ${1} \
---export_path ${2}
+--device ${device} \
+--nproc ${ngpu} \
+--config ${config_path} \
+--checkpoint_path ${ckpt_path_prefix} \
+--export_path ${jit_model_export_path}
 
 
 if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
+    echo "Failed in export!"
     exit 1
 fi
 
diff --git a/examples/librispeech/s0/local/infer.sh b/examples/librispeech/s0/local/infer.sh
deleted file mode 100644
index 98b3b016a..000000000
--- a/examples/librispeech/s0/local/infer.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#! /usr/bin/env bash
-
-if [[ $# != 1 ]];then
-    echo "usage: $0 ckpt-path"
-    exit -1
-fi
-
-# download language model
-bash local/download_lm_en.sh
-if [ $? -ne 0 ]; then
-    exit 1
-fi
-
-python3 -u ${BIN_DIR}/infer.py \
---device 'gpu' \
---nproc 1 \
---config conf/deepspeech2.yaml \
---checkpoint_path ${1}
-
-if [ $? -ne 0 ]; then
-    echo "Failed in inference!"
-    exit 1
-fi
-
-exit 0
diff --git a/examples/librispeech/s0/local/test.sh b/examples/librispeech/s0/local/test.sh
index f39fbaef1..79e05838c 100644
--- a/examples/librispeech/s0/local/test.sh
+++ b/examples/librispeech/s0/local/test.sh
@@ -1,17 +1,32 @@
 #! /usr/bin/env bash
 
+if [ $# != 2 ];then
+    echo "usage: ${0} config_path ckpt_path_prefix"
+    exit -1
+fi
+
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
+device=gpu
+if [ "${ngpu}" -eq 0 ];then  # awk prints NF=0 when CUDA_VISIBLE_DEVICES is empty/unset -> use CPU
+    device=cpu
+fi
+config_path=$1
+ckpt_prefix=$2
+
 # download language model
 bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
-  exit 1
+    exit 1
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device 'gpu' \
+--device ${device} \
 --nproc 1 \
---config conf/deepspeech2.yaml \
---output ckpt
-
+--config ${config_path} \
+--result_file ${ckpt_prefix}.rsl \
+--checkpoint_path ${ckpt_prefix}
 
 if [ $? -ne 0 ]; then
     echo "Failed in evaluation!"
diff --git a/examples/librispeech/s0/local/train.sh b/examples/librispeech/s0/local/train.sh
index cbccb1896..a4218aa86 100644
--- a/examples/librispeech/s0/local/train.sh
+++ b/examples/librispeech/s0/local/train.sh
@@ -1,23 +1,33 @@
 #! /usr/bin/env bash
 
-#export FLAGS_sync_nccl_allreduce=0
-
-# https://github.com/PaddlePaddle/Paddle/pull/28484
-#export NCCL_SHM_DISABLE=1
+if [ $# != 2 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+    exit -1
+fi
 
-ngpu=$(echo ${CUDA_VISIBLE_DEVICES} | python -c 'import sys; a = sys.stdin.read(); print(len(a.split(",")));')
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 
+config_path=$1
+ckpt_name=$2
+
+device=gpu
+if [ "${ngpu}" -eq 0 ];then  # awk prints NF=0 when CUDA_VISIBLE_DEVICES is empty/unset -> use CPU
+    device=cpu
+fi
+echo "using ${device}..."
+
+mkdir -p exp
+
 python3 -u ${BIN_DIR}/train.py \
---device 'gpu' \
+--device ${device} \
 --nproc ${ngpu} \
---config conf/deepspeech2.yaml \
---output ckpt-${1}
+--config ${config_path} \
+--output exp/${ckpt_name}
 
 if [ $? -ne 0 ]; then
     echo "Failed in training!"
     exit 1
 fi
 
-
 exit 0
diff --git a/examples/librispeech/s0/run.sh b/examples/librispeech/s0/run.sh
index cf0f41edb..2ee577cd5 100644
--- a/examples/librispeech/s0/run.sh
+++ b/examples/librispeech/s0/run.sh
@@ -1,19 +1,37 @@
 #!/bin/bash
 set -e
-
 source path.sh
 
-# prepare data
-bash ./local/data.sh
+stage=0
+stop_stage=100
+conf_path=conf/deepspeech2.yaml
+ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
+avg_num=1
+avg_ckpt=avg_${avg_num}
+
+source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # prepare data
+    bash ./local/data.sh || exit -1
+fi
 
-# train model
-CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=4,5,6,7 ./local/train.sh ${conf_path} ${ckpt}
+fi
 
-# test model
-CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # avg n best model
+    ./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
+fi
 
-# infer model
-CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=7 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi
 
-# export model
-bash ./local/export.sh ckpt/checkpoints/step-3284 jit.model
\ No newline at end of file
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # export ckpt avg_n
+    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+fi
\ No newline at end of file
diff --git a/examples/tiny/s0/local/export.sh b/examples/tiny/s0/local/export.sh
index 1b5533916..1b19d5720 100644
--- a/examples/tiny/s0/local/export.sh
+++ b/examples/tiny/s0/local/export.sh
@@ -1,18 +1,32 @@
 #! /usr/bin/env bash
 
-if [ $# != 2 ];then
-    echo "usage: export ckpt_path jit_model_path"
+if [ $# != 3 ];then
+    echo "usage: $0 config_path ckpt_prefix jit_model_path"
     exit -1
 fi
 
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
+config_path=$1
+ckpt_path_prefix=$2
+jit_model_export_path=$3
+
+device=gpu
+if [ "${ngpu}" -eq 0 ];then  # awk prints NF=0 when CUDA_VISIBLE_DEVICES is empty/unset -> use CPU
+    device=cpu
+fi
+
 python3 -u ${BIN_DIR}/export.py \
---config conf/deepspeech2.yaml \
---checkpoint_path ${1} \
---export_path ${2}
+--device ${device} \
+--nproc ${ngpu} \
+--config ${config_path} \
+--checkpoint_path ${ckpt_path_prefix} \
+--export_path ${jit_model_export_path}
 
 
 if [ $? -ne 0 ]; then
-    echo "Failed in evaluation!"
+    echo "Failed in export!"
     exit 1
 fi
 
diff --git a/examples/tiny/s0/local/infer.sh b/examples/tiny/s0/local/infer.sh
deleted file mode 100644
index b36f9000a..000000000
--- a/examples/tiny/s0/local/infer.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#! /usr/bin/env bash
-
-if [[ $# != 1 ]];then
-    echo "usage: $0 ckpt-path"
-    exit -1
-fi
-
-# download language model
-bash local/download_lm_en.sh
-if [ $? -ne 0 ]; then
-    exit 1
-fi
-
-python3 -u ${BIN_DIR}/infer.py \
---device 'gpu' \
---nproc 1 \
---config conf/deepspeech2.yaml \
---checkpoint_path ${1}
-
-
-if [ $? -ne 0 ]; then
-    echo "Failed in inference!"
-    exit 1
-fi
-
-exit 0
diff --git a/examples/tiny/s0/local/test.sh b/examples/tiny/s0/local/test.sh
index 8c8c278c6..79e05838c 100644
--- a/examples/tiny/s0/local/test.sh
+++ b/examples/tiny/s0/local/test.sh
@@ -1,16 +1,32 @@
 #! /usr/bin/env bash
 
+if [ $# != 2 ];then
+    echo "usage: ${0} config_path ckpt_path_prefix"
+    exit -1
+fi
+
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
+device=gpu
+if [ "${ngpu}" -eq 0 ];then  # awk prints NF=0 when CUDA_VISIBLE_DEVICES is empty/unset -> use CPU
+    device=cpu
+fi
+config_path=$1
+ckpt_prefix=$2
+
 # download language model
 bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
-  exit 1
+    exit 1
 fi
 
 python3 -u ${BIN_DIR}/test.py \
---device 'gpu' \
+--device ${device} \
 --nproc 1 \
---config conf/deepspeech2.yaml \
---output ckpt
+--config ${config_path} \
+--result_file ${ckpt_prefix}.rsl \
+--checkpoint_path ${ckpt_prefix}
 
 if [ $? -ne 0 ]; then
     echo "Failed in evaluation!"
diff --git a/examples/tiny/s0/local/train.sh b/examples/tiny/s0/local/train.sh
index af62ae55f..f8c9dbc0b 100644
--- a/examples/tiny/s0/local/train.sh
+++ b/examples/tiny/s0/local/train.sh
@@ -1,17 +1,32 @@
 #! /usr/bin/env bash
 
-export FLAGS_sync_nccl_allreduce=0
+if [ $# != 2 ];then
+    echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
+    exit -1
+fi
+
+ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
+echo "using $ngpu gpus..."
+
+config_path=$1
+ckpt_name=$2
+
+device=gpu
+if [ "${ngpu}" -eq 0 ];then  # awk prints NF=0 when CUDA_VISIBLE_DEVICES is empty/unset -> use CPU
+    device=cpu
+fi
+
+mkdir -p exp
 
 python3 -u ${BIN_DIR}/train.py \
---device 'gpu' \
---nproc 1 \
---config conf/deepspeech2.yaml \
---output ckpt
+--device ${device} \
+--nproc ${ngpu} \
+--config ${config_path} \
+--output exp/${ckpt_name}
 
 if [ $? -ne 0 ]; then
     echo "Failed in training!"
     exit 1
 fi
 
-
 exit 0
diff --git a/examples/tiny/s0/run.sh b/examples/tiny/s0/run.sh
index 2b5ed5308..b14d30262 100644
--- a/examples/tiny/s0/run.sh
+++ b/examples/tiny/s0/run.sh
@@ -1,16 +1,37 @@
 #!/bin/bash
 set -e
-
 source path.sh
 
-# prepare data
-bash ./local/data.sh
+stage=0
+stop_stage=100
+conf_path=conf/deepspeech2.yaml
+ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
+avg_num=1
+avg_ckpt=avg_${avg_num}
+
+source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
+
+if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
+    # prepare data
+    bash ./local/data.sh || exit -1
+fi
+
+if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
+    # train model, all `ckpt` under `exp` dir
+    CUDA_VISIBLE_DEVICES=0 ./local/train.sh ${conf_path} ${ckpt}
+fi
 
-# train model
-bash ./local/train.sh
+if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
+    # avg n best model
+    ./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
+fi
 
-# test model
-bash ./local/test.sh
+if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
+    # test ckpt avg_n
+    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+fi
 
-# infer model
-bash ./local/infer.sh
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+    # export ckpt avg_n
+    CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
+fi