refactor aishell scripts

pull/578/head
Hui Zhang 4 years ago
parent 4e94debf69
commit b15b6c6a26

@ -85,4 +85,6 @@ def get_cfg_defaults():
"""Get a yacs CfgNode object with default values for my_project.""" """Get a yacs CfgNode object with default values for my_project."""
# Return a clone so that the defaults will not be altered # Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern # This is for the "local variable" use pattern
return _C.clone() config = _C.clone()
config.set_new_allowed(True)
return config

@ -18,7 +18,6 @@ from deepspeech.exps.u2.model import U2Trainer
from deepspeech.io.dataset import ManifestDataset from deepspeech.io.dataset import ManifestDataset
from deepspeech.models.u2 import U2Model from deepspeech.models.u2 import U2Model
_C = CfgNode() _C = CfgNode()
_C.data = ManifestDataset.params() _C.data = ManifestDataset.params()
@ -34,4 +33,6 @@ def get_cfg_defaults():
"""Get a yacs CfgNode object with default values for my_project.""" """Get a yacs CfgNode object with default values for my_project."""
# Return a clone so that the defaults will not be altered # Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern # This is for the "local variable" use pattern
return _C.clone() config = _C.clone()
config.set_new_allowed(True)
return config

@ -149,7 +149,7 @@ class FeatureNormalizer(object):
eps=1e-20): eps=1e-20):
"""Compute mean and std from randomly sampled instances.""" """Compute mean and std from randomly sampled instances."""
paddle.set_device('cpu') paddle.set_device('cpu')
collate_func = CollateFunc(featurize_func) collate_func = CollateFunc(featurize_func)
dataset = AudioDataset(manifest_path, num_samples, self._rng) dataset = AudioDataset(manifest_path, num_samples, self._rng)
data_loader = DataLoader( data_loader = DataLoader(

@ -62,7 +62,6 @@ class U2BaseModel(nn.Module):
# network architecture # network architecture
default = CfgNode() default = CfgNode()
# allow add new item when merge_with_file # allow add new item when merge_with_file
default.set_new_allowed(True)
default.cmvn_file = "" default.cmvn_file = ""
default.cmvn_file_type = "npz" default.cmvn_file_type = "npz"
default.input_dim = 0 default.input_dim = 0
@ -85,7 +84,7 @@ class U2BaseModel(nn.Module):
# activation_type='swish', # activation_type='swish',
# pos_enc_layer_type='rel_pos', # pos_enc_layer_type='rel_pos',
# selfattention_layer_type='rel_selfattn', # selfattention_layer_type='rel_selfattn',
)) ))
# decoder related # decoder related
default.decoder = 'transformer' default.decoder = 'transformer'
default.decoder_conf = CfgNode( default.decoder_conf = CfgNode(

@ -9,7 +9,7 @@ data:
augmentation_config: conf/augmentation.json augmentation_config: conf/augmentation.json
batch_size: 64 batch_size: 64
min_input_len: 0.5 min_input_len: 0.5
max_input_len: 20.0 max_input_len: 20.0 # second
min_output_len: 0.0 min_output_len: 0.0
max_output_len: 400.0 max_output_len: 400.0
min_output_input_ratio: 0.05 min_output_input_ratio: 0.05
@ -30,7 +30,7 @@ data:
keep_transcription_text: False keep_transcription_text: False
sortagrad: True sortagrad: True
shuffle_method: batch_shuffle shuffle_method: batch_shuffle
num_workers: 0 num_workers: 2
# network architecture # network architecture
@ -75,7 +75,7 @@ model:
training: training:
n_epoch: 240 n_epoch: 240
accum_grad: 1 accum_grad: 2
global_grad_clip: 5.0 global_grad_clip: 5.0
optim: adam optim: adam
optim_conf: optim_conf:
@ -89,7 +89,7 @@ training:
decoding: decoding:
batch_size: 16 batch_size: 128
error_rate_type: cer error_rate_type: cer
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring' decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm

@ -1,24 +1,23 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 2 ];then if [ $# != 2 ]; then
echo "usage: ${0} ckpt_dir avg_num" echo "usage: ${0} ckpt_dir avg_num"
exit -1 exit -1
fi fi
ckpt_path=${1} ckpt_dir=${1}
average_num=${2} average_num=${2}
decode_checkpoint=${ckpt_path}/avg_${average_num}.pdparams decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
python3 -u ${MAIN_ROOT}/utils/avg_model.py \ python3 -u ${MAIN_ROOT}/utils/avg_model.py \
--dst_model ${decode_checkpoint} \ --dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_path} \ --ckpt_dir ${ckpt_dir} \
--num ${average_num} \ --num ${average_num} \
--val_best --val_best
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in avg ckpt!" echo "Failed in avg ckpt!"
exit 1 exit 1
fi fi
exit 0 exit 0

@ -1,14 +1,28 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 2 ];then if [ $# != 3 ];then
echo "usage: ${0} ckpt_path jit_model_path" echo "usage: $0 config_path ckpt_prefix jit_model_path"
exit -1 exit -1
fi fi
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3
# Pick the device for export.py: default to GPU, fall back to CPU when no GPU
# is listed. `ngpu` is the comma-separated field count of CUDA_VISIBLE_DEVICES
# computed above; it is 0 when that variable is empty or unset.
# The variable must be expanded (`${ngpu}`): a bare `ngpu` is just the literal
# word "ngpu", which never equals 0, so the CPU fallback would never trigger.
device=gpu
if [ "${ngpu:-0}" -eq 0 ]; then
    device=cpu
fi
python3 -u ${BIN_DIR}/export.py \ python3 -u ${BIN_DIR}/export.py \
--config conf/conformer.yaml \ --device ${device} \
--checkpoint_path ${1} \ --nproc ${ngpu} \
--export_path ${2} --config ${config_path} \
--checkpoint_path ${ckpt_path_prefix} \
--export_path ${jit_model_export_path}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then

@ -1,10 +1,20 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 1 ];then if [ $# != 2 ];then
echo "usage: ${0} ckpt_path_prefix" echo "usage: ${0} config_path ckpt_path_prefix"
exit -1 exit -1
fi fi
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
# Pick the device for test.py: default to GPU, fall back to CPU when no GPU
# is listed. `ngpu` is the comma-separated field count of CUDA_VISIBLE_DEVICES
# computed above; it is 0 when that variable is empty or unset.
# The variable must be expanded (`${ngpu}`): a bare `ngpu` is just the literal
# word "ngpu", which never equals 0, so the CPU fallback would never trigger.
device=gpu
if [ "${ngpu:-0}" -eq 0 ]; then
    device=cpu
fi
config_path=$1
ckpt_prefix=$2
# download language model # download language model
#bash local/download_lm_ch.sh #bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then #if [ $? -ne 0 ]; then
@ -12,11 +22,11 @@ fi
#fi #fi
python3 -u ${BIN_DIR}/test.py \ python3 -u ${BIN_DIR}/test.py \
--device 'gpu' \ --device ${device} \
--nproc 1 \ --nproc 1 \
--config conf/conformer.yaml \ --config ${config_path} \
--result_file ${1}.rsl \ --result_file ${ckpt_prefix}.rsl \
--checkpoint_path ${1} --checkpoint_path ${ckpt_prefix}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in evaluation!" echo "Failed in evaluation!"

@ -1,25 +1,33 @@
#! /usr/bin/env bash #! /usr/bin/env bash
if [ $# != 1 ];then if [ $# != 2 ];then
echo "usage: ${0} ckpt_tag" echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
exit -1 exit -1
fi fi
mkdir -p exp
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..." echo "using $ngpu gpus..."
config_path=$1
ckpt_name=$2
# Pick the device for train.py: default to GPU, fall back to CPU when no GPU
# is listed. `ngpu` is the comma-separated field count of CUDA_VISIBLE_DEVICES
# computed above; it is 0 when that variable is empty or unset.
# The variable must be expanded (`${ngpu}`): a bare `ngpu` is just the literal
# word "ngpu", which never equals 0, so the CPU fallback would never trigger.
device=gpu
if [ "${ngpu:-0}" -eq 0 ]; then
    device=cpu
fi
echo "using ${device}..."
mkdir -p exp
python3 -u ${BIN_DIR}/train.py \ python3 -u ${BIN_DIR}/train.py \
--device 'gpu' \ --device ${device} \
--nproc ${ngpu} \ --nproc ${ngpu} \
--config conf/conformer.yaml \ --config ${config_path} \
--output exp/${1} --output exp/${ckpt_name}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in training!" echo "Failed in training!"
exit 1 exit 1
fi fi
exit 0 exit 0

@ -1,19 +1,37 @@
#!/bin/bash #!/bin/bash
set -e
source path.sh source path.sh
# only demos
# prepare data stage=0
bash ./local/data.sh stop_stage=100
conf_path=conf/conformer.yaml
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_num=20
avg_ckpt=avg_${avg_num}
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
bash ./local/data.sh || exit -1
fi
# train model if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh # train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=0,1,2,3 ./local/train.sh ${conf_path} ${ckpt}
fi
# test model if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh # avg n best model
./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
fi
# infer model if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284 # test ckpt avg_n
CUDA_VISIBLE_DEVICES=4 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
# export model if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
bash ./local/export.sh ckpt/checkpoints/step-3284 jit.model # export ckpt avg_n
CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi

Loading…
Cancel
Save