refactor aishell scripts

pull/578/head
Hui Zhang 4 years ago
parent 4e94debf69
commit b15b6c6a26

@ -85,4 +85,6 @@ def get_cfg_defaults():
"""Get a yacs CfgNode object with default values for my_project."""
# Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern
return _C.clone()
config = _C.clone()
config.set_new_allowed(True)
return config

@ -18,7 +18,6 @@ from deepspeech.exps.u2.model import U2Trainer
from deepspeech.io.dataset import ManifestDataset
from deepspeech.models.u2 import U2Model
_C = CfgNode()
_C.data = ManifestDataset.params()
@ -34,4 +33,6 @@ def get_cfg_defaults():
"""Get a yacs CfgNode object with default values for my_project."""
# Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern
return _C.clone()
config = _C.clone()
config.set_new_allowed(True)
return config

@ -149,7 +149,7 @@ class FeatureNormalizer(object):
eps=1e-20):
"""Compute mean and std from randomly sampled instances."""
paddle.set_device('cpu')
collate_func = CollateFunc(featurize_func)
dataset = AudioDataset(manifest_path, num_samples, self._rng)
data_loader = DataLoader(

@ -62,7 +62,6 @@ class U2BaseModel(nn.Module):
# network architecture
default = CfgNode()
# allow adding new items when merging from a config file
default.set_new_allowed(True)
default.cmvn_file = ""
default.cmvn_file_type = "npz"
default.input_dim = 0
@ -85,7 +84,7 @@ class U2BaseModel(nn.Module):
# activation_type='swish',
# pos_enc_layer_type='rel_pos',
# selfattention_layer_type='rel_selfattn',
))
))
# decoder related
default.decoder = 'transformer'
default.decoder_conf = CfgNode(

@ -9,7 +9,7 @@ data:
augmentation_config: conf/augmentation.json
batch_size: 64
min_input_len: 0.5
max_input_len: 20.0
max_input_len: 20.0 # seconds
min_output_len: 0.0
max_output_len: 400.0
min_output_input_ratio: 0.05
@ -30,7 +30,7 @@ data:
keep_transcription_text: False
sortagrad: True
shuffle_method: batch_shuffle
num_workers: 0
num_workers: 2
# network architecture
@ -75,7 +75,7 @@ model:
training:
n_epoch: 240
accum_grad: 1
accum_grad: 2
global_grad_clip: 5.0
optim: adam
optim_conf:
@ -89,7 +89,7 @@ training:
decoding:
batch_size: 16
batch_size: 128
error_rate_type: cer
decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm

@ -1,24 +1,23 @@
#! /usr/bin/env bash
if [ $# != 2 ];then
if [ $# != 2 ]; then
echo "usage: ${0} ckpt_dir avg_num"
exit -1
fi
ckpt_path=${1}
ckpt_dir=${1}
average_num=${2}
decode_checkpoint=${ckpt_path}/avg_${average_num}.pdparams
decode_checkpoint=${ckpt_dir}/avg_${average_num}.pdparams
python3 -u ${MAIN_ROOT}/utils/avg_model.py \
--dst_model ${decode_checkpoint} \
--ckpt_dir ${ckpt_path} \
--ckpt_dir ${ckpt_dir} \
--num ${average_num} \
--val_best
if [ $? -ne 0 ]; then
echo "Failed in avg ckpt!"
exit 1
fi
exit 0

@ -1,14 +1,28 @@
#! /usr/bin/env bash
if [ $# != 2 ];then
echo "usage: ${0} ckpt_path jit_model_path"
if [ $# != 3 ];then
echo "usage: $0 config_path ckpt_prefix jit_model_path"
exit -1
fi
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_path_prefix=$2
jit_model_export_path=$3
device=gpu
# ngpu is the number of comma-separated entries in CUDA_VISIBLE_DEVICES.
# It must be expanded here: the bare word `ngpu` is never equal to 0, so the
# CPU fallback could not trigger without the `$`.
if [ "${ngpu}" -eq 0 ]; then
    device=cpu
fi
python3 -u ${BIN_DIR}/export.py \
--config conf/conformer.yaml \
--checkpoint_path ${1} \
--export_path ${2}
--device ${device} \
--nproc ${ngpu} \
--config ${config_path} \
--checkpoint_path ${ckpt_path_prefix} \
--export_path ${jit_model_export_path}
if [ $? -ne 0 ]; then

@ -1,10 +1,20 @@
#! /usr/bin/env bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_path_prefix"
if [ $# != 2 ];then
echo "usage: ${0} config_path ckpt_path_prefix"
exit -1
fi
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
device=gpu
# ngpu is the number of comma-separated entries in CUDA_VISIBLE_DEVICES.
# It must be expanded here: the bare word `ngpu` is never equal to 0, so the
# CPU fallback could not trigger without the `$`.
if [ "${ngpu}" -eq 0 ]; then
    device=cpu
fi
config_path=$1
ckpt_prefix=$2
# download language model
#bash local/download_lm_ch.sh
#if [ $? -ne 0 ]; then
@ -12,11 +22,11 @@ fi
#fi
python3 -u ${BIN_DIR}/test.py \
--device 'gpu' \
--device ${device} \
--nproc 1 \
--config conf/conformer.yaml \
--result_file ${1}.rsl \
--checkpoint_path ${1}
--config ${config_path} \
--result_file ${ckpt_prefix}.rsl \
--checkpoint_path ${ckpt_prefix}
if [ $? -ne 0 ]; then
echo "Failed in evaluation!"

@ -1,25 +1,33 @@
#! /usr/bin/env bash
if [ $# != 1 ];then
echo "usage: ${0} ckpt_tag"
if [ $# != 2 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
exit -1
fi
mkdir -p exp
ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
echo "using $ngpu gpus..."
config_path=$1
ckpt_name=$2
device=gpu
# ngpu is the number of comma-separated entries in CUDA_VISIBLE_DEVICES.
# It must be expanded here: the bare word `ngpu` is never equal to 0, so the
# CPU fallback could not trigger without the `$`.
if [ "${ngpu}" -eq 0 ]; then
    device=cpu
fi
echo "using ${device}..."
mkdir -p exp
python3 -u ${BIN_DIR}/train.py \
--device 'gpu' \
--device ${device} \
--nproc ${ngpu} \
--config conf/conformer.yaml \
--output exp/${1}
--config ${config_path} \
--output exp/${ckpt_name}
if [ $? -ne 0 ]; then
echo "Failed in training!"
exit 1
fi
exit 0

@ -1,19 +1,37 @@
#!/bin/bash
set -e
source path.sh
# only demos
# prepare data
bash ./local/data.sh
stage=0
stop_stage=100
conf_path=conf/conformer.yaml
ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')
avg_num=20
avg_ckpt=avg_${avg_num}
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
# prepare data
bash ./local/data.sh || exit -1
fi
# train model
CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=0,1,2,3 ./local/train.sh ${conf_path} ${ckpt}
fi
# test model
CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
# average the n best checkpoints
./local/avg.sh exp/${ckpt}/checkpoints ${avg_num}
fi
# infer model
CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# test ckpt avg_n
CUDA_VISIBLE_DEVICES=4 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
fi
# export model
bash ./local/export.sh ckpt/checkpoints/step-3284 jit.model
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# export ckpt avg_n
CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
fi

Loading…
Cancel
Save