merge develop_ds2_online

pull/735/head
huangyuxin 4 years ago
commit e8a3913422

@ -30,11 +30,13 @@ def main(config, args):
if __name__ == "__main__": if __name__ == "__main__":
parser = default_argument_parser() parser = default_argument_parser()
parser.add_argument("--model_type")
args = parser.parse_args() args = parser.parse_args()
print_arguments(args) print_arguments(args)
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
config = get_cfg_defaults() config = get_cfg_defaults(args.model_type)
if args.config: if args.config:
config.merge_from_file(args.config) config.merge_from_file(args.config)
if args.opts: if args.opts:

@ -30,11 +30,12 @@ def main(config, args):
if __name__ == "__main__": if __name__ == "__main__":
parser = default_argument_parser() parser = default_argument_parser()
parser.add_argument("--model_type")
args = parser.parse_args() args = parser.parse_args()
print_arguments(args, globals()) print_arguments(args, globals())
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
config = get_cfg_defaults() config = get_cfg_defaults(args.model_type)
if args.config: if args.config:
config.merge_from_file(args.config) config.merge_from_file(args.config)
if args.opts: if args.opts:

@ -35,11 +35,12 @@ def main(config, args):
if __name__ == "__main__": if __name__ == "__main__":
parser = default_argument_parser() parser = default_argument_parser()
parser.add_argument("--model_type")
args = parser.parse_args() args = parser.parse_args()
print_arguments(args, globals()) print_arguments(args, globals())
# https://yaml.org/type/float.html # https://yaml.org/type/float.html
config = get_cfg_defaults() config = get_cfg_defaults(args.model_type)
if args.config: if args.config:
config.merge_from_file(args.config) config.merge_from_file(args.config)
if args.opts: if args.opts:

@ -18,9 +18,12 @@ from deepspeech.exps.deepspeech2.model import DeepSpeech2Trainer
from deepspeech.io.collator import SpeechCollator from deepspeech.io.collator import SpeechCollator
from deepspeech.io.dataset import ManifestDataset from deepspeech.io.dataset import ManifestDataset
from deepspeech.models.ds2 import DeepSpeech2Model from deepspeech.models.ds2 import DeepSpeech2Model
from deepspeech.models.ds2_online import DeepSpeech2ModelOnline
_C = CfgNode()
def get_cfg_defaults(model_type):
_C = CfgNode()
if (model_type == 'offline'):
_C.data = ManifestDataset.params() _C.data = ManifestDataset.params()
_C.collator = SpeechCollator.params() _C.collator = SpeechCollator.params()
@ -30,9 +33,16 @@ _C.model = DeepSpeech2Model.params()
_C.training = DeepSpeech2Trainer.params() _C.training = DeepSpeech2Trainer.params()
_C.decoding = DeepSpeech2Tester.params() _C.decoding = DeepSpeech2Tester.params()
else:
_C.data = ManifestDataset.params()
_C.collator = SpeechCollator.params()
_C.model = DeepSpeech2ModelOnline.params()
_C.training = DeepSpeech2Trainer.params()
def get_cfg_defaults(): _C.decoding = DeepSpeech2Tester.params()
"""Get a yacs CfgNode object with default values for my_project.""" """Get a yacs CfgNode object with default values for my_project."""
# Return a clone so that the defaults will not be altered # Return a clone so that the defaults will not be altered
# This is for the "local variable" use pattern # This is for the "local variable" use pattern

@ -29,6 +29,8 @@ from deepspeech.io.sampler import SortagradBatchSampler
from deepspeech.io.sampler import SortagradDistributedBatchSampler from deepspeech.io.sampler import SortagradDistributedBatchSampler
from deepspeech.models.ds2 import DeepSpeech2InferModel from deepspeech.models.ds2 import DeepSpeech2InferModel
from deepspeech.models.ds2 import DeepSpeech2Model from deepspeech.models.ds2 import DeepSpeech2Model
from deepspeech.models.ds2_online import DeepSpeech2InferModelOnline
from deepspeech.models.ds2_online import DeepSpeech2ModelOnline
from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog from deepspeech.training.gradclip import ClipGradByGlobalNormWithLog
from deepspeech.training.trainer import Trainer from deepspeech.training.trainer import Trainer
from deepspeech.utils import error_rate from deepspeech.utils import error_rate
@ -122,13 +124,27 @@ class DeepSpeech2Trainer(Trainer):
def setup_model(self): def setup_model(self):
config = self.config config = self.config
if self.args.model_type == 'offline':
model = DeepSpeech2Model( model = DeepSpeech2Model(
feat_size=self.train_loader.collate_fn.feature_size, feat_size=self.train_loader.collate_fn.feature_size,
dict_size=self.train_loader.collate_fn.vocab_size, dict_size=self.train_loader.collate_fn.vocab_size,
num_conv_layers=config.model.num_conv_layers, num_conv_layers=config.model.num_conv_layers,
num_rnn_layers=config.model.num_rnn_layers, num_rnn_layers=config.model.num_rnn_layers,
rnn_size=config.model.rnn_layer_size, rnn_size=config.model.rnn_layer_size,
use_gru=config.model.use_gru,
share_rnn_weights=config.model.share_rnn_weights)
elif self.args.model_type == 'online':
model = DeepSpeech2ModelOnline(
feat_size=self.train_loader.collate_fn.feature_size,
dict_size=self.train_loader.collate_fn.vocab_size,
num_conv_layers=config.model.num_conv_layers,
num_rnn_layers=config.model.num_rnn_layers,
num_fc_layers=config.model.num_fc_layers,
fc_layers_size_list=config.model.fc_layers_size_list,
rnn_size=config.model.rnn_layer_size,
use_gru=config.model.use_gru) use_gru=config.model.use_gru)
else:
raise Exception("wrong model type")
if self.parallel: if self.parallel:
model = paddle.DataParallel(model) model = paddle.DataParallel(model)
@ -329,8 +345,14 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
exit(-1) exit(-1)
def export(self): def export(self):
if self.args.model_type == 'offline':
infer_model = DeepSpeech2InferModel.from_pretrained( infer_model = DeepSpeech2InferModel.from_pretrained(
self.test_loader, self.config, self.args.checkpoint_path) self.test_loader, self.config, self.args.checkpoint_path)
elif self.args.model_type == 'online':
infer_model = DeepSpeech2InferModelOnline.from_pretrained(
self.test_loader, self.config, self.args.checkpoint_path)
else:
raise Exception("wrong model tyep")
infer_model.eval() infer_model.eval()
feat_dim = self.test_loader.collate_fn.feature_size feat_dim = self.test_loader.collate_fn.feature_size
@ -368,13 +390,27 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
def setup_model(self): def setup_model(self):
config = self.config config = self.config
if self.args.model_type == 'offline':
model = DeepSpeech2Model( model = DeepSpeech2Model(
feat_size=self.test_loader.collate_fn.feature_size, feat_size=self.test_loader.collate_fn.feature_size,
dict_size=self.test_loader.collate_fn.vocab_size, dict_size=self.test_loader.collate_fn.vocab_size,
num_conv_layers=config.model.num_conv_layers, num_conv_layers=config.model.num_conv_layers,
num_rnn_layers=config.model.num_rnn_layers, num_rnn_layers=config.model.num_rnn_layers,
rnn_size=config.model.rnn_layer_size, rnn_size=config.model.rnn_layer_size,
use_gru=config.model.use_gru,
share_rnn_weights=config.model.share_rnn_weights)
elif self.args.model_type == 'online':
model = DeepSpeech2ModelOnline(
feat_size=self.test_loader.collate_fn.feature_size,
dict_size=self.test_loader.collate_fn.vocab_size,
num_conv_layers=config.model.num_conv_layers,
num_rnn_layers=config.model.num_rnn_layers,
num_fc_layers=config.model.num_fc_layers,
fc_layers_size_list=config.model.fc_layers_size_list,
rnn_size=config.model.rnn_layer_size,
use_gru=config.model.use_gru) use_gru=config.model.use_gru)
else:
raise Exception("Wrong model type")
self.model = model self.model = model
logger.info("Setup model!") logger.info("Setup model!")

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
if [ $# != 3 ];then if [ $# != 4 ];then
echo "usage: $0 config_path ckpt_prefix jit_model_path" echo "usage: $0 config_path ckpt_prefix jit_model_path"
exit -1 exit -1
fi fi
@ -11,6 +11,7 @@ echo "using $ngpu gpus..."
config_path=$1 config_path=$1
ckpt_path_prefix=$2 ckpt_path_prefix=$2
jit_model_export_path=$3 jit_model_export_path=$3
model_type=$4
device=gpu device=gpu
if [ ${ngpu} == 0 ];then if [ ${ngpu} == 0 ];then
@ -22,8 +23,8 @@ python3 -u ${BIN_DIR}/export.py \
--nproc ${ngpu} \ --nproc ${ngpu} \
--config ${config_path} \ --config ${config_path} \
--checkpoint_path ${ckpt_path_prefix} \ --checkpoint_path ${ckpt_path_prefix} \
--export_path ${jit_model_export_path} --export_path ${jit_model_export_path} \
--model_type ${model_type}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in export!" echo "Failed in export!"

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
if [ $# != 2 ];then if [ $# != 3 ];then
echo "usage: ${0} config_path ckpt_path_prefix" echo "usage: ${0} config_path ckpt_path_prefix"
exit -1 exit -1
fi fi
@ -14,6 +14,7 @@ if [ ${ngpu} == 0 ];then
fi fi
config_path=$1 config_path=$1
ckpt_prefix=$2 ckpt_prefix=$2
model_type=$3
# download language model # download language model
bash local/download_lm_en.sh bash local/download_lm_en.sh
@ -26,7 +27,8 @@ python3 -u ${BIN_DIR}/test.py \
--nproc 1 \ --nproc 1 \
--config ${config_path} \ --config ${config_path} \
--result_file ${ckpt_prefix}.rsl \ --result_file ${ckpt_prefix}.rsl \
--checkpoint_path ${ckpt_prefix} --checkpoint_path ${ckpt_prefix} \
--model_type ${model_type}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in evaluation!" echo "Failed in evaluation!"

@ -1,6 +1,6 @@
#!/bin/bash #!/bin/bash
if [ $# != 2 ];then if [ $# != 3 ];then
echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name" echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name"
exit -1 exit -1
fi fi
@ -10,6 +10,7 @@ echo "using $ngpu gpus..."
config_path=$1 config_path=$1
ckpt_name=$2 ckpt_name=$2
model_type=$3
device=gpu device=gpu
if [ ${ngpu} == 0 ];then if [ ${ngpu} == 0 ];then
@ -22,7 +23,8 @@ python3 -u ${BIN_DIR}/train.py \
--device ${device} \ --device ${device} \
--nproc ${ngpu} \ --nproc ${ngpu} \
--config ${config_path} \ --config ${config_path} \
--output exp/${ckpt_name} --output exp/${ckpt_name} \
--model_type ${model_type}
if [ $? -ne 0 ]; then if [ $? -ne 0 ]; then
echo "Failed in training!" echo "Failed in training!"

@ -7,6 +7,7 @@ stage=0
stop_stage=100 stop_stage=100
conf_path=conf/deepspeech2.yaml conf_path=conf/deepspeech2.yaml
avg_num=1 avg_num=1
model_type=online
source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
@ -21,7 +22,7 @@ fi
if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
# train model, all `ckpt` under `exp` dir # train model, all `ckpt` under `exp` dir
CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} CUDA_VISIBLE_DEVICES=${gpus} ./local/train.sh ${conf_path} ${ckpt} ${model_type}
fi fi
if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
@ -31,10 +32,10 @@ fi
if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
# test ckpt avg_n # test ckpt avg_n
CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1 CUDA_VISIBLE_DEVICES=${gpus} ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${model_type} || exit -1
fi fi
if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
# export ckpt avg_n # export ckpt avg_n
CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit CUDA_VISIBLE_DEVICES=${gpus} ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit ${model_type}
fi fi

@ -0,0 +1,41 @@
#!/bin/bash
# DeepSpeech2 recipe driver.
#
# Runs every stage whose number lies in [stage, stop_stage] and forwards
# ${model_type} to the train / test / export helper scripts:
#   0  prepare data            (./local/data.sh)
#   1  train                   (./local/train.sh)
#   2  average checkpoints     (avg.sh)
#   3  test averaged ckpt      (./local/test.sh)
#   4  export averaged ckpt    (./local/export.sh)
#
# All expansions are quoted so that paths containing whitespace or glob
# characters reach the helper scripts as single, unmodified arguments.

set -e
source path.sh

# Defaults. They are assigned before parse_options.sh is sourced; presumably
# that helper allows overriding them from the command line -- confirm against
# utils/parse_options.sh.
gpus=7
stage=1            # with this default, stage 0 (data preparation) is skipped
stop_stage=100
conf_path=conf/deepspeech2.yaml
avg_num=1
model_type=online

# NOTE(review): MAIN_ROOT is not set in this script; presumably path.sh
# exports it -- confirm.
source "${MAIN_ROOT}/utils/parse_options.sh" || exit 1;

avg_ckpt="avg_${avg_num}"
# Checkpoint name = config file name up to its first '.',
# e.g. conf/deepspeech2.yaml -> deepspeech2
ckpt=$(basename "${conf_path}" | awk -F'.' '{print $1}')
echo "checkpoint name ${ckpt}"

if [ "${stage}" -le 0 ] && [ "${stop_stage}" -ge 0 ]; then
    # prepare data
    bash ./local/data.sh || exit -1
fi

if [ "${stage}" -le 1 ] && [ "${stop_stage}" -ge 1 ]; then
    # train model, all `ckpt` under `exp` dir
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/train.sh "${conf_path}" "${ckpt}" "${model_type}"
fi

if [ "${stage}" -le 2 ] && [ "${stop_stage}" -ge 2 ]; then
    # avg n best model
    avg.sh "exp/${ckpt}/checkpoints" "${avg_num}"
fi

if [ "${stage}" -le 3 ] && [ "${stop_stage}" -ge 3 ]; then
    # test ckpt avg_n
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/test.sh "${conf_path}" "exp/${ckpt}/checkpoints/${avg_ckpt}" "${model_type}" || exit -1
fi

if [ "${stage}" -le 4 ] && [ "${stop_stage}" -ge 4 ]; then
    # export ckpt avg_n
    CUDA_VISIBLE_DEVICES="${gpus}" ./local/export.sh "${conf_path}" "exp/${ckpt}/checkpoints/${avg_ckpt}" "exp/${ckpt}/checkpoints/${avg_ckpt}.jit" "${model_type}"
fi

@ -19,6 +19,7 @@ import paddle
from deepspeech.models.ds2 import DeepSpeech2Model from deepspeech.models.ds2 import DeepSpeech2Model
class TestDeepSpeech2Model(unittest.TestCase): class TestDeepSpeech2Model(unittest.TestCase):
def setUp(self): def setUp(self):
paddle.set_device('cpu') paddle.set_device('cpu')

Loading…
Cancel
Save