pull/522/head
Hui Zhang 5 years ago
parent 4292e50622
commit e80c741131

@@ -9,31 +9,12 @@ if [ $? -ne 0 ]; then
fi
cd - > /dev/null
# infer
CUDA_VISIBLE_DEVICES=0 \
python3 -u ${MAIN_ROOT}/infer.py \
--num_samples=10 \
--beam_size=300 \
--num_proc_bsearch=8 \
--num_conv_layers=2 \
--num_rnn_layers=3 \
--rnn_layer_size=1024 \
--alpha=2.6 \
--beta=5.0 \
--cutoff_prob=0.99 \
--cutoff_top_n=40 \
--use_gru=True \
--use_gpu=True \
--share_rnn_weights=False \
--infer_manifest="data/manifest.test" \
--mean_std_path="data/mean_std.npz" \
--vocab_path="data/vocab.txt" \
--model_path="checkpoints/step_final" \
--lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
--decoding_method="ctc_beam_search" \
--error_rate_type="cer" \
--specgram_type="linear"
--device 'gpu' \
--nproc 1 \
--config conf/deepspeech2.yaml \
--checkpoint_path ckpt/checkpoints/step-3283
if [ $? -ne 0 ]; then
echo "Failed in inference!"

@@ -4,39 +4,11 @@
# if you wish to resume from an existing model, uncomment --init_from_pretrained_model
export FLAGS_sync_nccl_allreduce=0
#CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
#python3 -u ${MAIN_ROOT}/train.py \
#--batch_size=64 \
#--num_epoch=50 \
#--num_conv_layers=2 \
#--num_rnn_layers=3 \
#--rnn_layer_size=1024 \
#--num_iter_print=100 \
#--save_epoch=1 \
#--num_samples=120000 \
#--learning_rate=5e-4 \
#--max_duration=27.0 \
#--min_duration=0.0 \
#--test_off=False \
#--use_sortagrad=True \
#--use_gru=True \
#--use_gpu=True \
#--is_local=True \
#--share_rnn_weights=False \
#--train_manifest="data/manifest.train" \
#--dev_manifest="data/manifest.dev" \
#--mean_std_path="data/mean_std.npz" \
#--vocab_path="data/vocab.txt" \
#--output_model_dir="./checkpoints" \
#--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
#--specgram_type="linear" \
#--shuffle_method="batch_shuffle_clipped" \
python3 -u ${MAIN_ROOT}/train.py \
--device 'gpu' \
--nproc 4 \
--config conf/deepspeech2.yaml \
--output ckpt
--output ckpt-${1}
if [ $? -ne 0 ]; then
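The new train command takes a positional tag (`${1}`) for the output directory, so invoking the script with e.g. `baseline` writes to ckpt-baseline, and it drives four workers via --nproc 4. One plausible way such a flag could map onto Paddle's multi-process launcher is sketched below; train_worker and the spawn wiring are assumptions, not the repository's actual launcher:

# Hedged sketch: how a --nproc style flag could map to
# paddle.distributed.spawn; the repository's launcher may differ.
import paddle.distributed as dist

def train_worker():
    dist.init_parallel_env()   # join the NCCL process group for this worker
    rank = dist.get_rank()     # 0 .. nprocs-1
    # ... build the model, wrap it in paddle.DataParallel, run the loop ...

if __name__ == "__main__":
    dist.spawn(train_worker, nprocs=4)   # mirrors --nproc 4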

@@ -21,6 +21,7 @@ import logging
import numpy as np
from collections import defaultdict
from functools import partial
from pathlib import Path
import paddle
from paddle import distributed as dist
@@ -449,6 +450,30 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
error_rate_type, num_ins, num_ins, errors_sum / len_refs)
self.logger.info(msg)
def setup_output_dir(self):
"""Create a directory used for output.
"""
# output dir
if self.args.output:
output_dir = Path(self.args.output).expanduser() / "infer"
output_dir.mkdir(parents=True, exist_ok=True)
else:
output_dir = Path(self.args.checkpoint_path).expanduser().parent / "infer"
output_dir.mkdir(parents=True, exist_ok=True)
self.output_dir = output_dir
# def setup_checkpointer(self):
# """Create a directory used to save checkpoints into.
# It is "checkpoints" inside the output directory.
# """
# # checkpoint dir
# checkpoint_dir = self.output_dir / "checkpoints"
# checkpoint_dir.mkdir(exist_ok=True)
# self.checkpoint_dir = checkpoint_dir
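The fallback in setup_output_dir can be exercised standalone: with --output it creates "<output>/infer", otherwise it places "infer" next to the checkpoint. A minimal sketch, with a hypothetical helper name and example paths:

# Standalone illustration of the fallback in setup_output_dir():
# with no --output, results land next to the checkpoint.
from pathlib import Path

def resolve_output_dir(output, checkpoint_path):
    if output:
        out = Path(output).expanduser() / "infer"
    else:
        out = Path(checkpoint_path).expanduser().parent / "infer"
    out.mkdir(parents=True, exist_ok=True)
    return out

# resolve_output_dir(None, "ckpt/checkpoints/step-3283") -> ckpt/checkpoints/infer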
def setup(self):
"""Setup the experiment.
"""
@@ -458,7 +483,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
self.setup_output_dir()
self.setup_logger()
self.setup_checkpointer()
self.setup_dataloader()
self.setup_model()
@@ -482,7 +506,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
num_rnn_layers=config.model.num_rnn_layers,
rnn_size=config.model.rnn_layer_size,
share_rnn_weights=config.model.share_rnn_weights)
if self.parallel:
model = paddle.DataParallel(model)

@@ -688,6 +688,24 @@ class DeepSpeech2(nn.Layer):
probs, vocab_list, decoding_method, lang_model_path, beam_alpha,
beam_beta, beam_size, cutoff_prob, cutoff_top_n, num_processes)
def from_pretrained(self, checkpoint_path):
"""Load parameters from a pretrained checkpoint into this model.
Parameters
----------
checkpoint_path: Path or str
The path of the pretrained model checkpoint, without extension name.
Returns
-------
DeepSpeech2
The model with the pretrained parameters loaded.
"""
checkpoint.load_parameters(self, checkpoint_path=checkpoint_path)
return self
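A hypothetical usage of from_pretrained; the constructor arguments below are placeholders for values that really come from conf/deepspeech2.yaml, and feat_size/dict_size in particular are made-up example values:

# Hypothetical usage; feat_size and dict_size are example values only.
model = DeepSpeech2(feat_size=161, dict_size=4300,
                    num_conv_layers=2, num_rnn_layers=3,
                    rnn_size=1024, share_rnn_weights=False)
model.from_pretrained("checkpoints/step_final")  # loads parameters in place
model.eval()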
def ctc_loss(logits,
labels,

@@ -91,6 +91,9 @@ class Trainer():
self.args = args
self.optimizer = None
self.visualizer = None
self.output_dir = None
self.checkpoint_dir = None
self.logger = None
def setup(self):
"""Setup the experiment.
@@ -258,6 +261,10 @@ class Trainer():
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)
if self.output_dir is None:
self.logger = logger
return
log_file = self.output_dir / 'worker_{}.log'.format(dist.get_rank())
# file_handler = logging.FileHandler(str(log_file))
# file_handler.setFormatter(formatter)
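The per-rank log-file naming above can be shown in isolation. A minimal sketch, assuming the output directory already exists and using paddle.distributed.get_rank() the same way the trainer does; "ckpt" stands in for self.output_dir:

# Minimal sketch of per-worker log files; paths are examples.
import logging
from pathlib import Path
from paddle import distributed as dist

logger = logging.getLogger("trainer")
formatter = logging.Formatter("%(asctime)s %(levelname)s %(message)s")

log_file = Path("ckpt") / "worker_{}.log".format(dist.get_rank())
file_handler = logging.FileHandler(str(log_file))
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)
logger.warning("logging to %s", log_file)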
