diff --git a/deepspeech/decoders/recog.py b/deepspeech/decoders/recog.py
index dae3cd42..d1ddfc8a 100644
--- a/deepspeech/decoders/recog.py
+++ b/deepspeech/decoders/recog.py
@@ -49,6 +49,21 @@ def load_trained_model(args):
     model = exp.model
     return model, char_list, exp, confs
 
+def get_config(config_path):
+    stream = open(config_path, mode='r', encoding="utf-8")
+    config = yaml.load(stream, Loader=yaml.FullLoader)
+    stream.close()
+    return config
+
+def load_trained_lm(args):
+    lm_args = get_config(args.rnnlm_conf)
+    # NOTE: "model_module" fallback for compatibility with configs from versions before 0.5.0
+    lm_model_module = lm_args.get("model_module", "default")
+    lm_class = dynamic_import_lm(lm_model_module)
+    lm = lm_class(**lm_args["model"])
+    model_dict = paddle.load(args.rnnlm)
+    lm.set_state_dict(model_dict)
+    return lm
 
 def recog_v2(args):
     """Decode with custom models that implements ScorerInterface.
@@ -79,18 +94,7 @@ def recog_v2(args):
         preprocess_args={"train": False},
     )
     if args.rnnlm:
-        lm_path = args.rnnlm
-        lm = TransformerLM(
-            n_vocab=5002,
-            pos_enc=None,
-            embed_unit=128,
-            att_unit=512,
-            head=8,
-            unit=2048,
-            layer=16,
-            dropout_rate=0.5, )
-        model_dict = paddle.load(lm_path)
-        lm.set_state_dict(model_dict)
+        lm = load_trained_lm(args)
         lm.eval()
     else:
         lm = None
diff --git a/deepspeech/models/lm/transformer.py b/deepspeech/models/lm/transformer.py
index b5f7580a..28371ae2 100644
--- a/deepspeech/models/lm/transformer.py
+++ b/deepspeech/models/lm/transformer.py
@@ -42,7 +42,8 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
                  dropout_rate: float=0.5,
                  emb_dropout_rate: float=0.0,
                  att_dropout_rate: float=0.0,
-                 tie_weights: bool=False, ):
+                 tie_weights: bool=False,
+                 **kwargs):
         nn.Layer.__init__(self)
 
         if pos_enc == "sinusoidal":
diff --git a/examples/librispeech/s2/conf/lm/transformer.yaml b/examples/librispeech/s2/conf/lm/transformer.yaml
new file mode 100644
index 00000000..4349f795
--- /dev/null
+++ b/examples/librispeech/s2/conf/lm/transformer.yaml
@@ -0,0 +1,13 @@
+model_module: transformer
+model:
+    n_vocab: 5002
+    pos_enc: null
+    embed_unit: 128
+    att_unit: 512
+    head: 8
+    unit: 2048
+    layer: 16
+    dropout_rate: 0.5
+    emb_dropout_rate: 0.0
+    att_dropout_rate: 0.0
+    tie_weights: False
diff --git a/examples/librispeech/s2/local/recog.sh b/examples/librispeech/s2/local/recog.sh
index 62c1479e..f0e96109 100755
--- a/examples/librispeech/s2/local/recog.sh
+++ b/examples/librispeech/s2/local/recog.sh
@@ -11,9 +11,10 @@ tag=
 decode_config=conf/decode/decode.yaml
 
 # lm params
-lang_model=rnnlm.model.best
-lmexpdir=exp/train_rnnlm_pytorch_lm_transformer_cosine_batchsize32_lr1e-4_layer16_unigram5000_ngpu4/
-lmtag='nolm'
+lang_model=transformerLM.pdparams
+lmexpdir=exp/lm/transformer
+rnnlm_config_path=conf/lm/transformer.yaml
+lmtag='transformer'
 
 train_set=train_960
 recog_set="test-clean test-other dev-clean dev-other"
@@ -91,9 +92,9 @@ for dmethd in join_ctc; do
                 --recog-json ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask} \
                 --result-label ${decode_dir}/data.JOB.json \
                 --model-conf ${config_path} \
-                --model ${ckpt_prefix}.pdparams
-
-            #--rnnlm ${lmexpdir}/${lang_model} \
+                --model ${ckpt_prefix}.pdparams \
+                --rnnlm-conf ${rnnlm_config_path} \
+                --rnnlm ${lmexpdir}/${lang_model}
 
         score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel} --wer false ${decode_dir} ${dict}
 
diff --git a/examples/librispeech/s2/run.sh b/examples/librispeech/s2/run.sh
index 3c7569fb..61172d25 100755
--- a/examples/librispeech/s2/run.sh
+++ b/examples/librispeech/s2/run.sh
@@ -33,16 +33,24 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # test ckpt avg_n
+    # attention rescore decoder
     ./local/test.sh ${conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ] && [ "${use_lm}" == true ]; then
+    # join_ctc decoding, scored with the transformer lm
+    if [ ! -f exp/lm/transformer/transformerLM.pdparams ]; then
+        wget -P exp/lm/transformer/ https://deepspeech.bj.bcebos.com/transformer_lm/transformerLM.pdparams
+    fi
+    bash local/recog.sh --ckpt_prefix exp/${ckpt}/checkpoints/${avg_ckpt}
+fi
+
+if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # ctc alignment of test data
     CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 
-if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
+if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
 fi
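
Reviewer note: a minimal sketch of what the new config-driven LM loading amounts to at decode time, assuming it runs from examples/librispeech/s2 with the released weights already downloaded; TransformerLM is constructed directly here instead of being resolved through dynamic_import_lm:

    import paddle
    import yaml

    from deepspeech.models.lm.transformer import TransformerLM

    # Parse the new YAML config; the "model" section holds the constructor kwargs.
    with open("conf/lm/transformer.yaml", encoding="utf-8") as stream:
        conf = yaml.load(stream, Loader=yaml.FullLoader)

    # Build the LM from the config and load the released checkpoint.
    lm = TransformerLM(**conf["model"])
    lm.set_state_dict(paddle.load("exp/lm/transformer/transformerLM.pdparams"))
    lm.eval()

The **kwargs added to TransformerLM.__init__ is what lets a YAML file carry keys the constructor does not recognize, so configs from other versions still construct cleanly.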