diff --git a/deepspeech/decoders/recog.py b/deepspeech/decoders/recog.py
index dae3cd42..d1ddfc8a 100644
--- a/deepspeech/decoders/recog.py
+++ b/deepspeech/decoders/recog.py
@@ -49,6 +49,21 @@ def load_trained_model(args):
     model = exp.model
     return model, char_list, exp, confs
 
+def get_config(config_path):
+    stream = open(config_path, mode='r', encoding="utf-8")
+    config = yaml.load(stream, Loader=yaml.FullLoader)
+    stream.close()
+    return config
+
+def load_trained_lm(args):
+    lm_args = get_config(args.rnnlm_conf)
+    # NOTE: "model_module" fallback for compatibility with configs from versions before 0.5.0
+    lm_model_module = lm_args.get("model_module", "default")
+    lm_class = dynamic_import_lm(lm_model_module)
+    lm = lm_class(**lm_args["model"])
+    model_dict = paddle.load(args.rnnlm)
+    lm.set_state_dict(model_dict)
+    return lm
 
 def recog_v2(args):
     """Decode with custom models that implements ScorerInterface.
@@ -79,18 +94,7 @@ def recog_v2(args):
         preprocess_args={"train": False},
     )
     if args.rnnlm:
-        lm_path = args.rnnlm
-        lm = TransformerLM(
-            n_vocab=5002,
-            pos_enc=None,
-            embed_unit=128,
-            att_unit=512,
-            head=8,
-            unit=2048,
-            layer=16,
-            dropout_rate=0.5, )
-        model_dict = paddle.load(lm_path)
-        lm.set_state_dict(model_dict)
+        lm = load_trained_lm(args)
         lm.eval()
     else:
         lm = None
diff --git a/deepspeech/models/lm/transformer.py b/deepspeech/models/lm/transformer.py
index b5f7580a..28371ae2 100644
--- a/deepspeech/models/lm/transformer.py
+++ b/deepspeech/models/lm/transformer.py
@@ -42,7 +42,8 @@ class TransformerLM(nn.Layer, LMInterface, BatchScorerInterface):
                  dropout_rate: float=0.5,
                  emb_dropout_rate: float=0.0,
                  att_dropout_rate: float=0.0,
-                 tie_weights: bool=False, ):
+                 tie_weights: bool=False,
+                 **kwargs):
         nn.Layer.__init__(self)
 
         if pos_enc == "sinusoidal":
diff --git a/examples/librispeech/s2/conf/lm/transformer.yaml b/examples/librispeech/s2/conf/lm/transformer.yaml
new file mode 100644
index 00000000..4349f795
--- /dev/null
+++ b/examples/librispeech/s2/conf/lm/transformer.yaml
@@ -0,0 +1,13 @@
+model_module: transformer
+model:
+    n_vocab: 5002
+    pos_enc: null
+    embed_unit: 128
+    att_unit: 512
+    head: 8
+    unit: 2048
+    layer: 16
+    dropout_rate: 0.5
+    emb_dropout_rate: 0.0
+    att_dropout_rate: 0.0
+    tie_weights: False
diff --git a/examples/librispeech/s2/local/recog.sh b/examples/librispeech/s2/local/recog.sh
index 62c1479e..f0e96109 100755
--- a/examples/librispeech/s2/local/recog.sh
+++ b/examples/librispeech/s2/local/recog.sh
@@ -11,9 +11,10 @@ tag=
 decode_config=conf/decode/decode.yaml
 
 # lm params
-lang_model=rnnlm.model.best
-lmexpdir=exp/train_rnnlm_pytorch_lm_transformer_cosine_batchsize32_lr1e-4_layer16_unigram5000_ngpu4/
-lmtag='nolm'
+lang_model=transformerLM.pdparams
+lmexpdir=exp/lm/transformer
+rnnlm_config_path=conf/lm/transformer.yaml
+lmtag='transformer'
 
 train_set=train_960
 recog_set="test-clean test-other dev-clean dev-other"
@@ -91,9 +92,9 @@ for dmethd in join_ctc; do
                 --recog-json ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask} \
                 --result-label ${decode_dir}/data.JOB.json \
                 --model-conf ${config_path} \
-                --model ${ckpt_prefix}.pdparams
-
-            #--rnnlm ${lmexpdir}/${lang_model} \
+                --model ${ckpt_prefix}.pdparams \
+                --rnnlm-conf ${rnnlm_config_path} \
+                --rnnlm ${lmexpdir}/${lang_model}
 
         score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel} --wer false ${decode_dir} ${dict}
 
diff --git a/examples/librispeech/s2/run.sh b/examples/librispeech/s2/run.sh
index 3c7569fb..61172d25 100755
--- a/examples/librispeech/s2/run.sh
+++ b/examples/librispeech/s2/run.sh
@@ -33,16 +33,24 @@ if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
 fi
 
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
-    # test ckpt avg_n
+    # attention rescore decoder
     ./local/test.sh ${conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 
-if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
+if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ] && [ "${use_lm}" == true ]; then
+    # join_ctc decoding, scored with the transformer lm
+    if [ ! -f exp/lm/transformer/transformerLM.pdparams ]; then
+        wget -P exp/lm/transformer/ https://deepspeech.bj.bcebos.com/transformer_lm/transformerLM.pdparams
+    fi
+    bash local/recog.sh --ckpt_prefix exp/${ckpt}/checkpoints/${avg_ckpt}
+fi
+
+if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
     # ctc alignment of test data
     CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${dict_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 
-if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
+if [ ${stage} -le 6 ] && [ ${stop_stage} -ge 6 ]; then
     # export ckpt avg_n
     CUDA_VISIBLE_DEVICES= ./local/export.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} exp/${ckpt}/checkpoints/${avg_ckpt}.jit
 fi
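
Reviewer note: a minimal sketch of what the new config-driven LM loading amounts to at decode time, assuming it runs from examples/librispeech/s2 with the released weights already downloaded; TransformerLM is constructed directly here instead of being resolved through dynamic_import_lm:

    import paddle
    import yaml

    from deepspeech.models.lm.transformer import TransformerLM

    # Parse the new YAML config; the "model" section holds the constructor kwargs.
    with open("conf/lm/transformer.yaml", encoding="utf-8") as stream:
        conf = yaml.load(stream, Loader=yaml.FullLoader)

    # Build the LM from the config and load the released checkpoint.
    lm = TransformerLM(**conf["model"])
    lm.set_state_dict(paddle.load("exp/lm/transformer/transformerLM.pdparams"))
    lm.eval()

The **kwargs added to TransformerLM.__init__ is what lets a YAML file carry keys the constructor does not recognize, so configs from other versions still construct cleanly.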