add librispeech asr1

4 years ago · 41eeed0450
parent fb6d1e2c11
commit 41eeed0450
20 changed files with 590 additions and 627 deletions
--- a/examples/aishell/asr1/local/align.sh
+++ b/examples/aishell/asr1/local/align.sh
@ -24,7 +24,7 @@ python3 -u ${BIN_DIR}/alignment.py \
 --decode_config ${decode_config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
---opts decoding.decode_batch_size ${batch_size}
+--opts decode.decode_batch_size ${batch_size}
 if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
--- a/examples/aishell/asr1/local/test.sh
+++ b/examples/aishell/asr1/local/test.sh
@ -30,7 +30,7 @@ for type in attention ctc_greedy_search; do
        # stream decoding only support batchsize=1
        batch_size=1
    else
-        batch_size=64
+        batch_size=1
    fi
    output_dir=${ckpt_prefix}
    mkdir -p ${output_dir}
@ -40,8 +40,8 @@ for type in attention ctc_greedy_search; do
    --decode_config ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} \
+    --opts decode.decoding_method ${type} \
-    --opts decoding.decode_batch_size ${batch_size}
+    --opts decode.decode_batch_size ${batch_size}
    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
@ -60,8 +60,8 @@ for type in ctc_prefix_beam_search attention_rescoring; do
    --decode_config ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} \
+    --opts decode.decoding_method ${type} \
-    --opts decoding.batch_size ${batch_size}
+    --opts decode.decode_batch_size ${batch_size}
    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
--- a/examples/aishell/asr1/local/test_wav.sh
+++ b/examples/aishell/asr1/local/test_wav.sh
@ -46,8 +46,8 @@ for type in  attention_rescoring; do
    --decode_config ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} \
+    --opts decode.decoding_method ${type} \
-    --opts decoding.decode_batch_size ${batch_size} \
+    --opts decode.decode_batch_size ${batch_size} \
    --audio_file ${audio_file}
    if [ $? -ne 0 ]; then
--- a/examples/csmsc/voc5/README.md
+++ b/examples/csmsc/voc5/README.md
@ -125,8 +125,8 @@ HiFiGAN checkpoint contains files listed below.
 ```text
 hifigan_csmsc_ckpt_0.1.1
 ├── default.yaml                  # default config used to train hifigan
-├── feats_stats.npy                  # statistics used to normalize spectrogram when training hifigan
+├── feats_stats.npy                  # statistics used to normalize spectrogram when training hifigan
-└── snapshot_iter_2500000.pdz     # generator parameters of hifigan
+└── snapshot_iter_2500000.pdz     # generator parameters of hifigan
 ```
 ## Acknowledgement
--- a/examples/librispeech/asr1/conf/chunk_conformer.yaml
+++ b/examples/librispeech/asr1/conf/chunk_conformer.yaml
@ -1,5 +1,6 @@
-# network architecture
+############################################
-model:
+#           Network Architecture           #
 ############################################
 cmvn_file: 
 cmvn_file_type: "json"
 # encoder related
@ -42,12 +43,16 @@ model:
    length_normalized_loss: false
-data:
+###########################################
 #                   Data                  #
 ###########################################
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
 test_manifest: data/manifest.test
-collator:
+###########################################
 #              Dataloader                 #
 ###########################################
 vocab_filepath: data/lang_char/vocab.txt 
 unit_type: 'spm'
 spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
@ -71,7 +76,9 @@ collator:
 subsampling_factor: 1
 num_encs: 1
-training:
+###########################################
 #                 Training                #
 ###########################################
 n_epoch: 120
 accum_grad: 8
 global_grad_clip: 5.0
@ -87,17 +94,6 @@ training:
  kbest_n: 50
  latest_n: 5
-decoding:
+
  batch_size: 128
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  beam_size: 10
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: true  # simulate streaming inference. Defaults to False.
--- a/examples/librispeech/asr1/conf/chunk_transformer.yaml
+++ b/examples/librispeech/asr1/conf/chunk_transformer.yaml
@ -1,5 +1,6 @@
-# network architecture
+############################################
-model:
+#           Network Architecture           #
 ############################################
 cmvn_file: 
 cmvn_file_type: "json"
 # encoder related
@ -34,13 +35,17 @@ model:
    lsm_weight: 0.1     # label smoothing option
    length_normalized_loss: false
-data:
+###########################################
 #                   Data                  #
 ###########################################
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
 test_manifest: data/manifest.test
-collator:
+###########################################
 #              Dataloader                 #
 ###########################################
 vocab_filepath: data/lang_char/vocab.txt 
 unit_type: 'spm'
 spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
@ -65,7 +70,9 @@ collator:
 num_encs: 1
-training:
+###########################################
 #                 Training                #
 ###########################################
 n_epoch: 120
 accum_grad: 1
 global_grad_clip: 5.0
@ -81,23 +88,3 @@ training:
 checkpoint:
  kbest_n: 50
  latest_n: 5
 decoding:
  batch_size: 64
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: true  # simulate streaming inference. Defaults to False.
--- a/examples/librispeech/asr1/conf/conformer.yaml
+++ b/examples/librispeech/asr1/conf/conformer.yaml
@ -1,5 +1,6 @@
-# network architecture
+############################################
-model:
+#           Network Architecture           #
 ############################################
 cmvn_file: 
 cmvn_file_type: "json"
 # encoder related
@ -39,13 +40,17 @@ model:
    length_normalized_loss: false
-data:
+###########################################
 #                   Data                  #
 ###########################################
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
 test_manifest: data/manifest.test-clean
-collator:
+###########################################
 #              Dataloader                 #
 ###########################################
 vocab_filepath: data/lang_char/vocab.txt 
 unit_type: 'spm'
 spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
@ -70,7 +75,9 @@ collator:
 num_encs: 1
-training:
+###########################################
 #                 Training                #
 ###########################################
 n_epoch: 70
 accum_grad: 8
 global_grad_clip: 3.0
@ -88,17 +95,3 @@ training:
  latest_n: 5
 decoding:
  batch_size: 64
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  beam_size: 10
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: False  # simulate streaming inference. Defaults to False.
--- a/examples/librispeech/asr1/conf/transformer.yaml
+++ b/examples/librispeech/asr1/conf/transformer.yaml
@ -1,5 +1,6 @@
-# network architecture
+############################################
-model:
+#           Network Architecture           #
 ############################################
 cmvn_file: 
 cmvn_file_type: "json"
 # encoder related
@ -34,18 +35,16 @@ model:
 # https://yaml.org/type/float.html
-data:
+###########################################
 #                   Data                  #
 ###########################################
 train_manifest: data/manifest.train
 dev_manifest: data/manifest.dev
 test_manifest: data/manifest.test-clean
  min_input_len: 0.5  # second
  max_input_len: 30.0 # second
  min_output_len: 0.0 # tokens
  max_output_len: 400.0 # tokens
  min_output_input_ratio: 0.05
  max_output_input_ratio: 100.0
-collator:
+###########################################
 #              Dataloader                 #
 ###########################################
 vocab_filepath: data/lang_char/vocab.txt
 unit_type: 'spm'
 spm_model_prefix: 'data/lang_char/bpe_unigram_5000'
@ -70,7 +69,9 @@ collator:
 num_encs: 1
-training:
+###########################################
 #                 Training                #
 ###########################################
 n_epoch: 120 
 accum_grad: 4
 global_grad_clip: 5.0
@ -86,25 +87,3 @@ training:
 checkpoint:
  kbest_n: 50
  latest_n: 5
 decoding:
  batch_size: 64
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: False  # simulate streaming inference. Defaults to False.
--- a/examples/librispeech/asr1/conf/tuning/chunk_decode.yaml
+++ b/examples/librispeech/asr1/conf/tuning/chunk_decode.yaml
@ -0,0 +1,11 @@
+decode_batch_size: 128
+error_rate_type: wer
+decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
+beam_size: 10
+ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
+decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
+   # <0: for decoding, use full chunk.
+   # >0: for decoding, use fixed chunk size as set.
+   # 0: used for training, it's prohibited here. 
+num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
+simulate_streaming: true  # simulate streaming inference. Defaults to False.
--- a/examples/librispeech/asr1/conf/tuning/decode.yaml
+++ b/examples/librispeech/asr1/conf/tuning/decode.yaml
@ -0,0 +1,11 @@
+decode_batch_size: 64
+error_rate_type: wer
+decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
+beam_size: 10
+ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
+decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
+   # <0: for decoding, use full chunk.
+   # >0: for decoding, use fixed chunk size as set.
+   # 0: used for training, it's prohibited here. 
+num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
+simulate_streaming: False  # simulate streaming inference. Defaults to False.
--- a/examples/librispeech/asr1/local/align.sh
+++ b/examples/librispeech/asr1/local/align.sh
@ -1,7 +1,7 @@
 #!/bin/bash
-if [ $# != 2 ];then
+if [ $# != 3 ];then
-    echo "usage: ${0} config_path ckpt_path_prefix"
+    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
 fi
@ -9,7 +9,8 @@ ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 config_path=$1
-ckpt_prefix=$2
+decode_config_path=$2
 ckpt_prefix=$3
 batch_size=1
 output_dir=${ckpt_prefix}
@ -20,9 +21,10 @@ mkdir -p ${output_dir}
 python3 -u ${BIN_DIR}/alignment.py \
 --ngpu ${ngpu} \
 --config ${config_path} \
 --decode_config ${decode_config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \
---opts decoding.batch_size ${batch_size}
+--opts decode.decode_batch_size ${batch_size}
 if [ $? -ne 0 ]; then
    echo "Failed in ctc alignment!"
--- a/examples/librispeech/asr1/local/test.sh
+++ b/examples/librispeech/asr1/local/test.sh
@ -15,8 +15,8 @@ recog_set="test-clean"
 stage=0
 stop_stage=100
-if [ $# != 2 ];then
+if [ $# != 3 ];then
-    echo "usage: ${0} config_path ckpt_path_prefix"
+    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix"
    exit -1
 fi
@ -24,7 +24,8 @@ ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 config_path=$1
-ckpt_prefix=$2
+decode_config_path=$2
 ckpt_prefix=$3
 chunk_mode=false
 if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
@ -52,10 +53,11 @@ if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then
        python3 -u ${BIN_DIR}/test.py \
            --ngpu ${ngpu} \
            --config ${config_path} \
            --decode_config ${decode_config_path} \
            --result_file ${ckpt_prefix}.${type}.rsl \
            --checkpoint_path ${ckpt_prefix} \
-            --opts decoding.decoding_method ${type} \
+            --opts decode.decoding_method ${type} \
-            --opts decoding.batch_size ${batch_size}
+            --opts decode.decode_batch_size ${batch_size}
        if [ $? -ne 0 ]; then
            echo "Failed in evaluation!"
@ -76,10 +78,11 @@ for type in ctc_greedy_search; do
    python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_config ${decode_config_path} \
        --result_file ${ckpt_prefix}.${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
-        --opts decoding.decoding_method ${type} \
+        --opts decode.decoding_method ${type} \
-        --opts decoding.batch_size ${batch_size}
+        --opts decode.decode_batch_size ${batch_size}
    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
@ -96,10 +99,11 @@ for type in ctc_prefix_beam_search attention_rescoring; do
    python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
        --decode_config ${decode_config_path} \
        --result_file ${ckpt_prefix}.${type}.rsl \
        --checkpoint_path ${ckpt_prefix} \
-        --opts decoding.decoding_method ${type} \
+        --opts decode.decoding_method ${type} \
-        --opts decoding.batch_size ${batch_size}
+        --opts decode.decode_batch_size ${batch_size}
    if [ $? -ne 0 ]; then
        echo "Failed in evaluation!"
--- a/examples/librispeech/asr1/local/test_wav.sh
+++ b/examples/librispeech/asr1/local/test_wav.sh
@ -1,7 +1,7 @@
 #!/bin/bash
-if [ $# != 3 ];then
+if [ $# != 4 ];then
-    echo "usage: ${0} config_path ckpt_path_prefix audio_file"
+    echo "usage: ${0} config_path decode_config_path ckpt_path_prefix audio_file"
    exit -1
 fi
@ -9,8 +9,9 @@ ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
 config_path=$1
-ckpt_prefix=$2
+decode_config_path=$2
-audio_file=$3
+ckpt_prefix=$3
 audio_file=$4
 mkdir -p data
 wget -nc https://paddlespeech.bj.bcebos.com/datasets/single_wav/en/demo_002_en.wav -P data/
@ -49,10 +50,11 @@ for type in attention_rescoring; do
    python3 -u ${BIN_DIR}/test_wav.py \
    --ngpu ${ngpu} \
    --config ${config_path} \
    --decode_config ${decode_config_path} \
    --result_file ${output_dir}/${type}.rsl \
    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} \
+    --opts decode.decoding_method ${type} \
-    --opts decoding.batch_size ${batch_size} \
+    --opts decode.decode_batch_size ${batch_size} \
    --audio_file ${audio_file}
    #score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel}.model --wer true ${expdir}/${decode_dir} ${dict}
--- a/examples/librispeech/asr1/run.sh
+++ b/examples/librispeech/asr1/run.sh
@ -8,6 +8,7 @@ gpus=0,1,2,3
 stage=0
 stop_stage=50
 conf_path=conf/transformer.yaml
 decode_conf_path=conf/tuning/decode.yaml
 avg_num=30
 audio_file=data/demo_002_en.wav
@ -34,17 +35,17 @@ fi
 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then
    # test ckpt avg_n
-    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+    CUDA_VISIBLE_DEVICES=0 ./local/test.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 if [ ${stage} -le 4 ] && [ ${stop_stage} -ge 4 ]; then
    # ctc alignment of test data
-    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
+    CUDA_VISIBLE_DEVICES=0 ./local/align.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} || exit -1
 fi
 if [ ${stage} -le 5 ] && [ ${stop_stage} -ge 5 ]; then
    # test a single .wav file
-    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
+    CUDA_VISIBLE_DEVICES=0 ./local/test_wav.sh ${conf_path} ${decode_conf_path} exp/${ckpt}/checkpoints/${avg_ckpt} ${audio_file} || exit -1
 fi
 if [ ${stage} -le 51 ] && [ ${stop_stage} -ge 51 ]; then
--- a/examples/tiny/asr1/conf/conformer.yaml
+++ b/examples/tiny/asr1/conf/conformer.yaml
@ -1,5 +1,7 @@
 # https://yaml.org/type/float.html
-data:
+###########################################
 #                   Data                  #
 ###########################################
 train_manifest: data/manifest.tiny
 dev_manifest: data/manifest.tiny
 test_manifest: data/manifest.tiny
@ -10,7 +12,10 @@ data:
 min_output_input_ratio: 0.05
 max_output_input_ratio: 10.0
-collator:
+
 ###########################################
 #              Dataloader                 #
 ###########################################
 mean_std_filepath: ""
 vocab_filepath: data/lang_char/vocab.txt 
 unit_type: 'spm'
@ -36,8 +41,9 @@ collator:
 num_workers: 2
-# network architecture
+############################################
-model:
+#           Network Architecture           #
 ############################################
 cmvn_file: "data/mean_std.json"
 cmvn_file_type: "json"
 # encoder related
@ -76,7 +82,9 @@ model:
    length_normalized_loss: false
-training:
+###########################################
 #                 training                #
 ###########################################
 n_epoch: 5
 accum_grad: 4
 global_grad_clip: 5.0
@ -94,23 +102,4 @@ training:
  latest_n: 1
 decoding:
  batch_size: 64
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: False  # simulate streaming inference. Defaults to False.
--- a/examples/tiny/asr1/conf/transformer.yaml
+++ b/examples/tiny/asr1/conf/transformer.yaml
@ -1,5 +1,7 @@
 # https://yaml.org/type/float.html
-data:
+###########################################
 #                   Data                  #
 ###########################################
 train_manifest: data/manifest.tiny
 dev_manifest: data/manifest.tiny
 test_manifest: data/manifest.tiny
@ -10,7 +12,9 @@ data:
 min_output_input_ratio: 0.05
 max_output_input_ratio: 10.0
-collator:
+###########################################
 #              Dataloader                 #
 ###########################################
 mean_std_filepath: data/mean_std.json
 vocab_filepath: data/lang_char/vocab.txt 
 unit_type: 'spm'
@ -35,8 +39,9 @@ collator:
 shuffle_method: batch_shuffle
 num_workers: 2
-# network architecture
+############################################
-model:
+#           Network Architecture           #
 ############################################
 cmvn_file: 
 cmvn_file_type: "json"
 # encoder related
@ -70,7 +75,9 @@ model:
    length_normalized_loss: false
-training:
+###########################################
 #                 training                #
 ###########################################
 n_epoch: 5
 accum_grad: 1
 global_grad_clip: 5.0
@ -88,23 +95,4 @@ training:
  latest_n: 1
 decoding:
  batch_size: 8 #64
  error_rate_type: wer
  decoding_method: attention  # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
  alpha: 2.5
  beta: 0.3
  beam_size: 10
  cutoff_prob: 1.0
  cutoff_top_n: 0
  num_proc_bsearch: 8
  ctc_weight: 0.5 # ctc weight for attention rescoring decode mode.
  decoding_chunk_size: -1 # decoding chunk size. Defaults to -1.
      # <0: for decoding, use full chunk.
      # >0: for decoding, use fixed chunk size as set.
      # 0: used for training, it's prohibited here. 
  num_decoding_left_chunks: -1  # number of left chunks for decoding. Defaults to -1.
  simulate_streaming: False  # simulate streaming inference. Defaults to False.
--- a/paddlespeech/s2t/exps/u2/bin/alignment.py
+++ b/paddlespeech/s2t/exps/u2/bin/alignment.py
@ -46,7 +46,7 @@ if __name__ == "__main__":
    if args.decode_config:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_config)
-        config.decoding = decode_confs
+        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
--- a/paddlespeech/s2t/exps/u2/bin/test.py
+++ b/paddlespeech/s2t/exps/u2/bin/test.py
@ -50,7 +50,7 @@ if __name__ == "__main__":
    if args.decode_config:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_config)
-        config.decoding = decode_confs
+        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
--- a/paddlespeech/s2t/exps/u2/bin/test_wav.py
+++ b/paddlespeech/s2t/exps/u2/bin/test_wav.py
@ -81,7 +81,7 @@ class U2Infer():
            ilen = paddle.to_tensor(feat.shape[0])
            xs = paddle.to_tensor(feat, dtype='float32').unsqueeze(axis=0)
-            decode_config = self.config.decoding
+            decode_config = self.config.decode
            result_transcripts = self.model.decode(
                xs,
                ilen,
@ -135,7 +135,7 @@ if __name__ == "__main__":
    if args.decode_config:
        decode_confs = CfgNode(new_allowed=True)
        decode_confs.merge_from_file(args.decode_config)
-        config.decoding = decode_confs
+        config.decode = decode_confs
    if args.opts:
        config.merge_from_list(args.opts)
    config.freeze()
--- a/paddlespeech/s2t/exps/u2/config.py
+++ b/paddlespeech/s2t/exps/u2/config.py
@ -29,7 +29,7 @@ U2Model.params(_C)
 U2Trainer.params(_C)
-_C.decoding = U2Tester.params()
+_C.decode = U2Tester.params()
 def get_cfg_defaults():