From 61102951aaeb3e3e077b46038fc3526100829bea Mon Sep 17 00:00:00 2001
From: zxcd <228587199@qq.com>
Date: Fri, 6 Dec 2024 06:47:17 +0000
Subject: [PATCH] fix

---
 .../aishell/asr1/conf/chunk_conformer.yaml    | 26 ++---
 .../tal_cs/asr1/conf/chunk_conformer.yaml     | 96 +++++++++++++++++++
 2 files changed, 109 insertions(+), 13 deletions(-)
 create mode 100644 examples/tal_cs/asr1/conf/chunk_conformer.yaml

diff --git a/examples/aishell/asr1/conf/chunk_conformer.yaml b/examples/aishell/asr1/conf/chunk_conformer.yaml
index ec23a0e78..b8b300679 100644
--- a/examples/aishell/asr1/conf/chunk_conformer.yaml
+++ b/examples/aishell/asr1/conf/chunk_conformer.yaml
@@ -6,8 +6,8 @@ cmvn_file_type: "json"
 # encoder related
 encoder: conformer
 encoder_conf:
-    output_size: 512    # dimension of attention
-    attention_heads: 8
+    output_size: 256    # dimension of attention
+    attention_heads: 4
     linear_units: 2048  # the number of units of position-wise feed forward
     num_blocks: 12      # the number of encoder blocks
     dropout_rate: 0.1   # sublayer output dropout
@@ -18,7 +18,7 @@ encoder_conf:
     cnn_module_kernel: 15
     use_cnn_module: True
     activation_type: 'swish'
-    pos_enc_layer_type: 'rel_pos'
+    pos_enc_layer_type: 'rel/ssd3/zhaoxi/repo/tmp/PaddleSpeech/examples/tal_cs/asr1/conf/chunk_conformer.yaml_pos'
     selfattention_layer_type: 'rel_selfattn'
     causal: true
     use_dynamic_chunk: true
@@ -27,7 +27,7 @@ encoder_conf:
 # decoder related
 decoder: transformer
 decoder_conf:
-    attention_heads: 8
+    attention_heads: 4
     linear_units: 2048
     num_blocks: 6
     dropout_rate: 0.1  # sublayer output dropout
@@ -55,14 +55,14 @@ test_manifest: data/manifest.test
 ###########################################
 
 vocab_filepath: data/lang_char/vocab.txt 
-spm_model_prefix: 'data/lang_char/bpe_bpe_11297'
-unit_type: 'spm'
+spm_model_prefix: ''
+unit_type: 'char'
 preprocess_config: conf/preprocess.yaml
 feat_dim: 80
-stride_ms: 20.0
-window_ms: 30.0
+stride_ms: 10.0
+window_ms: 25.0
 sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
-batch_size: 8
+batch_size: 32
 maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
 maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
 minibatches: 0 # for debug
@@ -78,13 +78,13 @@ num_encs: 1
 ###########################################
 #                 Training                #
 ###########################################
-n_epoch: 100 
-accum_grad: 4
+n_epoch: 240 
+accum_grad: 1
 global_grad_clip: 5.0
-dist_sampler: False
+dist_sampler: True
 optim: adam
 optim_conf:
-  lr: 0.002
+  lr: 0.001
   weight_decay: 1.0e-6
 scheduler: warmuplr
 scheduler_conf:
diff --git a/examples/tal_cs/asr1/conf/chunk_conformer.yaml b/examples/tal_cs/asr1/conf/chunk_conformer.yaml
new file mode 100644
index 000000000..ba0dbb49b
--- /dev/null
+++ b/examples/tal_cs/asr1/conf/chunk_conformer.yaml
@@ -0,0 +1,96 @@
+############################################
+#           Network Architecture           #
+############################################
+cmvn_file: 
+cmvn_file_type: "json"
+# encoder related
+encoder: conformer
+encoder_conf:
+    output_size: 512    # dimension of attention
+    attention_heads: 8
+    linear_units: 2048  # the number of units of position-wise feed forward
+    num_blocks: 12      # the number of encoder blocks
+    dropout_rate: 0.1   # sublayer output dropout
+    positional_dropout_rate: 0.1
+    attention_dropout_rate: 0.0
+    input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8
+    normalize_before: True
+    cnn_module_kernel: 15
+    use_cnn_module: True
+    activation_type: 'swish'
+    pos_enc_layer_type: 'rel_pos'
+    selfattention_layer_type: 'rel_selfattn'
+    causal: true
+    use_dynamic_chunk: true
+    cnn_module_norm: 'layer_norm' # using nn.LayerNorm makes model converge faster
+    use_dynamic_left_chunk: false
+# decoder related
+decoder: transformer
+decoder_conf:
+    attention_heads: 8
+    linear_units: 2048
+    num_blocks: 6
+    dropout_rate: 0.1  # sublayer output dropout
+    positional_dropout_rate: 0.1
+    self_attention_dropout_rate: 0.0
+    src_attention_dropout_rate: 0.0
+# hybrid CTC/attention
+model_conf:
+    ctc_weight: 0.3
+    lsm_weight: 0.1     # label smoothing option
+    length_normalized_loss: false
+    init_type: 'kaiming_uniform' # !Warning: need to convergence
+
+###########################################
+#                   Data                  #
+###########################################
+
+train_manifest: data/manifest.train
+dev_manifest: data/manifest.dev
+test_manifest: data/manifest.test
+
+
+###########################################
+#              Dataloader                 #
+###########################################
+
+vocab_filepath: data/lang_char/vocab.txt 
+spm_model_prefix: 'data/lang_char/bpe_bpe_11297'
+unit_type: 'spm'
+preprocess_config: conf/preprocess.yaml
+feat_dim: 80
+stride_ms: 20.0
+window_ms: 30.0
+sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs 
+batch_size: 32
+maxlen_in: 512  # if input length  > maxlen-in, batchsize is automatically reduced
+maxlen_out: 150  # if output length > maxlen-out, batchsize is automatically reduced
+minibatches: 0 # for debug
+batch_count: auto
+batch_bins: 0
+batch_frames_in: 0
+batch_frames_out: 0
+batch_frames_inout: 0
+num_workers: 2
+subsampling_factor: 1
+num_encs: 1
+
+###########################################
+#                 Training                #
+###########################################
+n_epoch: 100 
+accum_grad: 4
+global_grad_clip: 5.0
+dist_sampler: False
+optim: adam
+optim_conf:
+  lr: 0.002
+  weight_decay: 1.0e-6
+scheduler: warmuplr
+scheduler_conf:
+  warmup_steps: 25000
+  lr_decay: 1.0
+log_interval: 100
+checkpoint:
+  kbest_n: 50
+  latest_n: 5