doc and train config

2 years ago · ac0864515b
parent 559627c8de
commit ac0864515b
6 changed files with 9 additions and 151 deletions
--- a/examples/librispeech/asr3/conf/hubertASR.yaml
+++ b/examples/librispeech/asr3/conf/hubertASR.yaml
@@ -1,142 +0,0 @@
-############################################
-#          Network Architecture           #
-############################################
-freeze_hubert: False
-normalize_wav: True
-output_norm: True
-init_type: kaiming_uniform # !Warning: need to convergence
-enc:
-  input_shape: 1024
-  dnn_blocks: 2
-  dnn_neurons: 1024
-  activation: True
-ctc:
-  enc_n_units: 1024
-  blank_id: 0
-  dropout_rate: 0.0
-hubert_params_path: "exp/hubert/pd_hubert_no_fintune.pdparams"
-
-
-task_cfg:
-  label_rate: 50.0
-  sample_rate: 16000
-  normalize: True
-  enable_padding: False
-  max_keep_size: None
-  max_sample_size: 250000
-  min_sample_size: 32000
-  single_target: False
-  random_crop: True
-  pad_audio: False
-
-model_cfg:
-  dropout_input: 0.0
-  final_dropout: 0.0
-  dropout: 0.0
-  attention_dropout: 0.0
-  activation_dropout: 0.1
-  apply_mask: True
-  mask_length: 10
-  mask_prob: 0.5
-  mask_selection: static
-  mask_other: 0.0
-  no_mask_overlap: False
-  mask_channel_length: 64
-  mask_channel_prob: 0.25
-  mask_channel_selection: static
-  mask_channel_other: 0.0
-  no_mask_channel_overlap: False
-  feature_grad_mult: 0.0
-  layerdrop: 0.1
-  normalize: True
-  fp16: True
-  label_rate: 50
-  extractor_mode: layer_norm
-  encoder_layers: 24
-  encoder_embed_dim: 1024
-  encoder_ffn_embed_dim: 4096
-  encoder_attention_heads: 16
-  activation_fn: gelu
-  encoder_layerdrop: 0.1
-  dropout_features: 0.0
-  final_dim: 768
-  untie_final_proj: True
-  layer_norm_first: True
-  conv_feature_layers: "[(512,10,5)] + [(512,3,2)] * 4 + [(512,2,2)] * 2"
-  conv_bias: False
-  logit_temp: 0.1
-  target_glu: False
-  mask_min_space: 1
-  mask_channel_min_space: 1
-  conv_pos: 128
-  conv_pos_groups: 16
-  latent_temp: [2.0, 0.5, 0.999995]
-  skip_masked: False
-  skip_nomask: True
-
-###########################################
-#                   Data                  #
-###########################################
-train_manifest: data/manifest.train-clean-100
-dev_manifest: data/manifest.dev
-test_manifest: data/manifest.test-clean
-
-###########################################
-#              Dataloader                 #
-###########################################
-vocab_filepath: data/lang_char/vocab.txt 
-unit_type: char
-mean_std_filepath: ""
-preprocess_config: conf/preprocess.yaml
-sortagrad: -1 # Feed samples from shortest to longest ; -1: enabled for all epochs 0: disabled other: enabled for other epochs 
-batch_size: 2  # Different batch_size may cause large differences in results
-maxlen_in: 51200000000  # if input length  > maxlen-in batchsize is automatically reduced
-maxlen_out: 1500000  # if output length > maxlen-out batchsize is automatically reduced
-minibatches: 0 # for debug
-batch_count: auto
-batch_bins: 0 
-batch_frames_in: 0
-batch_frames_out: 0
-batch_frames_inout: 0
-num_workers: 0
-subsampling_factor: 1
-num_encs: 1
-dist_sampler: True
-shortest_first: True
-return_lens_rate: True
-  
-############################################
-#             Data Augmentation            #
-############################################
-audio_augment:  # for raw audio 
-  sample_rate: 16000
-  speeds: [95, 100, 105]
-
-###########################################
-#                 Training                #
-###########################################
-n_epoch: 3
-accum_grad: 8
-global_grad_clip: 5.0
-model_optim: adadelta
-model_optim_conf:
-  lr: 1.0
-  epsilon: 1.0e-6
-  rho: 0.95
-model_scheduler: constantlr    
-model_scheduler_conf:
-  warmup_steps: 25000
-  lr_decay: 1.0
-hubert_optim: adadelta
-hubert_optim_conf:
-  lr: 1.0
-  epsilon: 1.0e-6
-  rho: 0.95
-hubert_scheduler: constantlr    
-hubert_scheduler_conf:
-  warmup_steps: 25000
-  lr_decay: 1.0
-log_interval: 1
-checkpoint:
-  kbest_n: 50
-  latest_n: 5
--- a/examples/librispeech/asr3/local/data.sh
+++ b/examples/librispeech/asr3/local/data.sh
@@ -1,6 +1,6 @@
 #!/bin/bash

-stage=0
+stage=-1
 stop_stage=100

 unit_type=char
--- a/examples/librispeech/asr3/local/test.sh
+++ b/examples/librispeech/asr3/local/test.sh
@@ -31,7 +31,7 @@ python3 utils/format_rsl.py \

 for type in ctc_greedy_search; do
    echo "decoding ${type}"
-    batch_size=8
+    batch_size=16
    python3 -u ${BIN_DIR}/test.py \
        --ngpu ${ngpu} \
        --config ${config_path} \
--- a/examples/librispeech/asr3/run.sh
+++ b/examples/librispeech/asr3/run.sh
@@ -6,7 +6,7 @@ set -e

 gpus=0
 stage=0
-stop_stage=4
+stop_stage=0
 conf_path=conf/wav2vec2ASR.yaml
 ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml
--- a/examples/librispeech/asr4/conf/hubertASR.yaml
+++ b/examples/librispeech/asr4/conf/hubertASR.yaml
@@ -14,7 +14,7 @@ ctc:
  enc_n_units: 1024
  blank_id: 0
  dropout_rate: 0.0
-hubert_params_path: "exp/hubert/pd_hubert_no_fintune.pdparams"
+hubert_params_path: "exp/hubert/hubert-large-lv60.pdparams"


 task_cfg:
@@ -89,9 +89,9 @@ unit_type: char
 mean_std_filepath: ""
 preprocess_config: conf/preprocess.yaml
 sortagrad: -1 # Feed samples from shortest to longest ; -1: enabled for all epochs 0: disabled other: enabled for other epochs 
-batch_size: 2  # Different batch_size may cause large differences in results
-maxlen_in: 51200000000  # if input length  > maxlen-in batchsize is automatically reduced
-maxlen_out: 1500000  # if output length > maxlen-out batchsize is automatically reduced
+batch_size: 4  # Different batch_size may cause large differences in results
+maxlen_in: 1500  # if input length  > maxlen-in batchsize is automatically reduced
+maxlen_out: 150  # if output length > maxlen-out batchsize is automatically reduced
 minibatches: 0 # for debug
 batch_count: auto
 batch_bins: 0 
@@ -129,7 +129,7 @@ model_scheduler_conf:
  lr_decay: 1.0
 hubert_optim: adadelta
 hubert_optim_conf:
-  lr: 1.0
+  lr: 0.95
  epsilon: 1.0e-6
  rho: 0.95
 hubert_scheduler: constantlr    
--- a/examples/librispeech/asr4/run.sh
+++ b/examples/librispeech/asr4/run.sh
@@ -6,7 +6,7 @@ set -e

 gpus=0
 stage=0
-stop_stage=4
+stop_stage=0
 conf_path=conf/hubertASR.yaml
 ips=            #xx.xx.xx.xx,xx.xx.xx.xx
 decode_conf_path=conf/tuning/decode.yaml