@@ -3,12 +3,12 @@ data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev
   test_manifest: data/manifest.test
-  min_input_len: 0.5 # second
-  max_input_len: 30.0 # second
+  min_input_len: 0.0 # second
+  max_input_len: 10.0 # second
   min_output_len: 0.0 # tokens
-  max_output_len: 400.0 # tokens
-  min_output_input_ratio: 0.05
-  max_output_input_ratio: 100.0
+  max_output_len: 150.0 # tokens
+  min_output_input_ratio: 0.005
+  max_output_input_ratio: 1000.0

 collator:
   vocab_filepath: data/vocab.txt
@@ -42,10 +42,10 @@ model:
   # encoder related
   encoder: transformer
   encoder_conf:
-    output_size: 256 # dimension of attention
+    output_size: 128 # dimension of attention
     attention_heads: 4
-    linear_units: 2048 # the number of units of position-wise feed forward
-    num_blocks: 12 # the number of encoder blocks
+    linear_units: 1024 # the number of units of position-wise feed forward
+    num_blocks: 6 # the number of encoder blocks
     dropout_rate: 0.1
     positional_dropout_rate: 0.1
     attention_dropout_rate: 0.0
@@ -56,7 +56,7 @@ model:
   decoder: transformer
   decoder_conf:
     attention_heads: 4
-    linear_units: 2048
+    linear_units: 1024
     num_blocks: 6
     dropout_rate: 0.1
     positional_dropout_rate: 0.1
@@ -65,26 +65,26 @@ model:

   # hybrid CTC/attention
   model_conf:
-    ctc_weight: 0.3
+    ctc_weight: 0.5
     ctc_dropoutrate: 0.0
-    ctc_grad_norm_type: instance
+    ctc_grad_norm_type: batch
     lsm_weight: 0.1 # label smoothing option
     length_normalized_loss: false


 training:
-  n_epoch: 120
+  n_epoch: 200
   accum_grad: 2
   global_grad_clip: 5.0
   optim: adam
   optim_conf:
-    lr: 0.002
+    lr: 0.004
     weight_decay: 1e-06
   scheduler: warmuplr # pytorch v1.1.0+ required
   scheduler_conf:
-    warmup_steps: 400
+    warmup_steps: 2000
     lr_decay: 1.0
-  log_interval: 100
+  log_interval: 10
   checkpoint:
     kbest_n: 50
     latest_n: 5
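Since the diff above only changes hyperparameter values, the two versions of the config can also be compared programmatically before training. The snippet below is a minimal sketch, not part of the original change: it assumes PyYAML is installed, and the file paths `conf/transformer.yaml` and `conf/transformer_new.yaml` are hypothetical placeholders for the old and new configs.

```python
# Minimal sketch: load two YAML configs and print the keys whose values differ.
# Paths below are hypothetical placeholders for the two versions diffed above.
import yaml


def flatten(d, prefix=""):
    """Flatten a nested dict into dotted keys, e.g. model.encoder_conf.output_size."""
    items = {}
    for key, value in d.items():
        name = f"{prefix}.{key}" if prefix else str(key)
        if isinstance(value, dict):
            items.update(flatten(value, name))
        else:
            items[name] = value
    return items


with open("conf/transformer.yaml") as f:
    old_cfg = flatten(yaml.safe_load(f))
with open("conf/transformer_new.yaml") as f:
    new_cfg = flatten(yaml.safe_load(f))

# Print one line per changed hyperparameter.
for key in sorted(old_cfg.keys() | new_cfg.keys()):
    if old_cfg.get(key) != new_cfg.get(key):
        print(f"{key}: {old_cfg.get(key)} -> {new_cfg.get(key)}")
```

Run against the configs above, this should print one line per changed key, e.g. `data.max_input_len: 30.0 -> 10.0` and `training.n_epoch: 120 -> 200`.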