From c32cb734a6503c69f10c5557c75deaeb3510e411 Mon Sep 17 00:00:00 2001
From: Junkun
Date: Tue, 28 Sep 2021 01:17:26 -0700
Subject: [PATCH 1/2] update the result of TED-EN-ZH

---
 deepspeech/models/u2_st.py          | 5 +++--
 examples/ted_en_zh/t0/README.md     | 7 ++++++-
 examples/ted_en_zh/t0/local/test.sh | 3 ++-
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py
index a3d99942..8f87f6da 100644
--- a/deepspeech/models/u2_st.py
+++ b/deepspeech/models/u2_st.py
@@ -113,7 +113,8 @@ class U2STBaseModel(nn.Layer):
                  asr_weight: float=0.0,
                  ignore_id: int=IGNORE_ID,
                  lsm_weight: float=0.0,
-                 length_normalized_loss: bool=False):
+                 length_normalized_loss: bool=False,
+                 **kwargs):
         assert 0.0 <= ctc_weight <= 1.0, ctc_weight
 
         super().__init__()
@@ -650,7 +651,7 @@ class U2STModel(U2STBaseModel):
                 odim=vocab_size,
                 enc_n_units=encoder.output_size(),
                 blank_id=0,
-                dropout_rate=model_conf['ctc_dropout_rate'],
+                dropout_rate=model_conf['ctc_dropoutrate'],
                 reduction=True, # sum
                 batch_average=True, # sum / batch_size
                 grad_norm_type=model_conf['ctc_grad_norm_type'])
diff --git a/examples/ted_en_zh/t0/README.md b/examples/ted_en_zh/t0/README.md
index e2443d36..9bca2643 100644
--- a/examples/ted_en_zh/t0/README.md
+++ b/examples/ted_en_zh/t0/README.md
@@ -6,5 +6,10 @@
 | Data Subset | Duration in Seconds |
 | --- | --- |
 | data/manifest.train | 0.942 ~ 60 |
-| data/manifest.dev | 1.151 ~ 39 |
+| data/manifest.dev | 1.151 ~ 39 |
 | data/manifest.test | 1.1 ~ 42.746 |
+
+## Transformer
+| Model | Params | Config | Char-BLEU |
+| --- | --- | --- | --- |
+| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 |
\ No newline at end of file
diff --git a/examples/ted_en_zh/t0/local/test.sh b/examples/ted_en_zh/t0/local/test.sh
index 34475085..7235c6f9 100755
--- a/examples/ted_en_zh/t0/local/test.sh
+++ b/examples/ted_en_zh/t0/local/test.sh
@@ -19,7 +19,8 @@ for type in fullsentence; do
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+    --opts decoding.decoding_method ${type} \
+    --opts decoding.batch_size ${batch_size}
 
     if [ $? -ne 0 ]; then
         echo "Failed in evaluation!"

From a0c94209e22eeb515b8eba7b1ef946e5561e93d0 Mon Sep 17 00:00:00 2001
From: Junkun
Date: Tue, 28 Sep 2021 17:07:20 -0700
Subject: [PATCH 2/2] update the result of timit

---
 examples/timit/s1/README.md             | 10 ++++++++-
 examples/timit/s1/conf/transformer.yaml | 30 ++++++++++++++--------------
 examples/timit/s1/local/test.sh         |  6 +++--
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/examples/timit/s1/README.md b/examples/timit/s1/README.md
index 4d9b146a..6d719a7d 100644
--- a/examples/timit/s1/README.md
+++ b/examples/timit/s1/README.md
@@ -1,3 +1,11 @@
 # TIMIT
 
-Results will be organized and updated soon.
+
+
+
+### Transformer
+| Model | Params | Config | Decode method | PER |
+| --- | --- | --- | --- | --- |
+| transformer | 5.17M | conf/transformer.yaml | attention | 0.5531 |
+| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search | 0.3922 |
+| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 0.3768 |
\ No newline at end of file
diff --git a/examples/timit/s1/conf/transformer.yaml b/examples/timit/s1/conf/transformer.yaml
index c3b51996..a27b3160 100644
--- a/examples/timit/s1/conf/transformer.yaml
+++ b/examples/timit/s1/conf/transformer.yaml
@@ -3,12 +3,12 @@ data:
   train_manifest: data/manifest.train
   dev_manifest: data/manifest.dev
   test_manifest: data/manifest.test
-  min_input_len: 0.5 # second
-  max_input_len: 30.0 # second
+  min_input_len: 0.0 # second
+  max_input_len: 10.0 # second
   min_output_len: 0.0 # tokens
-  max_output_len: 400.0 # tokens
+  max_output_len: 150.0 # tokens
-  min_output_input_ratio: 0.05
-  max_output_input_ratio: 100.0
+  min_output_input_ratio: 0.005
+  max_output_input_ratio: 1000.0
 
 collator:
   vocab_filepath: data/vocab.txt
@@ -42,10 +42,10 @@ model:
   # encoder related
   encoder: transformer
   encoder_conf:
-    output_size: 256 # dimension of attention
+    output_size: 128 # dimension of attention
     attention_heads: 4
-    linear_units: 2048 # the number of units of position-wise feed forward
-    num_blocks: 12 # the number of encoder blocks
+    linear_units: 1024 # the number of units of position-wise feed forward
+    num_blocks: 6 # the number of encoder blocks
     dropout_rate: 0.1
     positional_dropout_rate: 0.1
     attention_dropout_rate: 0.0
@@ -56,7 +56,7 @@ model:
   decoder: transformer
   decoder_conf:
     attention_heads: 4
-    linear_units: 2048
+    linear_units: 1024
     num_blocks: 6
     dropout_rate: 0.1
     positional_dropout_rate: 0.1
@@ -65,26 +65,26 @@ model:
 
   # hybrid CTC/attention
   model_conf:
-    ctc_weight: 0.3
+    ctc_weight: 0.5
     ctc_dropoutrate: 0.0
-    ctc_grad_norm_type: instance
+    ctc_grad_norm_type: batch
     lsm_weight: 0.1 # label smoothing option
     length_normalized_loss: false
 
 
 training:
-  n_epoch: 120
+  n_epoch: 200
   accum_grad: 2
   global_grad_clip: 5.0
   optim: adam
   optim_conf:
-    lr: 0.002
+    lr: 0.004
     weight_decay: 1e-06
   scheduler: warmuplr # pytorch v1.1.0+ required
   scheduler_conf:
-    warmup_steps: 400
+    warmup_steps: 2000
     lr_decay: 1.0
-  log_interval: 100
+  log_interval: 10
   checkpoint:
     kbest_n: 50
     latest_n: 5
diff --git a/examples/timit/s1/local/test.sh b/examples/timit/s1/local/test.sh
index 868c8fda..05813179 100755
--- a/examples/timit/s1/local/test.sh
+++ b/examples/timit/s1/local/test.sh
@@ -36,7 +36,8 @@ for type in attention ctc_greedy_search; do
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+    --opts decoding.decoding_method ${type} \
+    --opts decoding.batch_size ${batch_size}
 
     if [ $? -ne 0 ]; then
         echo "Failed in evaluation!"
@@ -52,7 +53,8 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
+    --opts decoding.decoding_method ${type} \
+    --opts decoding.batch_size ${batch_size}
 
     if [ $? -ne 0 ]; then
         echo "Failed in evaluation!"