From c32cb734a6503c69f10c5557c75deaeb3510e411 Mon Sep 17 00:00:00 2001 From: Junkun Date: Tue, 28 Sep 2021 01:17:26 -0700 Subject: [PATCH 1/4] update the result of TED-EN-ZH --- deepspeech/models/u2_st.py | 5 +++-- examples/ted_en_zh/t0/README.md | 7 ++++++- examples/ted_en_zh/t0/local/test.sh | 3 ++- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/deepspeech/models/u2_st.py b/deepspeech/models/u2_st.py index a3d99942..8f87f6da 100644 --- a/deepspeech/models/u2_st.py +++ b/deepspeech/models/u2_st.py @@ -113,7 +113,8 @@ class U2STBaseModel(nn.Layer): asr_weight: float=0.0, ignore_id: int=IGNORE_ID, lsm_weight: float=0.0, - length_normalized_loss: bool=False): + length_normalized_loss: bool=False, + **kwargs): assert 0.0 <= ctc_weight <= 1.0, ctc_weight super().__init__() @@ -650,7 +651,7 @@ class U2STModel(U2STBaseModel): odim=vocab_size, enc_n_units=encoder.output_size(), blank_id=0, - dropout_rate=model_conf['ctc_dropout_rate'], + dropout_rate=model_conf['ctc_dropoutrate'], reduction=True, # sum batch_average=True, # sum / batch_size grad_norm_type=model_conf['ctc_grad_norm_type']) diff --git a/examples/ted_en_zh/t0/README.md b/examples/ted_en_zh/t0/README.md index e2443d36..9bca2643 100644 --- a/examples/ted_en_zh/t0/README.md +++ b/examples/ted_en_zh/t0/README.md @@ -6,5 +6,10 @@ | Data Subset | Duration in Seconds | | --- | --- | | data/manifest.train | 0.942 ~ 60 | -| data/manifest.dev | 1.151 ~ 39 | +| data/manifest.dev | 1.151 ~ 39 | | data/manifest.test | 1.1 ~ 42.746 | + +## Transformer +| Model | Params | Config | Char-BLEU | +| --- | --- | --- | --- | +| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 | \ No newline at end of file diff --git a/examples/ted_en_zh/t0/local/test.sh b/examples/ted_en_zh/t0/local/test.sh index 34475085..7235c6f9 100755 --- a/examples/ted_en_zh/t0/local/test.sh +++ b/examples/ted_en_zh/t0/local/test.sh @@ -19,7 +19,8 @@ for type in fullsentence; do --config ${config_path} \ --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} if [ $? -ne 0 ]; then echo "Failed in evaluation!" From a0c94209e22eeb515b8eba7b1ef946e5561e93d0 Mon Sep 17 00:00:00 2001 From: Junkun Date: Tue, 28 Sep 2021 17:07:20 -0700 Subject: [PATCH 2/4] update the result of timit --- examples/timit/s1/README.md | 10 ++++++++- examples/timit/s1/conf/transformer.yaml | 30 ++++++++++++------------- examples/timit/s1/local/test.sh | 6 +++-- 3 files changed, 28 insertions(+), 18 deletions(-) diff --git a/examples/timit/s1/README.md b/examples/timit/s1/README.md index 4d9b146a..6d719a7d 100644 --- a/examples/timit/s1/README.md +++ b/examples/timit/s1/README.md @@ -1,3 +1,11 @@ # TIMIT -Results will be organized and updated soon. + + + +### Transformer +| Model | Params | Config | Decode method | PER | +| --- | --- | --- | --- | --- | +| transformer | 5.17M | conf/transformer.yaml | attention | 0.5531 | +| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search | 0.3922 | +| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 0.3768 | \ No newline at end of file diff --git a/examples/timit/s1/conf/transformer.yaml b/examples/timit/s1/conf/transformer.yaml index c3b51996..a27b3160 100644 --- a/examples/timit/s1/conf/transformer.yaml +++ b/examples/timit/s1/conf/transformer.yaml @@ -3,12 +3,12 @@ data: train_manifest: data/manifest.train dev_manifest: data/manifest.dev test_manifest: data/manifest.test - min_input_len: 0.5 # second - max_input_len: 30.0 # second + min_input_len: 0.0 # second + max_input_len: 10.0 # second min_output_len: 0.0 # tokens - max_output_len: 400.0 # tokens - min_output_input_ratio: 0.05 - max_output_input_ratio: 100.0 + max_output_len: 150.0 # tokens + min_output_input_ratio: 0.005 + max_output_input_ratio: 1000.0 collator: vocab_filepath: data/vocab.txt @@ -42,10 +42,10 @@ model: # encoder related encoder: transformer encoder_conf: - output_size: 256 # dimension of attention + output_size: 128 # dimension of attention attention_heads: 4 - linear_units: 2048 # the number of units of position-wise feed forward - num_blocks: 12 # the number of encoder blocks + linear_units: 1024 # the number of units of position-wise feed forward + num_blocks: 6 # the number of encoder blocks dropout_rate: 0.1 positional_dropout_rate: 0.1 attention_dropout_rate: 0.0 @@ -56,7 +56,7 @@ model: decoder: transformer decoder_conf: attention_heads: 4 - linear_units: 2048 + linear_units: 1024 num_blocks: 6 dropout_rate: 0.1 positional_dropout_rate: 0.1 @@ -65,26 +65,26 @@ model: # hybrid CTC/attention model_conf: - ctc_weight: 0.3 + ctc_weight: 0.5 ctc_dropoutrate: 0.0 - ctc_grad_norm_type: instance + ctc_grad_norm_type: batch lsm_weight: 0.1 # label smoothing option length_normalized_loss: false training: - n_epoch: 120 + n_epoch: 200 accum_grad: 2 global_grad_clip: 5.0 optim: adam optim_conf: - lr: 0.002 + lr: 0.004 weight_decay: 1e-06 scheduler: warmuplr # pytorch v1.1.0+ required scheduler_conf: - warmup_steps: 400 + warmup_steps: 2000 lr_decay: 1.0 - log_interval: 100 + log_interval: 10 checkpoint: kbest_n: 50 latest_n: 5 diff --git a/examples/timit/s1/local/test.sh b/examples/timit/s1/local/test.sh index 868c8fda..05813179 100755 --- a/examples/timit/s1/local/test.sh +++ b/examples/timit/s1/local/test.sh @@ -36,7 +36,8 @@ for type in attention ctc_greedy_search; do --config ${config_path} \ --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} if [ $? -ne 0 ]; then echo "Failed in evaluation!" @@ -52,7 +53,8 @@ for type in ctc_prefix_beam_search attention_rescoring; do --config ${config_path} \ --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} if [ $? -ne 0 ]; then echo "Failed in evaluation!" From 251d32a60947dd675bfe18351c72ac4ebd2597b4 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 29 Sep 2021 09:00:07 +0000 Subject: [PATCH 3/4] fix timit scripts; reader filtype case; --- deepspeech/io/reader.py | 2 +- deepspeech/utils/error_rate.py | 3 + deepspeech/utils/tensor_utils.py | 13 ++- examples/ted_en_zh/t0/README.md | 2 +- examples/ted_en_zh/t0/local/data.sh | 2 + examples/timit/s1/.gitignore | 3 + examples/timit/s1/README.md | 12 +-- examples/timit/s1/local/test.sh | 103 +++++++++++++------- examples/timit/s1/local/timit_data_prep.sh | 0 examples/timit/s1/local/timit_norm_trans.pl | 0 examples/timit/s1/run.sh | 8 +- requirements.txt | 1 + 12 files changed, 97 insertions(+), 52 deletions(-) create mode 100644 examples/timit/s1/.gitignore mode change 100644 => 100755 examples/timit/s1/local/timit_data_prep.sh mode change 100644 => 100755 examples/timit/s1/local/timit_norm_trans.pl diff --git a/deepspeech/io/reader.py b/deepspeech/io/reader.py index e7c43a78..5873788b 100644 --- a/deepspeech/io/reader.py +++ b/deepspeech/io/reader.py @@ -322,7 +322,7 @@ class LoadInputsAndTargets(): "Not supported: loader_type={}".format(filetype)) def file_type(self, filepath): - suffix = filepath.split(":")[0].split('.')[-1] + suffix = filepath.split(":")[0].split('.')[-1].lower() if suffix == 'ark': return 'mat' elif suffix == 'scp': diff --git a/deepspeech/utils/error_rate.py b/deepspeech/utils/error_rate.py index b6399bab..6fd593eb 100644 --- a/deepspeech/utils/error_rate.py +++ b/deepspeech/utils/error_rate.py @@ -14,6 +14,7 @@ """This module provides functions to calculate error rate in different level. e.g. wer for word-level, cer for char-level. """ +import editdistance import numpy as np __all__ = ['word_errors', 'char_errors', 'wer', 'cer'] @@ -89,6 +90,7 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): hyp_words = list(filter(None, hypothesis.split(delimiter))) edit_distance = _levenshtein_distance(ref_words, hyp_words) + # edit_distance = editdistance.eval(ref_words, hyp_words) return float(edit_distance), len(ref_words) @@ -119,6 +121,7 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): hypothesis = join_char.join(list(filter(None, hypothesis.split(' ')))) edit_distance = _levenshtein_distance(reference, hypothesis) + # edit_distance = editdistance.eval(reference, hypothesis) return float(edit_distance), len(reference) diff --git a/deepspeech/utils/tensor_utils.py b/deepspeech/utils/tensor_utils.py index 61798816..0050794c 100644 --- a/deepspeech/utils/tensor_utils.py +++ b/deepspeech/utils/tensor_utils.py @@ -93,20 +93,25 @@ def pad_sequence(sequences: List[paddle.Tensor], for i, tensor in enumerate(sequences): length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor + logger.info( + f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}" + ) if batch_first: # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support int16 + # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] # out_tensor[i, :length, ...] = tensor if length != 0: - out_tensor[i, :length, ...] = tensor + out_tensor[i, :length] = tensor else: - out_tensor[i, length, ...] = tensor + out_tensor[i, length] = tensor else: # TODO (Hui Zhang): set_value op not supprot `end==start` # out_tensor[:length, i, ...] = tensor if length != 0: - out_tensor[:length, i, ...] = tensor + out_tensor[:length, i] = tensor else: - out_tensor[length, i, ...] = tensor + out_tensor[length, i] = tensor return out_tensor diff --git a/examples/ted_en_zh/t0/README.md b/examples/ted_en_zh/t0/README.md index 9bca2643..66a5dbec 100644 --- a/examples/ted_en_zh/t0/README.md +++ b/examples/ted_en_zh/t0/README.md @@ -12,4 +12,4 @@ ## Transformer | Model | Params | Config | Char-BLEU | | --- | --- | --- | --- | -| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 | \ No newline at end of file +| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 | diff --git a/examples/ted_en_zh/t0/local/data.sh b/examples/ted_en_zh/t0/local/data.sh index 43911c34..96aa745a 100755 --- a/examples/ted_en_zh/t0/local/data.sh +++ b/examples/ted_en_zh/t0/local/data.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + stage=-1 stop_stage=100 diff --git a/examples/timit/s1/.gitignore b/examples/timit/s1/.gitignore new file mode 100644 index 00000000..7a9843bc --- /dev/null +++ b/examples/timit/s1/.gitignore @@ -0,0 +1,3 @@ +data +exp +test.profile diff --git a/examples/timit/s1/README.md b/examples/timit/s1/README.md index 6d719a7d..d516040d 100644 --- a/examples/timit/s1/README.md +++ b/examples/timit/s1/README.md @@ -1,11 +1,9 @@ # TIMIT - - - ### Transformer -| Model | Params | Config | Decode method | PER | +| Model | Params | Config | Decode method | Loss | PER | | --- | --- | --- | --- | --- | -| transformer | 5.17M | conf/transformer.yaml | attention | 0.5531 | -| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search | 0.3922 | -| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 0.3768 | \ No newline at end of file +| transformer | 5.17M | conf/transformer.yaml | attention | 49.25688171386719 | 0.510742 | +| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search | 49.25688171386719 | 0.382398 | +| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 49.25688171386719 | 0.367429 | +| transformer | 5.17M | conf/transformer.yaml | attention_rescore | 49.25688171386719 | 0.357173 | diff --git a/examples/timit/s1/local/test.sh b/examples/timit/s1/local/test.sh index 05813179..575bff57 100755 --- a/examples/timit/s1/local/test.sh +++ b/examples/timit/s1/local/test.sh @@ -1,10 +1,18 @@ #!/bin/bash +set -e + +stage=0 +stop_stage=50 + +. ${MAIN_ROOT}/utils/parse_options.sh || exit 1; + if [ $# != 2 ];then echo "usage: ${0} config_path ckpt_path_prefix" exit -1 fi + ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') echo "using $ngpu gpus..." @@ -23,44 +31,67 @@ fi # exit 1 #fi -for type in attention ctc_greedy_search; do - echo "decoding ${type}" - if [ ${chunk_mode} == true ];then - # stream decoding only support batchsize=1 - batch_size=1 - else - batch_size=64 - fi - python3 -u ${BIN_DIR}/test.py \ - --nproc ${ngpu} \ - --config ${config_path} \ - --result_file ${ckpt_prefix}.${type}.rsl \ - --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} \ - --opts decoding.batch_size ${batch_size} - - if [ $? -ne 0 ]; then - echo "Failed in evaluation!" - exit 1 - fi -done +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + for type in attention ctc_greedy_search; do + echo "decoding ${type}" + if [ ${chunk_mode} == true ];then + # stream decoding only support batchsize=1 + batch_size=1 + else + batch_size=64 + fi + python3 -u ${BIN_DIR}/test.py \ + --nproc ${ngpu} \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + done +fi -for type in ctc_prefix_beam_search attention_rescoring; do - echo "decoding ${type}" - batch_size=1 - python3 -u ${BIN_DIR}/test.py \ - --nproc ${ngpu} \ - --config ${config_path} \ - --result_file ${ckpt_prefix}.${type}.rsl \ - --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} \ - --opts decoding.batch_size ${batch_size} - if [ $? -ne 0 ]; then - echo "Failed in evaluation!" - exit 1 - fi -done +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + for type in ctc_prefix_beam_search; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --nproc ${ngpu} \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + done +fi +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + for type in attention_rescoring; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --nproc ${ngpu} \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + done +fi exit 0 diff --git a/examples/timit/s1/local/timit_data_prep.sh b/examples/timit/s1/local/timit_data_prep.sh old mode 100644 new mode 100755 diff --git a/examples/timit/s1/local/timit_norm_trans.pl b/examples/timit/s1/local/timit_norm_trans.pl old mode 100644 new mode 100755 diff --git a/examples/timit/s1/run.sh b/examples/timit/s1/run.sh index 75a2e0c5..207a9b84 100755 --- a/examples/timit/s1/run.sh +++ b/examples/timit/s1/run.sh @@ -1,13 +1,15 @@ #!/bin/bash set -e -source path.sh + +. path.sh || exit 1; stage=0 stop_stage=50 conf_path=conf/transformer.yaml avg_num=10 -TIMIT_path= #path of TIMIT (Required, e.g. /export/corpora5/LDC/LDC93S1/timit/TIMIT) -source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; +TIMIT_path=/workspace/zhanghui/dataset/data/lisa/data/timit/raw/TIMIT + +. ${MAIN_ROOT}/utils/parse_options.sh || exit 1; avg_ckpt=avg_${avg_num} ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}') diff --git a/requirements.txt b/requirements.txt index 925e0a31..9ecf6bbd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,4 @@ tqdm typeguard visualdl==2.2.0 yacs +editdistance \ No newline at end of file From d05baeb6b0756c20e1bb7729bfec19af0a61cbb5 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 30 Sep 2021 02:25:13 +0000 Subject: [PATCH 4/4] update ted zh en --- deepspeech/exps/u2_st/config.py | 2 +- examples/ted_en_zh/t0/.gitignore | 2 +- examples/ted_en_zh/t0/local/data.sh | 2 +- examples/ted_en_zh/t0/run.sh | 2 +- examples/timit/s1/run.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/deepspeech/exps/u2_st/config.py b/deepspeech/exps/u2_st/config.py index b1b7b357..818b0e25 100644 --- a/deepspeech/exps/u2_st/config.py +++ b/deepspeech/exps/u2_st/config.py @@ -15,7 +15,7 @@ from yacs.config import CfgNode from deepspeech.exps.u2_st.model import U2STTester from deepspeech.exps.u2_st.model import U2STTrainer -from deepspeech.io.collator_st import SpeechCollator +from deepspeech.io.collator import SpeechCollator from deepspeech.io.dataset import ManifestDataset from deepspeech.models.u2_st import U2STModel diff --git a/examples/ted_en_zh/t0/.gitignore b/examples/ted_en_zh/t0/.gitignore index 469c6171..123e5174 100644 --- a/examples/ted_en_zh/t0/.gitignore +++ b/examples/ted_en_zh/t0/.gitignore @@ -1,3 +1,3 @@ -TED-En-Zh +TED_EnZh data exp diff --git a/examples/ted_en_zh/t0/local/data.sh b/examples/ted_en_zh/t0/local/data.sh index 96aa745a..3aae24fd 100755 --- a/examples/ted_en_zh/t0/local/data.sh +++ b/examples/ted_en_zh/t0/local/data.sh @@ -9,7 +9,7 @@ stop_stage=100 nbpe=8000 bpemode=unigram bpeprefix="data/bpe_${bpemode}_${nbpe}" -data_dir=/mnt/dataset/TED_EnZh +data_dir=./TED_EnZh source ${MAIN_ROOT}/utils/parse_options.sh diff --git a/examples/ted_en_zh/t0/run.sh b/examples/ted_en_zh/t0/run.sh index 7508f0e8..e9f4a058 100755 --- a/examples/ted_en_zh/t0/run.sh +++ b/examples/ted_en_zh/t0/run.sh @@ -6,7 +6,7 @@ stage=0 stop_stage=100 conf_path=conf/transformer_joint_noam.yaml avg_num=5 -data_path=./TED-En-Zh # path to unzipped data +data_path=./TED_EnZh # path to unzipped data source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; avg_ckpt=avg_${avg_num} diff --git a/examples/timit/s1/run.sh b/examples/timit/s1/run.sh index 207a9b84..4c0a5cdc 100755 --- a/examples/timit/s1/run.sh +++ b/examples/timit/s1/run.sh @@ -7,7 +7,7 @@ stage=0 stop_stage=50 conf_path=conf/transformer.yaml avg_num=10 -TIMIT_path=/workspace/zhanghui/dataset/data/lisa/data/timit/raw/TIMIT +TIMIT_path=/path/to/TIMIT . ${MAIN_ROOT}/utils/parse_options.sh || exit 1;