diff --git a/deepspeech/io/reader.py b/deepspeech/io/reader.py index e7c43a78..5873788b 100644 --- a/deepspeech/io/reader.py +++ b/deepspeech/io/reader.py @@ -322,7 +322,7 @@ class LoadInputsAndTargets(): "Not supported: loader_type={}".format(filetype)) def file_type(self, filepath): - suffix = filepath.split(":")[0].split('.')[-1] + suffix = filepath.split(":")[0].split('.')[-1].lower() if suffix == 'ark': return 'mat' elif suffix == 'scp': diff --git a/deepspeech/utils/error_rate.py b/deepspeech/utils/error_rate.py index b6399bab..6fd593eb 100644 --- a/deepspeech/utils/error_rate.py +++ b/deepspeech/utils/error_rate.py @@ -14,6 +14,7 @@ """This module provides functions to calculate error rate in different level. e.g. wer for word-level, cer for char-level. """ +import editdistance import numpy as np __all__ = ['word_errors', 'char_errors', 'wer', 'cer'] @@ -89,6 +90,7 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): hyp_words = list(filter(None, hypothesis.split(delimiter))) edit_distance = _levenshtein_distance(ref_words, hyp_words) + # edit_distance = editdistance.eval(ref_words, hyp_words) return float(edit_distance), len(ref_words) @@ -119,6 +121,7 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): hypothesis = join_char.join(list(filter(None, hypothesis.split(' ')))) edit_distance = _levenshtein_distance(reference, hypothesis) + # edit_distance = editdistance.eval(reference, hypothesis) return float(edit_distance), len(reference) diff --git a/deepspeech/utils/tensor_utils.py b/deepspeech/utils/tensor_utils.py index 61798816..0050794c 100644 --- a/deepspeech/utils/tensor_utils.py +++ b/deepspeech/utils/tensor_utils.py @@ -93,20 +93,25 @@ def pad_sequence(sequences: List[paddle.Tensor], for i, tensor in enumerate(sequences): length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor + logger.info( + f"length {length}, out_tensor {out_tensor.shape}, tensor 
{tensor.shape}" + ) if batch_first: # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support int16 + # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] # out_tensor[i, :length, ...] = tensor if length != 0: - out_tensor[i, :length, ...] = tensor + out_tensor[i, :length] = tensor else: - out_tensor[i, length, ...] = tensor + out_tensor[i, length] = tensor else: # TODO (Hui Zhang): set_value op not supprot `end==start` # out_tensor[:length, i, ...] = tensor if length != 0: - out_tensor[:length, i, ...] = tensor + out_tensor[:length, i] = tensor else: - out_tensor[length, i, ...] = tensor + out_tensor[length, i] = tensor return out_tensor diff --git a/examples/ted_en_zh/t0/README.md b/examples/ted_en_zh/t0/README.md index 9bca2643..66a5dbec 100644 --- a/examples/ted_en_zh/t0/README.md +++ b/examples/ted_en_zh/t0/README.md @@ -12,4 +12,4 @@ ## Transformer | Model | Params | Config | Char-BLEU | | --- | --- | --- | --- | -| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 | \ No newline at end of file +| Transformer+ASR MTL | 50.26M | conf/transformer_joint_noam.yaml | 17.38 | diff --git a/examples/ted_en_zh/t0/local/data.sh b/examples/ted_en_zh/t0/local/data.sh index 43911c34..96aa745a 100755 --- a/examples/ted_en_zh/t0/local/data.sh +++ b/examples/ted_en_zh/t0/local/data.sh @@ -1,5 +1,7 @@ #!/bin/bash +set -e + stage=-1 stop_stage=100 diff --git a/examples/timit/s1/.gitignore b/examples/timit/s1/.gitignore new file mode 100644 index 00000000..7a9843bc --- /dev/null +++ b/examples/timit/s1/.gitignore @@ -0,0 +1,3 @@ +data +exp +test.profile diff --git a/examples/timit/s1/README.md b/examples/timit/s1/README.md index 6d719a7d..d516040d 100644 --- a/examples/timit/s1/README.md +++ b/examples/timit/s1/README.md @@ -1,11 +1,9 @@ # TIMIT - - - ### Transformer -| Model | Params | Config | Decode method | PER | +| Model | Params | Config | Decode method | Loss | PER | | --- | --- | --- | --- | 
--- | -| transformer | 5.17M | conf/transformer.yaml | attention | 0.5531 | -| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search | 0.3922 | -| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 0.3768 | \ No newline at end of file +| transformer | 5.17M | conf/transformer.yaml | attention | 49.25688171386719 | 0.510742 | +| transformer | 5.17M | conf/transformer.yaml | ctc_greedy_search | 49.25688171386719 | 0.382398 | +| transformer | 5.17M | conf/transformer.yaml | ctc_prefix_beam_search | 49.25688171386719 | 0.367429 | +| transformer | 5.17M | conf/transformer.yaml | attention_rescoring | 49.25688171386719 | 0.357173 | diff --git a/examples/timit/s1/local/test.sh b/examples/timit/s1/local/test.sh index 05813179..575bff57 100755 --- a/examples/timit/s1/local/test.sh +++ b/examples/timit/s1/local/test.sh @@ -1,10 +1,18 @@ #!/bin/bash +set -e + +stage=0 +stop_stage=50 + +. ${MAIN_ROOT}/utils/parse_options.sh || exit 1; + if [ $# != 2 ];then echo "usage: ${0} config_path ckpt_path_prefix" exit -1 fi + ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}') echo "using $ngpu gpus..." @@ -23,44 +31,67 @@ fi # exit 1 #fi -for type in attention ctc_greedy_search; do - echo "decoding ${type}" - if [ ${chunk_mode} == true ];then - # stream decoding only support batchsize=1 - batch_size=1 - else - batch_size=64 - fi - python3 -u ${BIN_DIR}/test.py \ - --nproc ${ngpu} \ - --config ${config_path} \ - --result_file ${ckpt_prefix}.${type}.rsl \ - --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} \ - --opts decoding.batch_size ${batch_size} - - if [ $? -ne 0 ]; then - echo "Failed in evaluation!" 
- exit 1 - fi -done +if [ ${stage} -le 0 ] && [ ${stop_stage} -ge 0 ]; then + for type in attention ctc_greedy_search; do + echo "decoding ${type}" + if [ ${chunk_mode} == true ];then + # stream decoding only support batchsize=1 + batch_size=1 + else + batch_size=64 + fi + python3 -u ${BIN_DIR}/test.py \ + --nproc ${ngpu} \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + done +fi -for type in ctc_prefix_beam_search attention_rescoring; do - echo "decoding ${type}" - batch_size=1 - python3 -u ${BIN_DIR}/test.py \ - --nproc ${ngpu} \ - --config ${config_path} \ - --result_file ${ckpt_prefix}.${type}.rsl \ - --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} \ - --opts decoding.batch_size ${batch_size} - if [ $? -ne 0 ]; then - echo "Failed in evaluation!" - exit 1 - fi -done +if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then + for type in ctc_prefix_beam_search; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --nproc ${ngpu} \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" + exit 1 + fi + done +fi +if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then + for type in attention_rescoring; do + echo "decoding ${type}" + batch_size=1 + python3 -u ${BIN_DIR}/test.py \ + --nproc ${ngpu} \ + --config ${config_path} \ + --result_file ${ckpt_prefix}.${type}.rsl \ + --checkpoint_path ${ckpt_prefix} \ + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} + + if [ $? -ne 0 ]; then + echo "Failed in evaluation!" 
+ exit 1 + fi + done +fi exit 0 diff --git a/examples/timit/s1/local/timit_data_prep.sh b/examples/timit/s1/local/timit_data_prep.sh old mode 100644 new mode 100755 diff --git a/examples/timit/s1/local/timit_norm_trans.pl b/examples/timit/s1/local/timit_norm_trans.pl old mode 100644 new mode 100755 diff --git a/examples/timit/s1/run.sh b/examples/timit/s1/run.sh index 75a2e0c5..207a9b84 100755 --- a/examples/timit/s1/run.sh +++ b/examples/timit/s1/run.sh @@ -1,13 +1,15 @@ #!/bin/bash set -e -source path.sh + +. path.sh || exit 1; stage=0 stop_stage=50 conf_path=conf/transformer.yaml avg_num=10 -TIMIT_path= #path of TIMIT (Required, e.g. /export/corpora5/LDC/LDC93S1/timit/TIMIT) -source ${MAIN_ROOT}/utils/parse_options.sh || exit 1; +TIMIT_path=/workspace/zhanghui/dataset/data/lisa/data/timit/raw/TIMIT + +. ${MAIN_ROOT}/utils/parse_options.sh || exit 1; avg_ckpt=avg_${avg_num} ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}') diff --git a/requirements.txt b/requirements.txt index 925e0a31..9ecf6bbd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,3 +19,4 @@ tqdm typeguard visualdl==2.2.0 yacs +editdistance \ No newline at end of file