diff --git a/deepspeech/utils/error_rate.py b/deepspeech/utils/error_rate.py index 6fd593eb..81f458b6 100644 --- a/deepspeech/utils/error_rate.py +++ b/deepspeech/utils/error_rate.py @@ -19,6 +19,8 @@ import numpy as np __all__ = ['word_errors', 'char_errors', 'wer', 'cer'] +editdistance.eval("a", "b") + def _levenshtein_distance(ref, hyp): """Levenshtein distance is a string metric for measuring the difference @@ -90,6 +92,7 @@ def word_errors(reference, hypothesis, ignore_case=False, delimiter=' '): hyp_words = list(filter(None, hypothesis.split(delimiter))) edit_distance = _levenshtein_distance(ref_words, hyp_words) + # `editdistance.eval` is less precise than `_levenshtein_distance` # edit_distance = editdistance.eval(ref_words, hyp_words) return float(edit_distance), len(ref_words) @@ -121,6 +124,7 @@ def char_errors(reference, hypothesis, ignore_case=False, remove_space=False): hypothesis = join_char.join(list(filter(None, hypothesis.split(' ')))) edit_distance = _levenshtein_distance(reference, hypothesis) + # `editdistance.eval` is less precise than `_levenshtein_distance` # edit_distance = editdistance.eval(reference, hypothesis) return float(edit_distance), len(reference) diff --git a/requirements.txt b/requirements.txt index 9ecf6bbd..332b5238 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ coverage +editdistance gpustat jsonlines kaldiio @@ -19,4 +20,3 @@ tqdm typeguard visualdl==2.2.0 yacs -editdistance \ No newline at end of file