From 7689ff412b59711e8557a5beb7d08b3de1c0ed17 Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Sun, 24 Oct 2021 15:03:04 +0000
Subject: [PATCH] wer/cer/bleu Calculator, label smoothing func

---
 deepspeech/utils/asr_utils.py  |  54 ++++++++++++
 deepspeech/utils/bleu_score.py |  72 ++++++++++++++--
 deepspeech/utils/error_rate.py | 151 ++++++++++++++++++++++++++++++++-
 3 files changed, 269 insertions(+), 8 deletions(-)
 create mode 100644 deepspeech/utils/asr_utils.py

diff --git a/deepspeech/utils/asr_utils.py b/deepspeech/utils/asr_utils.py
new file mode 100644
index 000000000..06cf64876
--- /dev/null
+++ b/deepspeech/utils/asr_utils.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import json
+import logging
+import sys
+import numpy as np
+
+__all__ = ["label_smoothing_dist"]
+
+
+# TODO(takaaki-hori): add different smoothing methods
+def label_smoothing_dist(odim, lsm_type, transcript=None, blank=0):
+    """Obtain label distribution for loss smoothing.
+
+    :param int odim: output dimension (label vocabulary size)
+    :param str lsm_type: label smoothing type; currently only "unigram"
+    :param str transcript: path to the transcript json file
+    :param int blank: blank symbol id, excluded from the distribution
+    :return: label distribution of shape (odim,)
+    """
+    if transcript is not None:
+        with open(transcript, "rb") as f:
+            trans_json = json.load(f)["utts"]
+
+    if lsm_type == "unigram":
+        assert transcript is not None, (
+            "transcript is required for %s label smoothing" % lsm_type
+        )
+        labelcount = np.zeros(odim)
+        for k, v in trans_json.items():
+            ids = np.array([int(n) for n in v["output"][0]["tokenid"].split()])
+            # to avoid an error when there is no text in an utterance
+            if len(ids) > 0:
+                labelcount[ids] += 1
+        labelcount[odim - 1] = len(trans_json)  # count <eos>, once per utterance
+        labelcount[labelcount == 0] = 1  # flooring
+        labelcount[blank] = 0  # remove counts for blank
+        labeldist = labelcount.astype(np.float32) / np.sum(labelcount)
+    else:
+        logging.error("Error: unexpected label smoothing type: %s" % lsm_type)
+        sys.exit()
+
+    return labeldist
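
A quick usage sketch for the new helper (the file name "data.json" and the sizes are hypothetical; the transcript json is assumed to follow the ESPnet-style layout read above, i.e. utts -> output[0] -> tokenid):

    import numpy as np
    from deepspeech.utils.asr_utils import label_smoothing_dist

    odim = 5002  # hypothetical vocabulary size; <eos> taken as id odim - 1, blank as 0
    labeldist = label_smoothing_dist(odim, "unigram", transcript="data.json")

    assert labeldist.shape == (odim,)
    assert np.isclose(labeldist.sum(), 1.0)  # normalized unigram prior over labels
    assert labeldist[0] == 0.0               # blank is zeroed before normalization

The resulting labeldist is the smoothing prior that a KL-divergence label-smoothing loss would typically use in place of a uniform distribution.
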
diff --git a/deepspeech/utils/bleu_score.py b/deepspeech/utils/bleu_score.py
index 09646133a..93749dddc 100644
--- a/deepspeech/utils/bleu_score.py
+++ b/deepspeech/utils/bleu_score.py
@@ -15,16 +15,16 @@
 e.g. wer for word-level, cer for char-level.
 """
 import sacrebleu
+import nltk
+import numpy as np
 
-__all__ = ['bleu', 'char_bleu']
+__all__ = ['bleu', 'char_bleu', "ErrorCalculator"]
 
 
 def bleu(hypothesis, reference):
     """Calculate BLEU.
 
     BLEU compares reference text and hypothesis text in word-level using
     scarebleu.
-
-
     :param reference: The reference sentences.
     :type reference: list[list[str]]
     :param hypothesis: The hypothesis sentence.
@@ -39,8 +39,6 @@ def char_bleu(hypothesis, reference):
     """Calculate BLEU.
 
     BLEU compares reference text and hypothesis text in char-level using
     scarebleu.
-
-
     :param reference: The reference sentences.
     :type reference: list[list[str]]
     :param hypothesis: The hypothesis sentence.
@@ -52,3 +50,67 @@
     for ref in reference]
 
     return sacrebleu.corpus_bleu(hypothesis, reference)
+
+
+class ErrorCalculator():
+    """Calculate BLEU for ST and MT models during training.
+
+    :param y_hats: numpy array with predicted text
+    :param y_pads: numpy array with true (target) text
+    :param char_list: vocabulary list
+    :param sym_space: space symbol
+    :param sym_pad: pad symbol
+    :param report_bleu: report BLEU score if True
+    """
+
+    def __init__(self, char_list, sym_space, sym_pad, report_bleu=False):
+        """Construct an ErrorCalculator object."""
+        super().__init__()
+        self.char_list = char_list
+        self.space = sym_space
+        self.pad = sym_pad
+        self.report_bleu = report_bleu
+        if self.space in self.char_list:
+            self.idx_space = self.char_list.index(self.space)
+        else:
+            self.idx_space = None
+
+    def __call__(self, ys_hat, ys_pad):
+        """Calculate corpus-level BLEU score.
+
+        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
+        :param paddle.Tensor ys_pad: reference (batch, seqlen)
+        :return: corpus-level BLEU score in a mini-batch
+        :rtype: float
+        """
+        bleu = None
+        if not self.report_bleu:
+            return bleu
+
+        bleu = self.calculate_corpus_bleu(ys_hat, ys_pad)
+        return bleu
+
+    def calculate_corpus_bleu(self, ys_hat, ys_pad):
+        """Calculate corpus-level BLEU score in a mini-batch.
+
+        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
+        :param paddle.Tensor ys_pad: reference (batch, seqlen)
+        :return: corpus-level BLEU score
+        :rtype: float
+        """
+        seqs_hat, seqs_true = [], []
+        for i, y_hat in enumerate(ys_hat):
+            y_true = ys_pad[i]
+            eos_true = np.where(y_true == -1)[0]
+            ymax = eos_true[0] if len(eos_true) > 0 else len(y_true)
+            # NOTE: padding index (-1) in y_true is used to pad y_hat
+            # because y_hats is not padded with -1
+            seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]]
+            seq_true = [self.char_list[int(idx)] for idx in y_true if int(idx) != -1]
+            seq_hat_text = "".join(seq_hat).replace(self.space, " ")
+            seq_hat_text = seq_hat_text.replace(self.pad, "")
+            seq_true_text = "".join(seq_true).replace(self.space, " ")
+            seqs_hat.append(seq_hat_text)
+            seqs_true.append(seq_true_text)
+        bleu = nltk.bleu_score.corpus_bleu([[ref] for ref in seqs_true], seqs_hat)
+        return bleu * 100
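
A minimal sketch of how the new BLEU ErrorCalculator is meant to be driven during validation (the tiny vocabulary and the -1 padding value here are illustrative assumptions; real callers pass the model's char_list and padded id arrays):

    import numpy as np
    from deepspeech.utils.bleu_score import ErrorCalculator

    char_list = ["<pad>", "a", "b", "<space>"]  # hypothetical toy vocabulary
    calc = ErrorCalculator(char_list, sym_space="<space>", sym_pad="<pad>",
                           report_bleu=True)

    # batch of one utterance; references are padded with -1
    ys_hat = np.array([[1, 2, 3, 1, 2, 3, 1, 2]])      # decodes to "ab ab ab"
    ys_pad = np.array([[1, 2, 3, 1, 2, 3, 1, 2, -1]])  # same text, -1-padded
    print(calc(ys_hat, ys_pad))  # 100.0 for an exact match; None if report_bleu=False

Note that the hypothesis is truncated at the reference's first -1 (ys_hat itself carries no -1 padding), and the texts are handed to nltk unsplit, so the n-grams are effectively computed over characters.
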
diff --git a/deepspeech/utils/error_rate.py b/deepspeech/utils/error_rate.py
index 81f458b6e..0ad62b6b6 100644
--- a/deepspeech/utils/error_rate.py
+++ b/deepspeech/utils/error_rate.py
@@ -16,10 +16,9 @@
 e.g. wer for word-level, cer for char-level.
 """
 import editdistance
 import numpy as np
+from itertools import groupby
 
-__all__ = ['word_errors', 'char_errors', 'wer', 'cer']
-
-editdistance.eval("a", "b")
+__all__ = ['word_errors', 'char_errors', 'wer', 'cer', "ErrorCalculator"]
 
 
@@ -211,3 +210,149 @@ def cer(reference, hypothesis, ignore_case=False, remove_space=False):
     cer = float(edit_distance) / ref_len
 
     return cer
+
+
+class ErrorCalculator():
+    """Calculate CER and WER for E2E_ASR and CTC models during training.
+
+    :param y_hats: numpy array with predicted text
+    :param y_pads: numpy array with true (target) text
+    :param char_list: vocabulary list (List[str])
+    :param sym_space: space symbol
+    :param sym_blank: blank symbol
+    :param report_cer: report CER if True; report_wer likewise for WER
+    """
+
+    def __init__(
+        self, char_list, sym_space, sym_blank, report_cer=False, report_wer=False
+    ):
+        """Construct an ErrorCalculator object."""
+        super().__init__()
+
+        self.report_cer = report_cer
+        self.report_wer = report_wer
+
+        self.char_list = char_list
+        self.space = sym_space
+        self.blank = sym_blank
+        self.idx_blank = self.char_list.index(self.blank)
+        if self.space in self.char_list:
+            self.idx_space = self.char_list.index(self.space)
+        else:
+            self.idx_space = None
+
+    def __call__(self, ys_hat, ys_pad, is_ctc=False):
+        """Calculate sentence-level CER/WER score.
+
+        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
+        :param paddle.Tensor ys_pad: reference (batch, seqlen)
+        :param bool is_ctc: calculate CER for CTC (returns a single float)
+        :return: sentence-level CER score (None unless report_cer is set)
+        :rtype: float
+        :return: sentence-level WER score (None unless report_wer is set)
+        :rtype: float
+        """
+        cer, wer = None, None
+        if is_ctc:
+            return self.calculate_cer_ctc(ys_hat, ys_pad)
+        elif not self.report_cer and not self.report_wer:
+            return cer, wer
+
+        seqs_hat, seqs_true = self.convert_to_char(ys_hat, ys_pad)
+        if self.report_cer:
+            cer = self.calculate_cer(seqs_hat, seqs_true)
+
+        if self.report_wer:
+            wer = self.calculate_wer(seqs_hat, seqs_true)
+        return cer, wer
+
+    def calculate_cer_ctc(self, ys_hat, ys_pad):
+        """Calculate sentence-level CER score for CTC.
+
+        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
+        :param paddle.Tensor ys_pad: reference (batch, seqlen)
+        :return: average sentence-level CER score
+        :rtype: float
+        """
+        cers, char_ref_lens = [], []
+        for i, y in enumerate(ys_hat):
+            y_hat = [x[0] for x in groupby(y)]  # collapse repeated CTC frames
+            y_true = ys_pad[i]
+            seq_hat, seq_true = [], []
+            for idx in y_hat:
+                idx = int(idx)
+                if idx != -1 and idx != self.idx_blank and idx != self.idx_space:
+                    seq_hat.append(self.char_list[idx])
+
+            for idx in y_true:
+                idx = int(idx)
+                if idx != -1 and idx != self.idx_blank and idx != self.idx_space:
+                    seq_true.append(self.char_list[idx])
+
+            hyp_chars = "".join(seq_hat)
+            ref_chars = "".join(seq_true)
+            if len(ref_chars) > 0:
+                cers.append(editdistance.eval(hyp_chars, ref_chars))
+                char_ref_lens.append(len(ref_chars))
+
+        cer_ctc = float(sum(cers)) / sum(char_ref_lens) if cers else None
+        return cer_ctc
+
+    def convert_to_char(self, ys_hat, ys_pad):
+        """Convert id sequences to character sequences.
+
+        :param paddle.Tensor ys_hat: prediction (batch, seqlen)
+        :param paddle.Tensor ys_pad: reference (batch, seqlen)
+        :return: list of prediction texts
+        :rtype: list
+        :return: list of reference texts
+        :rtype: list
+        """
+        seqs_hat, seqs_true = [], []
+        for i, y_hat in enumerate(ys_hat):
+            y_true = ys_pad[i]
+            eos_true = np.where(y_true == -1)[0]
+            ymax = eos_true[0] if len(eos_true) > 0 else len(y_true)
+            # NOTE: padding index (-1) in y_true is used to pad y_hat
+            seq_hat = [self.char_list[int(idx)] for idx in y_hat[:ymax]]
+            seq_true = [self.char_list[int(idx)] for idx in y_true if int(idx) != -1]
+            seq_hat_text = "".join(seq_hat).replace(self.space, " ")
+            seq_hat_text = seq_hat_text.replace(self.blank, "")
+            seq_true_text = "".join(seq_true).replace(self.space, " ")
+            seqs_hat.append(seq_hat_text)
+            seqs_true.append(seq_true_text)
+        return seqs_hat, seqs_true
+
+    def calculate_cer(self, seqs_hat, seqs_true):
+        """Calculate sentence-level CER score.
+
+        :param list seqs_hat: prediction
+        :param list seqs_true: reference
+        :return: average sentence-level CER score
+        :rtype: float
+        """
+        char_eds, char_ref_lens = [], []
+        for i, seq_hat_text in enumerate(seqs_hat):
+            seq_true_text = seqs_true[i]
+            hyp_chars = seq_hat_text.replace(" ", "")
+            ref_chars = seq_true_text.replace(" ", "")
+            char_eds.append(editdistance.eval(hyp_chars, ref_chars))
+            char_ref_lens.append(len(ref_chars))
+        return float(sum(char_eds)) / sum(char_ref_lens)
+
+    def calculate_wer(self, seqs_hat, seqs_true):
+        """Calculate sentence-level WER score.
+
+        :param list seqs_hat: prediction
+        :param list seqs_true: reference
+        :return: average sentence-level WER score
+        :rtype: float
+        """
+        word_eds, word_ref_lens = [], []
+        for i, seq_hat_text in enumerate(seqs_hat):
+            seq_true_text = seqs_true[i]
+            hyp_words = seq_hat_text.split()
+            ref_words = seq_true_text.split()
+            word_eds.append(editdistance.eval(hyp_words, ref_words))
+            word_ref_lens.append(len(ref_words))
+        return float(sum(word_eds)) / sum(word_ref_lens)
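
A matching sketch for the CER/WER calculator (the toy vocabulary and -1 padding are again assumptions; both report flags default to False, in which case __call__ returns (None, None)):

    import numpy as np
    from deepspeech.utils.error_rate import ErrorCalculator

    char_list = ["<blank>", "<space>", "a", "b", "c"]  # hypothetical toy vocabulary
    calc = ErrorCalculator(char_list, sym_space="<space>", sym_blank="<blank>",
                           report_cer=True, report_wer=True)

    ys_hat = np.array([[2, 3, 1, 4, 4]])      # "ab cc"
    ys_pad = np.array([[2, 3, 1, 4, 3, -1]])  # "ab cb", -1-padded reference
    cer, wer = calc(ys_hat, ys_pad)
    # cer == 0.25 (one wrong char out of four, spaces excluded)
    # wer == 0.5  (one wrong word out of two)

    # CTC path: frame-level ids with blanks and repeats, collapsed via groupby;
    # note this branch returns a single float, not a (cer, wer) tuple
    ctc_hat = np.array([[2, 2, 0, 3, 0, 1, 4, 3, 0]])  # -> "abcb" once repeats,
                                                       # blanks, and spaces are dropped
    cer_ctc = calc(ctc_hat, ys_pad, is_ctc=True)       # 0.0 for an exact match

Both averages are edit-distance totals divided by total reference length over the mini-batch, so long utterances weigh proportionally more than short ones.
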