diff --git a/deepspeech/decoders/README.md b/deepspeech/decoders/README.md
index 11eb5606..ee7d53de 100644
--- a/deepspeech/decoders/README.md
+++ b/deepspeech/decoders/README.md
@@ -10,4 +10,4 @@
 * [Vectorized Beam Search for CTC-Attention-based Speech Recognition](https://www.isca-speech.org/archive/pdfs/interspeech_2019/seki19b_interspeech.pdf)
 
 ### Streaming Join CTC/ATT Beam Search
-* [STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH](https://arxiv.org/abs/2006.14941)
\ No newline at end of file
+* [STREAMING TRANSFORMER ASR WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH](https://arxiv.org/abs/2006.14941)
diff --git a/deepspeech/decoders/__init__.py b/deepspeech/decoders/__init__.py
index 1ea05143..f04a6d19 100644
--- a/deepspeech/decoders/__init__.py
+++ b/deepspeech/decoders/__init__.py
@@ -1 +1,14 @@
-from .ctcdecoder import swig_wrapper
\ No newline at end of file
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .ctcdecoder import swig_wrapper
diff --git a/deepspeech/decoders/scores/__init__.py b/deepspeech/decoders/scores/__init__.py
index e69de29b..185a92b8 100644
--- a/deepspeech/decoders/scores/__init__.py
+++ b/deepspeech/decoders/scores/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/deepspeech/decoders/scores/ctc.py b/deepspeech/decoders/scores/ctc.py
index aaa3dc86..4871d6e1 100644
--- a/deepspeech/decoders/scores/ctc.py
+++ b/deepspeech/decoders/scores/ctc.py
@@ -1,5 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """ScorerInterface implementation for CTC."""
-
 import numpy as np
 import paddle
 
@@ -81,8 +93,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         prev_score, state = state
         presub_score, new_st = self.impl(y.cpu(), ids.cpu(), state)
         tscore = paddle.to_tensor(
-            presub_score - prev_score, place=x.place, dtype=x.dtype
-        )
+            presub_score - prev_score, place=x.place, dtype=x.dtype)
         return tscore, (presub_score, new_st)
 
     def batch_init_state(self, x: paddle.Tensor):
@@ -115,15 +126,9 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
 
         """
         batch_state = (
-            (
-                paddle.stack([s[0] for s in state], axis=2),
-                paddle.stack([s[1] for s in state]),
-                state[0][2],
-                state[0][3],
-            )
-            if state[0] is not None
-            else None
-        )
+            (paddle.stack([s[0] for s in state], axis=2),
+             paddle.stack([s[1] for s in state]), state[0][2], state[0][3], )
+            if state[0] is not None else None)
         return self.impl(y, batch_state, ids)
 
     def extend_prob(self, x: paddle.Tensor):
diff --git a/deepspeech/decoders/scores/ctc_prefix_score.py b/deepspeech/decoders/scores/ctc_prefix_score.py
index 754e43ae..c85d546d 100644
--- a/deepspeech/decoders/scores/ctc_prefix_score.py
+++ b/deepspeech/decoders/scores/ctc_prefix_score.py
@@ -1,11 +1,8 @@
 #!/usr/bin/env python3
-
 # Copyright 2018 Mitsubishi Electric Research Labs (Takaaki Hori)
 # Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
-
-import paddle
-
 import numpy as np
+import paddle
 import six
 
 
@@ -49,9 +46,10 @@ class CTCPrefixScorePD():
                 x[i, l:, blank] = 0
         # Reshape input x
         xn = x.transpose([1, 0, 2])  # (B, T, O) -> (T, B, O)
-        xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim) # (T,B,O)
+        xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1,
+                                                      self.odim)  # (T,B,O)
         self.x = paddle.stack([xn, xb])  # (2, T, B, O)
-        self.end_frames = paddle.to_tensor(xlens) - 1 # (B,)
+        self.end_frames = paddle.to_tensor(xlens) - 1  # (B,)
 
         # Setup CTC windowing
         self.margin = margin
@@ -59,7 +57,7 @@ class CTCPrefixScorePD():
         self.frame_ids = paddle.arange(self.input_length, dtype=self.dtype)
         # Base indices for index conversion
         # B idx, hyp idx. shape (B*W, 1)
-        self.idx_bh = None 
+        self.idx_bh = None
         # B idx. shape (B,)
         self.idx_b = paddle.arange(self.batch)
         # B idx, O idx. shape (B, 1)
@@ -78,56 +76,59 @@ class CTCPrefixScorePD():
         last_ids = [yi[-1] for yi in y]  # last output label ids
         n_bh = len(last_ids)  # batch * hyps
         n_hyps = n_bh // self.batch  # assuming each utterance has the same # of hyps
-        self.scoring_num = scoring_ids.size(-1) if scoring_ids is not None else 0
+        self.scoring_num = scoring_ids.size(
+            -1) if scoring_ids is not None else 0
         # prepare state info
         if state is None:
             r_prev = paddle.full(
                 (self.input_length, 2, self.batch, n_hyps),
                 self.logzero,
-                dtype=self.dtype,
-            )  # (T, 2, B, W)
-            r_prev[:, 1] = paddle.cumsum(self.x[0, :, :, self.blank], 0).unsqueeze(2)
-            r_prev = r_prev.view(-1, 2, n_bh)  # (T, 2, BW)
+                dtype=self.dtype, )  # (T, 2, B, W)
+            r_prev[:, 1] = paddle.cumsum(self.x[0, :, :, self.blank],
+                                         0).unsqueeze(2)
+            r_prev = r_prev.view(-1, 2, n_bh)  # (T, 2, BW)
             s_prev = 0.0  # score
-            f_min_prev = 0 # eq. 22-23
-            f_max_prev = 1 # eq. 22-23
+            f_min_prev = 0  # eq. 22-23
+            f_max_prev = 1  # eq. 22-23
         else:
             r_prev, s_prev, f_min_prev, f_max_prev = state
 
         # select input dimensions for scoring
         if self.scoring_num > 0:
             # (BW, O)
-            scoring_idmap = paddle.full((n_bh, self.odim), -1, dtype=paddle.long)
+            scoring_idmap = paddle.full(
+                (n_bh, self.odim), -1, dtype=paddle.long)
             snum = self.scoring_num
             if self.idx_bh is None or n_bh > len(self.idx_bh):
-                self.idx_bh = paddle.arange(n_bh).view(-1, 1) # (BW, 1)
+                self.idx_bh = paddle.arange(n_bh).view(-1, 1)  # (BW, 1)
             scoring_idmap[self.idx_bh[:n_bh], scoring_ids] = paddle.arange(snum)
             scoring_idx = (
-                scoring_ids + self.idx_bo.repeat(1, n_hyps).view(-1, 1) # (BW,1)
-            ).view(-1)  # (BWO)
+                scoring_ids + self.idx_bo.repeat(1, n_hyps).view(-1,
+                                                                 1)  # (BW,1)
+            ).view(-1)  # (BWO)
             # x_ shape (2, T, B*W, O)
             x_ = paddle.index_select(
-                self.x.view(2, -1, self.batch * self.odim), scoring_idx, 2
-            ).view(2, -1, n_bh, snum)
+                self.x.view(2, -1, self.batch * self.odim), scoring_idx,
+                2).view(2, -1, n_bh, snum)
         else:
             scoring_ids = None
             scoring_idmap = None
             snum = self.odim
             # x_ shape (2, T, B*W, O)
-            x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).view(2, -1, n_bh, snum)
+            x_ = self.x.unsqueeze(3).repeat(1, 1, 1, n_hyps, 1).view(2, -1,
+                                                                     n_bh, snum)
 
         # new CTC forward probs are prepared as a (T x 2 x BW x S) tensor
         # that corresponds to r_t^n(h) and r_t^b(h) in a batch.
         r = paddle.full(
             (self.input_length, 2, n_bh, snum),
             self.logzero,
-            dtype=self.dtype,
-        )
+            dtype=self.dtype, )
         if output_length == 0:
             r[0, 0] = x_[0, 0]
 
-        r_sum = paddle.logsumexp(r_prev, 1) #(T,BW)
-        log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum) # (T, BW, O)
+        r_sum = paddle.logsumexp(r_prev, 1)  #(T,BW)
+        log_phi = r_sum.unsqueeze(2).repeat(1, 1, snum)  # (T, BW, O)
         if scoring_ids is not None:
             for idx in range(n_bh):
                 pos = scoring_idmap[idx, last_ids[idx]]
@@ -152,27 +153,30 @@ class CTCPrefixScorePD():
 
         # compute forward probabilities log(r_t^n(h)) and log(r_t^b(h))
         for t in range(start, end):
-            rp = r[t - 1] # (2 x BW x O')
+            rp = r[t - 1]  # (2 x BW x O')
             rr = paddle.stack([rp[0], log_phi[t - 1], rp[0], rp[1]]).view(
-                2, 2, n_bh, snum
-            )  # (2,2,BW,O')
+                2, 2, n_bh, snum)  # (2,2,BW,O')
             r[t] = paddle.logsumexp(rr, 1) + x_[:, t]
 
         # compute log prefix probabilities log(psi)
-        log_phi_x = paddle.concat((log_phi[0].unsqueeze(0), log_phi[:-1]), axis=0) + x_[0]
+        log_phi_x = paddle.concat(
+            (log_phi[0].unsqueeze(0), log_phi[:-1]), axis=0) + x_[0]
         if scoring_ids is not None:
-            log_psi = paddle.full((n_bh, self.odim), self.logzero, dtype=self.dtype)
+            log_psi = paddle.full(
+                (n_bh, self.odim), self.logzero, dtype=self.dtype)
             log_psi_ = paddle.logsumexp(
-                paddle.concat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), axis=0),
-                axis=0,
-            )
+                paddle.concat(
+                    (log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)),
+                    axis=0),
+                axis=0, )
             for si in range(n_bh):
                 log_psi[si, scoring_ids[si]] = log_psi_[si]
         else:
             log_psi = paddle.logsumexp(
-                paddle.concat((log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)), axis=0),
-                axis=0,
-            )
+                paddle.concat(
+                    (log_phi_x[start:end], r[start - 1, 0].unsqueeze(0)),
+                    axis=0),
+                axis=0, )
 
         for si in range(n_bh):
             log_psi[si, self.eos] = r_sum[self.end_frames[si // n_hyps], si]
@@ -193,16 +197,16 @@ class CTCPrefixScorePD():
         # convert ids to BHO space
         n_bh = len(s)
         n_hyps = n_bh // self.batch
-        vidx = (best_ids + (self.idx_b * (n_hyps * self.odim)).view(-1, 1)).view(-1)
+        vidx = (best_ids + (self.idx_b *
+                            (n_hyps * self.odim)).view(-1, 1)).view(-1)
         # select hypothesis scores
         s_new = paddle.index_select(s.view(-1), vidx, 0)
         s_new = s_new.view(-1, 1).repeat(1, self.odim).view(n_bh, self.odim)
         # convert ids to BHS space (S: scoring_num)
         if scoring_idmap is not None:
             snum = self.scoring_num
-            hyp_idx = (best_ids // self.odim + (self.idx_b * n_hyps).view(-1, 1)).view(
-                -1
-            )
+            hyp_idx = (best_ids // self.odim +
+                       (self.idx_b * n_hyps).view(-1, 1)).view(-1)
             label_ids = paddle.fmod(best_ids, self.odim).view(-1)
             score_idx = scoring_idmap[hyp_idx, label_ids]
             score_idx[score_idx == -1] = 0
@@ -211,8 +215,7 @@ class CTCPrefixScorePD():
             snum = self.odim
         # select forward probabilities
         r_new = paddle.index_select(r.view(-1, 2, n_bh * snum), vidx, 2).view(
-            -1, 2, n_bh
-        )
+            -1, 2, n_bh)
         return r_new, s_new, f_min, f_max
 
     def extend_prob(self, x):
@@ -233,7 +236,7 @@ class CTCPrefixScorePD():
         xn = x.transpose([1, 0, 2])  # (B, T, O) -> (T, B, O)
         xb = xn[:, :, self.blank].unsqueeze(2).expand(-1, -1, self.odim)
         self.x = paddle.stack([xn, xb])  # (2, T, B, O)
-        self.x[:, : tmp_x.shape[1], :, :] = tmp_x
+        self.x[:, :tmp_x.shape[1], :, :] = tmp_x
         self.input_length = x.size(1)
         self.end_frames = paddle.to_tensor(xlens) - 1
 
@@ -254,12 +257,12 @@ class CTCPrefixScorePD():
         r_prev_new = paddle.full(
             (self.input_length, 2),
             self.logzero,
-            dtype=self.dtype,
-        )
+            dtype=self.dtype, )
 
         start = max(r_prev.shape[0], 1)
         r_prev_new[0:start] = r_prev
         for t in range(start, self.input_length):
-            r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :, self.blank]
+            r_prev_new[t, 1] = r_prev_new[t - 1, 1] + self.x[0, t, :,
+                                                             self.blank]
 
         return (r_prev_new, s_prev, f_min_prev, f_max_prev)
@@ -279,7 +282,7 @@ class CTCPrefixScore():
         self.blank = blank
         self.eos = eos
         self.input_length = len(x)
-        self.x = x # (T, O)
+        self.x = x  # (T, O)
 
     def initial_state(self):
         """Obtain an initial CTC state
@@ -318,12 +321,12 @@ class CTCPrefixScore():
             r[output_length - 1] = self.logzero
 
         # prepare forward probabilities for the last label
-        r_sum = self.xp.logaddexp(
-            r_prev[:, 0], r_prev[:, 1]
-        )  # log(r_t^n(g) + r_t^b(g))
+        r_sum = self.xp.logaddexp(r_prev[:, 0],
+                                  r_prev[:, 1])  # log(r_t^n(g) + r_t^b(g))
         last = y[-1]
         if output_length > 0 and last in cs:
-            log_phi = self.xp.ndarray((self.input_length, len(cs)), dtype=np.float32)
+            log_phi = self.xp.ndarray(
+                (self.input_length, len(cs)), dtype=np.float32)
             for i in six.moves.range(len(cs)):
                 log_phi[:, i] = r_sum if cs[i] != last else r_prev[:, 1]
         else:
@@ -335,9 +338,8 @@ class CTCPrefixScore():
         log_psi = r[start - 1, 0]
         for t in six.moves.range(start, self.input_length):
             r[t, 0] = self.xp.logaddexp(r[t - 1, 0], log_phi[t - 1]) + xs[t]
-            r[t, 1] = (
-                self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) + self.x[t, self.blank]
-            )
+            r[t, 1] = (self.xp.logaddexp(r[t - 1, 0], r[t - 1, 1]) +
+                       self.x[t, self.blank])
             log_psi = self.xp.logaddexp(log_psi, log_phi[t - 1] + xs[t])
 
         # get P(...eos|X) that ends with the prefix itself
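[Editor's note — illustrative sketch, not part of the patch.] The heart of CTCPrefixScore (and of its batched counterpart CTCPrefixScorePD) is the forward recursion over r_t^n, the paths for the extended prefix that end in the new label, and r_t^b, the paths that end in blank, with log_psi accumulating the prefix score. Below is a single-candidate numpy sketch of that recursion under the same conventions as the code above (log domain, logzero around -1e10); the helper names initial_state/extend are the editor's own, and batching, frame windowing and the eos case are omitted.

    import numpy as np

    LOGZERO = -1.0e10

    def initial_state(x, blank=0):
        """Forward variables of the empty prefix: only blanks consumed so far."""
        T = x.shape[0]
        r = np.full((T, 2), LOGZERO)
        r[0, 1] = x[0, blank]
        for t in range(1, T):
            r[t, 1] = r[t - 1, 1] + x[t, blank]
        return r

    def extend(x, y, r_prev, c, blank=0):
        """Score the prefix y + [c] given the forward variables of y.

        x: (T, V) frame-level log-posteriors, y: labels scored so far.
        Returns (log prefix score, forward variables of the new prefix).
        """
        T = x.shape[0]
        r = np.full((T, 2), LOGZERO)
        r_sum = np.logaddexp(r_prev[:, 0], r_prev[:, 1])
        # phi_t: probability that y is finished by frame t so c can start later;
        # a repeated label needs an intervening blank, hence r_prev[:, 1].
        log_phi = r_prev[:, 1] if (len(y) > 0 and y[-1] == c) else r_sum
        if len(y) == 0:
            r[0, 0] = x[0, c]  # the very first label may be emitted at frame 0
        log_psi = r[0, 0]
        for t in range(1, T):
            r[t, 0] = np.logaddexp(r[t - 1, 0], log_phi[t - 1]) + x[t, c]
            r[t, 1] = np.logaddexp(r[t - 1, 0], r[t - 1, 1]) + x[t, blank]
            log_psi = np.logaddexp(log_psi, log_phi[t - 1] + x[t, c])
        return log_psi, r

    rng = np.random.default_rng(0)
    x = np.log(rng.dirichlet(np.ones(5), size=20))  # (T=20, V=5) log-posteriors
    r0 = initial_state(x)
    score_a, r_a = extend(x, [], r0, c=2)            # score prefix [2]
    score_ab, r_ab = extend(x, [2], r_a, c=3)        # extend to [2, 3]
    print(score_a, score_ab)

CTCPrefixScorePD vectorizes this same recursion over batch, hypotheses and candidate tokens, which is where the (2, T, B, O) tensors and the index bookkeeping above come from.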
diff --git a/deepspeech/decoders/scores/length_bonus.py b/deepspeech/decoders/scores/length_bonus.py
index 76f45f80..864e22d1 100644
--- a/deepspeech/decoders/scores/length_bonus.py
+++ b/deepspeech/decoders/scores/length_bonus.py
@@ -1,3 +1,16 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Length bonus module."""
 from typing import Any
 from typing import List
@@ -34,11 +47,13 @@ class LengthBonus(BatchScorerInterface):
             and None
 
         """
-        return paddle.to_tensor([1.0], place=x.place, dtype=x.dtype).expand(self.n), None
+        return paddle.to_tensor(
+            [1.0], place=x.place, dtype=x.dtype).expand(self.n), None
 
-    def batch_score(
-        self, ys: paddle.Tensor, states: List[Any], xs: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, List[Any]]:
+    def batch_score(self,
+                    ys: paddle.Tensor,
+                    states: List[Any],
+                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
         """Score new token batch.
 
         Args:
@@ -53,9 +68,5 @@ class LengthBonus(BatchScorerInterface):
             and next state list for ys.
 
         """
-        return (
-            paddle.to_tensor([1.0], place=xs.place, dtype=xs.dtype).expand(
-                ys.shape[0], self.n
-            ),
-            None,
-        )
+        return (paddle.to_tensor([1.0], place=xs.place, dtype=xs.dtype).expand(
+            ys.shape[0], self.n), None, )
diff --git a/deepspeech/decoders/scores/ngram.py b/deepspeech/decoders/scores/ngram.py
index 9809427b..050a8c81 100644
--- a/deepspeech/decoders/scores/ngram.py
+++ b/deepspeech/decoders/scores/ngram.py
@@ -1,5 +1,17 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Ngram lm implement."""
-
 from abc import ABC
 
 import kenlm
@@ -51,9 +63,8 @@ class Ngrambase(ABC):
         self.lm.BaseScore(state, ys, out_state)
         scores = paddle.empty_like(next_token, dtype=x.dtype)
         for i, j in enumerate(next_token):
-            scores[i] = self.lm.BaseScore(
-                out_state, self.chardict[j], self.tmpkenlmstate
-            )
+            scores[i] = self.lm.BaseScore(out_state, self.chardict[j],
+                                          self.tmpkenlmstate)
         return scores, out_state
 
 
@@ -74,7 +85,8 @@ class NgramFullScorer(Ngrambase, BatchScorerInterface):
             and next state list for ys.
 
         """
-        return self.score_partial_(y, paddle.to_tensor(range(self.charlen)), state, x)
+        return self.score_partial_(
+            y, paddle.to_tensor(range(self.charlen)), state, x)
 
 
 class NgramPartScorer(Ngrambase, PartialScorerInterface):
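[Editor's note — illustrative sketch, not part of the patch.] LengthBonus contributes a constant +1 per emitted token and the KenLM wrappers contribute n-gram log scores; during beam search these per-token scores enter a weighted sum together with the decoder and CTC scores. A dependency-free sketch of that combination; the weight values and dictionary keys below are made up for the example.

    import numpy as np

    def combined_token_scores(parts, weights):
        """parts: name -> (vocab,) per-token scores; weights: name -> float."""
        total = np.zeros_like(next(iter(parts.values())))
        for name, score in parts.items():
            total = total + weights.get(name, 0.0) * score
        return total

    vocab = 6
    parts = {
        "decoder": np.log(np.full(vocab, 1.0 / vocab)),  # attention decoder
        "lm": np.log(np.full(vocab, 1.0 / vocab)),       # n-gram LM scores
        "length_bonus": np.ones(vocab),                  # LengthBonus: +1 per token
    }
    weights = {"decoder": 1.0, "lm": 0.5, "length_bonus": 0.3}
    print(combined_token_scores(parts, weights))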
diff --git a/deepspeech/decoders/scores/score_interface.py b/deepspeech/decoders/scores/score_interface.py
index c52f8d19..3a9c500b 100644
--- a/deepspeech/decoders/scores/score_interface.py
+++ b/deepspeech/decoders/scores/score_interface.py
@@ -1,11 +1,23 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 """Scorer interface module."""
-
+import warnings
 from typing import Any
 from typing import List
 from typing import Tuple
 
 import paddle
-import warnings
 
 
 class ScorerInterface:
@@ -37,7 +49,7 @@ class ScorerInterface:
         """
         return None
 
-    def select_state(self, state: Any, i: int, new_id: int = None) -> Any:
+    def select_state(self, state: Any, i: int, new_id: int=None) -> Any:
         """Select state with relative ids in the main beam search.
 
         Args:
@@ -51,9 +63,8 @@ class ScorerInterface:
         """
         return None if state is None else state[i]
 
-    def score(
-        self, y: paddle.Tensor, state: Any, x: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, Any]:
+    def score(self, y: paddle.Tensor, state: Any,
+              x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
         """Score new token (required).
 
         Args:
@@ -96,9 +107,10 @@ class BatchScorerInterface(ScorerInterface):
         """
         return self.init_state(x)
 
-    def batch_score(
-        self, ys: paddle.Tensor, states: List[Any], xs: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, List[Any]]:
+    def batch_score(self,
+                    ys: paddle.Tensor,
+                    states: List[Any],
+                    xs: paddle.Tensor) -> Tuple[paddle.Tensor, List[Any]]:
         """Score new token batch (required).
 
         Args:
@@ -114,10 +126,8 @@ class BatchScorerInterface(ScorerInterface):
 
         """
         warnings.warn(
-            "{} batch score is implemented through for loop not parallelized".format(
-                self.__class__.__name__
-            )
-        )
+            "{} batch score is implemented through for loop not parallelized".
+            format(self.__class__.__name__))
         scores = list()
         outstates = list()
         for i, (y, state, x) in enumerate(zip(ys, states, xs)):
@@ -141,9 +151,11 @@ class PartialScorerInterface(ScorerInterface):
 
     """
 
-    def score_partial(
-        self, y: paddle.Tensor, next_tokens: paddle.Tensor, state: Any, x: paddle.Tensor
-    ) -> Tuple[paddle.Tensor, Any]:
+    def score_partial(self,
+                      y: paddle.Tensor,
+                      next_tokens: paddle.Tensor,
+                      state: Any,
+                      x: paddle.Tensor) -> Tuple[paddle.Tensor, Any]:
         """Score new token (required).
 
         Args:
@@ -165,12 +177,11 @@ class BatchPartialScorerInterface(BatchScorerInterface, PartialScorerInterface):
     """Batch partial scorer interface for beam search."""
 
     def batch_score_partial(
-        self,
-        ys: paddle.Tensor,
-        next_tokens: paddle.Tensor,
-        states: List[Any],
-        xs: paddle.Tensor,
-    ) -> Tuple[paddle.Tensor, Any]:
+            self,
+            ys: paddle.Tensor,
+            next_tokens: paddle.Tensor,
+            states: List[Any],
+            xs: paddle.Tensor, ) -> Tuple[paddle.Tensor, Any]:
         """Score new token (required).
 
         Args:
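[Editor's note — illustrative sketch, not part of the patch.] The interfaces above define the contract every scorer has to honour: score returns a vocab-sized score vector plus a new state, select_state picks the state of a surviving hypothesis, and the BatchScorerInterface fallback simply loops over hypotheses (hence the warning in the diff). Below is a minimal conforming scorer, with numpy arrays standing in for paddle.Tensor; the class name is hypothetical.

    import numpy as np

    class UniformScorer:
        """Minimal ScorerInterface-style scorer: every token gets the same score."""

        def __init__(self, vocab_size):
            self.vocab_size = vocab_size

        def init_state(self, x):
            return None  # stateless scorer

        def select_state(self, state, i, new_id=None):
            return None if state is None else state[i]

        def score(self, y, state, x):
            # y: (ylen,) prefix ids, x: (T, D) encoder output
            scores = np.full(self.vocab_size, -np.log(self.vocab_size))
            return scores, state

    def batch_score(scorer, ys, states, xs):
        """The default BatchScorerInterface fallback: a plain Python loop."""
        outs, outstates = [], []
        for y, state, x in zip(ys, states, xs):
            s, st = scorer.score(y, state, x)
            outs.append(s)
            outstates.append(st)
        return np.stack(outs), outstates

    scorer = UniformScorer(vocab_size=8)
    ys = [np.array([1, 2]), np.array([1, 3])]
    scores, states = batch_score(scorer, ys, [None, None], [np.zeros((5, 4))] * 2)
    print(scores.shape)  # (2, 8)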
diff --git a/deepspeech/decoders/utils.py b/deepspeech/decoders/utils.py
index eec13138..92f65814 100644
--- a/deepspeech/decoders/utils.py
+++ b/deepspeech/decoders/utils.py
@@ -1,6 +1,20 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 __all__ = ["end_detect"]
+
 
 
 def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
     """End detection.
 
@@ -20,11 +34,12 @@ def end_detect(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
     for m in range(M):
         # get ended_hyps with their length is i - m
         hyp_length = i - m
-        hyps_same_length = [x for x in ended_hyps if len(x["yseq"]) == hyp_length]
+        hyps_same_length = [
+            x for x in ended_hyps if len(x["yseq"]) == hyp_length
+        ]
         if len(hyps_same_length) > 0:
             best_hyp_same_length = sorted(
-                hyps_same_length, key=lambda x: x["score"], reverse=True
-            )[0]
+                hyps_same_length, key=lambda x: x["score"], reverse=True)[0]
             if best_hyp_same_length["score"] - best_hyp["score"] < D_end:
                 count += 1
 
diff --git a/deepspeech/modules/ctc.py b/deepspeech/modules/ctc.py
index 7c2fd4ad..1f988675 100644
--- a/deepspeech/modules/ctc.py
+++ b/deepspeech/modules/ctc.py
@@ -125,7 +125,7 @@ class CTCDecoderBase(nn.Layer):
 
 
 class CTCDecoder(CTCDecoderBase):
-    def __init__(self,*args, **kwargs):
+    def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         # CTCDecoder LM Score handle
         self._ext_scorer = None
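[Editor's note — illustrative sketch, not part of the patch.] end_detect stops the outer decoding loop once the best hypotheses that finished at each of the last M output lengths are all far below the global best (more than 10 nats worse with the default D_end). A self-contained sketch of the same rule on made-up hypotheses; the dict keys mirror the ones the function reads ("yseq", "score").

    import numpy as np

    ended_hyps = [
        {"yseq": [1, 2], "score": -1.0},            # length 2, global best
        {"yseq": [1, 5, 2], "score": -12.5},        # length 3
        {"yseq": [1, 5, 7, 2], "score": -13.0},     # length 4
        {"yseq": [1, 5, 7, 9, 2], "score": -14.2},  # length 5
    ]

    def should_stop(ended_hyps, i, M=3, D_end=np.log(1 * np.exp(-10))):
        """Same idea as end_detect: stop once the best hypotheses that ended at
        the last M lengths are all much worse than the overall best."""
        if not ended_hyps:
            return False
        best = max(h["score"] for h in ended_hyps)
        count = 0
        for m in range(M):
            same_len = [h for h in ended_hyps if len(h["yseq"]) == i - m]
            if same_len and max(h["score"] for h in same_len) - best < D_end:
                count += 1
        return count == M

    for step in range(3, 8):
        if should_stop(ended_hyps, step):
            print("stop expanding hypotheses at output length", step)
            break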