# PaddleSpeech/paddlespeech/s2t/decoders/scorers/ctc.py

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Modified from espnet(https://github.com/espnet/espnet)
"""ScorerInterface implementation for CTC."""
import numpy as np
import paddle

from .ctc_prefix_score import CTCPrefixScore
from .ctc_prefix_score import CTCPrefixScorePD
from .scorer_interface import BatchPartialScorerInterface


class CTCPrefixScorer(BatchPartialScorerInterface):
    """Decoder interface wrapper for CTC prefix scoring.

    The single-hypothesis path (:meth:`init_state` / :meth:`score_partial`)
    is backed by :class:`CTCPrefixScore` running on numpy, while the batched
    path (:meth:`batch_init_state` / :meth:`batch_score_partial`) is backed
    by :class:`CTCPrefixScorePD`. Whichever ``*init_state`` method was called
    last decides which implementation is stored in ``self.impl``.
    """

    def __init__(self, ctc: paddle.nn.Layer, eos: int):
        """Initialize class.

        Args:
            ctc (paddle.nn.Layer): The CTC implementation.
                For example, :class:`paddlespeech.s2t.modules.ctc.CTC`
            eos (int): The end-of-sequence id.

        """
        self.ctc = ctc
        self.eos = eos
        # Created lazily by init_state() / batch_init_state().
        self.impl = None

    def init_state(self, x: paddle.Tensor):
        """Get an initial state for decoding.

        Args:
            x (paddle.Tensor): The encoded feature tensor

        Returns:
            tuple: ``(0, impl_state)`` where ``0`` is the initial previous
                score consumed by :meth:`score_partial` and ``impl_state``
                is the initial state of the numpy prefix scorer.

        """
        # A batch axis is added for log_softmax and removed again before
        # the result is handed to the numpy implementation.
        logp = self.ctc.log_softmax(x.unsqueeze(0)).squeeze(0).numpy()
        # TODO(karita): use CTCPrefixScorePD
        # NOTE(review): the literal 0 is presumably the blank id -- confirm
        # against CTCPrefixScore's signature.
        self.impl = CTCPrefixScore(logp, 0, self.eos, np)
        return 0, self.impl.initial_state()

    def select_state(self, state, i, new_id=None):
        """Select state with relative ids in the main beam search.

        Args:
            state: Decoder state for prefix tokens
            i (int): Index to select a state in the main beam search
            new_id (int): New label id to select a state if necessary

        Returns:
            state: pruned state

        """
        if state is None:
            return None
        if not isinstance(state, tuple):
            return state[i]

        if len(state) == 2:  # for CTCPrefixScore
            sc, st = state
            return sc[i], st[i]

        # for CTCPrefixScorePD (need new_id > 0)
        r, log_psi, f_min, f_max, scoring_idmap = state
        s = log_psi[i, new_id].expand(paddle.shape(log_psi)[1])
        if scoring_idmap is not None:
            # scoring_idmap translates the label id into the index used
            # along the last axis of r.
            return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max
        return r[:, :, i, new_id], s, f_min, f_max

    def score_partial(self, y, ids, state, x):
        """Score new token.

        Args:
            y (paddle.Tensor): 1D prefix token
            ids (paddle.Tensor): paddle.int64 next token to score
            state: decoder state for prefix tokens, a pair
                ``(previous score, scorer state)``
            x (paddle.Tensor): 2D encoder feature that generates ys

        Returns:
            tuple[paddle.Tensor, Any]:
                Tuple of a score tensor for y that has a shape `(len(ids),)`
                and next state for ys

        """
        prev_score, state = state
        presub_score, new_st = self.impl(y.cpu(), ids.cpu(), state)
        # Only the score gained since the previous step is returned; the
        # accumulated score is carried in the state for the next call.
        tscore = paddle.to_tensor(
            presub_score - prev_score, place=x.place, dtype=x.dtype)
        return tscore, (presub_score, new_st)

    def batch_init_state(self, x: paddle.Tensor):
        """Get an initial state for batched decoding.

        Args:
            x (paddle.Tensor): The encoded feature tensor

        Returns:
            None: the batched scorer starts from a ``None`` state.

        """
        logp = self.ctc.log_softmax(x.unsqueeze(0))  # assuming batch_size = 1
        xlen = paddle.to_tensor([paddle.shape(logp)[1]])
        self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)
        return None

    def batch_score_partial(self, y, ids, state, x):
        """Score new token for a batch of hypotheses.

        Args:
            y (paddle.Tensor): prefix tokens, passed through to the scorer
                (NOTE(review): documented upstream as 1D; likely batched
                here -- confirm against CTCPrefixScorePD)
            ids (paddle.Tensor): paddle.int64 next token to score
            state: sequence of per-hypothesis decoder states; each entry is
                either ``None`` or a tuple as returned by
                :meth:`select_state` for the batched scorer
            x (paddle.Tensor): 2D encoder feature that generates ys
                (unused here)

        Returns:
            Whatever ``self.impl(y, batch_state, ids)`` returns.
            NOTE(review): presumably ``(scores, next_state)`` -- confirm
            against CTCPrefixScorePD.

        """
        if state[0] is None:
            batch_state = None
        else:
            # The first two state entries are stacked across hypotheses;
            # the third and fourth are taken from the first hypothesis only.
            batch_state = (
                paddle.stack([s[0] for s in state], axis=2),
                paddle.stack([s[1] for s in state]),
                state[0][2],
                state[0][3], )
        return self.impl(y, batch_state, ids)

    def extend_prob(self, x: paddle.Tensor):
        """Extend probs for decoding.

        This extension is for streaming decoding
        as in Eq (14) in https://arxiv.org/abs/2006.14941

        Args:
            x (paddle.Tensor): The encoded feature tensor

        """
        logp = self.ctc.log_softmax(x.unsqueeze(0))
        self.impl.extend_prob(logp)

    def extend_state(self, state):
        """Extend state for decoding.

        This extension is for streaming decoding
        as in Eq (14) in https://arxiv.org/abs/2006.14941

        Args:
            state: The states of hyps

        Returns:
            list: extended states, one per input state and in the same order

        """
        return [self.impl.extend_state(s) for s in state]
format code 3 years ago			`# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.`
			`#`
			`# Licensed under the Apache License, Version 2.0 (the "License");`
			`# you may not use this file except in compliance with the License.`
			`# You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
fix reference format 3 years ago			`# Modified from espnet(https://github.com/espnet/espnet)`
add decoder scores function 3 years ago			`"""ScorerInterface implementation for CTC."""`
			`import numpy as np`
			`import paddle`

decoder with ctc prefix score 3 years ago			`from .ctc_prefix_score import CTCPrefixScore`
			`from .ctc_prefix_score import CTCPrefixScorePD`
add decoder scores function 3 years ago			`from .scorer_interface import BatchPartialScorerInterface`


			`class CTCPrefixScorer(BatchPartialScorerInterface):`
			`"""Decoder interface wrapper for CTCPrefixScore."""`

			`def __init__(self, ctc: paddle.nn.Layer, eos: int):`
			`"""Initialize class.`

			`Args:`
			`ctc (paddle.nn.Layer): The CTC implementation.`
merge deepspeech, parakeet and text_processing into paddlespeech 3 years ago			For example, :class:`paddlespeech.s2t.modules.ctc.CTC`
add decoder scores function 3 years ago			`eos (int): The end-of-sequence id.`

			`"""`
			`self.ctc = ctc`
			`self.eos = eos`
			`self.impl = None`

			`def init_state(self, x: paddle.Tensor):`
			`"""Get an initial state for decoding.`

			`Args:`
			`x (paddle.Tensor): The encoded feature tensor`

			`Returns: initial state`

			`"""`
			`logp = self.ctc.log_softmax(x.unsqueeze(0)).squeeze(0).numpy()`
update vector ctc prefix score 3 years ago			`# TODO(karita): use CTCPrefixScorePD`
add decoder scores function 3 years ago			`self.impl = CTCPrefixScore(logp, 0, self.eos, np)`
			`return 0, self.impl.initial_state()`

			`def select_state(self, state, i, new_id=None):`
			`"""Select state with relative ids in the main beam search.`

			`Args:`
			`state: Decoder state for prefix tokens`
			`i (int): Index to select a state in the main beam search`
			`new_id (int): New label id to select a state if necessary`

			`Returns:`
			`state: pruned state`

			`"""`
			`if type(state) == tuple:`
			`if len(state) == 2: # for CTCPrefixScore`
			`sc, st = state`
			`return sc[i], st[i]`
update vector ctc prefix score 3 years ago			`else: # for CTCPrefixScorePD (need new_id > 0)`
add decoder scores function 3 years ago			`r, log_psi, f_min, f_max, scoring_idmap = state`
tensor.shape => paddle.shape(tensor) 3 years ago			`s = log_psi[i, new_id].expand(paddle.shape(log_psi)[1])`
add decoder scores function 3 years ago			`if scoring_idmap is not None:`
			`return r[:, :, i, scoring_idmap[i, new_id]], s, f_min, f_max`
			`else:`
			`return r[:, :, i, new_id], s, f_min, f_max`
			`return None if state is None else state[i]`

			`def score_partial(self, y, ids, state, x):`
			`"""Score new token.`

			`Args:`
			`y (paddle.Tensor): 1D prefix token`
			`next_tokens (paddle.Tensor): paddle.int64 next token to score`
			`state: decoder state for prefix tokens`
			`x (paddle.Tensor): 2D encoder feature that generates ys`

			`Returns:`
			`tuple[paddle.Tensor, Any]:`
			Tuple of a score tensor for y that has a shape `(len(next_tokens),)`
			`and next state for ys`

			`"""`
			`prev_score, state = state`
			`presub_score, new_st = self.impl(y.cpu(), ids.cpu(), state)`
			`tscore = paddle.to_tensor(`
format code 3 years ago			`presub_score - prev_score, place=x.place, dtype=x.dtype)`
add decoder scores function 3 years ago			`return tscore, (presub_score, new_st)`

			`def batch_init_state(self, x: paddle.Tensor):`
			`"""Get an initial state for decoding.`

			`Args:`
			`x (paddle.Tensor): The encoded feature tensor`

			`Returns: initial state`

			`"""`
			`logp = self.ctc.log_softmax(x.unsqueeze(0)) # assuming batch_size = 1`
tensor.shape => paddle.shape(tensor) 3 years ago			`xlen = paddle.to_tensor([paddle.shape(logp)[1]])`
update vector ctc prefix score 3 years ago			`self.impl = CTCPrefixScorePD(logp, xlen, 0, self.eos)`
add decoder scores function 3 years ago			`return None`

			`def batch_score_partial(self, y, ids, state, x):`
			`"""Score new token.`

			`Args:`
			`y (paddle.Tensor): 1D prefix token`
			`ids (paddle.Tensor): paddle.int64 next token to score`
			`state: decoder state for prefix tokens`
			`x (paddle.Tensor): 2D encoder feature that generates ys`

			`Returns:`
			`tuple[paddle.Tensor, Any]:`
			Tuple of a score tensor for y that has a shape `(len(next_tokens),)`
			`and next state for ys`

			`"""`
			`batch_state = (`
format code 3 years ago			`(paddle.stack([s[0] for s in state], axis=2),`
			`paddle.stack([s[1] for s in state]), state[0][2], state[0][3], )`
			`if state[0] is not None else None)`
add decoder scores function 3 years ago			`return self.impl(y, batch_state, ids)`

			`def extend_prob(self, x: paddle.Tensor):`
			`"""Extend probs for decoding.`

			`This extension is for streaming decoding`
			`as in Eq (14) in https://arxiv.org/abs/2006.14941`

			`Args:`
			`x (paddle.Tensor): The encoded feature tensor`

			`"""`
			`logp = self.ctc.log_softmax(x.unsqueeze(0))`
			`self.impl.extend_prob(logp)`

			`def extend_state(self, state):`
			`"""Extend state for decoding.`

			`This extension is for streaming decoding`
			`as in Eq (14) in https://arxiv.org/abs/2006.14941`

			`Args:`
			`state: The states of hyps`

fix some typos 3 years ago			`Returns: extended state`
add decoder scores function 3 years ago
			`"""`
			`new_state = []`
			`for s in state:`
			`new_state.append(self.impl.extend_state(s))`

			`return new_state`