@ -25,17 +25,19 @@ from paddlespeech.s2t.utils.log import Log
logger = Log ( __name__ ) . getlog ( )
try :
from paddlespeech . s2t . decoders . ctcdecoder . swig_wrapper import ctc_beam_search_decoder_batch # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder . swig_wrapper import ctc_greedy_decoder # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder . swig_wrapper import Scorer # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import ctc_beam_search_decoding_batch # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import ctc_greedy_decoding # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import Scorer # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import CTCBeamSearchDecoder # noqa: F401
except ImportError :
try :
from paddlespeech . s2t . utils import dynamic_pip_install
package_name = ' paddlespeech_ctcdecoders '
dynamic_pip_install . install ( package_name )
from paddlespeech . s2t . decoders . ctcdecoder . swig_wrapper import ctc_beam_search_decoder_batch # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder . swig_wrapper import ctc_greedy_decoder # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder . swig_wrapper import Scorer # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import ctc_beam_search_decoding_batch # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import ctc_greedy_decoding # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import Scorer # noqa: F401
from paddlespeech . s2t . decoders . ctcdecoder import CTCBeamSearchDecoder # noqa: F401
except Exception as e :
logger . info ( " paddlespeech_ctcdecoders not installed! " )
@ -139,9 +141,11 @@ class CTCDecoder(CTCDecoderBase):
super ( ) . __init__ ( * args , * * kwargs )
# CTCDecoder LM Score handle
self . _ext_scorer = None
self . beam_search_decoder = None
def _decode_batch_greedy ( self , probs_split , vocab_list ) :
""" Decode by best path for a batch of probs matrix input.
def _decode_batch_greedy_offline ( self , probs_split , vocab_list ) :
""" This function will be deprecated in future.
Decode by best path for a batch of probs matrix input .
: param probs_split : List of 2 - D probability matrix , and each consists
of prob vectors for one speech utterancce .
: param probs_split : List of matrix
@ -152,7 +156,7 @@ class CTCDecoder(CTCDecoderBase):
"""
results = [ ]
for i , probs in enumerate ( probs_split ) :
output_transcription = ctc_greedy_decod er (
output_transcription = ctc_greedy_decod ing (
probs_seq = probs , vocabulary = vocab_list , blank_id = self . blank_id )
results . append ( output_transcription )
return results
@ -194,10 +198,12 @@ class CTCDecoder(CTCDecoderBase):
logger . info ( " no language model provided, "
" decoding by pure beam search without scorer. " )
def _decode_batch_beam_search ( self , probs_split , beam_alpha , beam_beta ,
beam_size , cutoff_prob , cutoff_top_n ,
vocab_list , num_processes ) :
""" Decode by beam search for a batch of probs matrix input.
def _decode_batch_beam_search_offline (
self , probs_split , beam_alpha , beam_beta , beam_size , cutoff_prob ,
cutoff_top_n , vocab_list , num_processes ) :
"""
This function will be deprecated in future .
Decode by beam search for a batch of probs matrix input .
: param probs_split : List of 2 - D probability matrix , and each consists
of prob vectors for one speech utterancce .
: param probs_split : List of matrix
@ -226,7 +232,7 @@ class CTCDecoder(CTCDecoderBase):
# beam search decode
num_processes = min ( num_processes , len ( probs_split ) )
beam_search_results = ctc_beam_search_decod er _batch(
beam_search_results = ctc_beam_search_decod ing _batch(
probs_split = probs_split ,
vocabulary = vocab_list ,
beam_size = beam_size ,
@ -239,30 +245,69 @@ class CTCDecoder(CTCDecoderBase):
results = [ result [ 0 ] [ 1 ] for result in beam_search_results ]
return results
def init_decode ( self , beam_alpha , beam_beta , lang_model_path , vocab_list ,
decoding_method ) :
def init_decoder ( self , batch_size , vocab_list , decoding_method ,
lang_model_path , beam_alpha , beam_beta , beam_size ,
cutoff_prob , cutoff_top_n , num_processes ) :
"""
init ctc decoders
Args :
batch_size ( int ) : Batch size for input data
vocab_list ( list ) : List of tokens in the vocabulary , for decoding
decoding_method ( str ) : ctc_beam_search
lang_model_path ( str ) : language model path
beam_alpha ( float ) : beam_alpha
beam_beta ( float ) : beam_beta
beam_size ( int ) : beam_size
cutoff_prob ( float ) : cutoff probability in beam search
cutoff_top_n ( int ) : cutoff_top_n
num_processes ( int ) : num_processes
Raises :
ValueError : when decoding_method not support .
Returns :
CTCBeamSearchDecoder
"""
self . batch_size = batch_size
self . vocab_list = vocab_list
self . decoding_method = decoding_method
self . beam_size = beam_size
self . cutoff_prob = cutoff_prob
self . cutoff_top_n = cutoff_top_n
self . num_processes = num_processes
if decoding_method == " ctc_beam_search " :
self . _init_ext_scorer ( beam_alpha , beam_beta , lang_model_path ,
vocab_list )
if self . beam_search_decoder is None :
self . beam_search_decoder = self . get_decoder (
vocab_list , batch_size , beam_alpha , beam_beta , beam_size ,
num_processes , cutoff_prob , cutoff_top_n )
return self . beam_search_decoder
elif decoding_method == " ctc_greedy " :
self . _init_ext_scorer ( beam_alpha , beam_beta , lang_model_path ,
vocab_list )
else :
raise ValueError ( f " Not support: { decoding_method } " )
def decode_probs ( self , probs , logits_lens , vocab_list , decoding_method ,
lang_model_path , beam_alpha , beam_beta , beam_size ,
cutoff_prob , cutoff_top_n , num_processes ) :
""" ctc decoding with probs.
def decode_probs_offline ( self , probs , logits_lens , vocab_list ,
decoding_method , lang_model_path , beam_alpha ,
beam_beta , beam_size , cutoff_prob , cutoff_top_n ,
num_processes ) :
"""
This function will be deprecated in future .
ctc decoding with probs .
Args :
probs ( Tensor ) : activation after softmax
logits_lens ( Tensor ) : audio output lens
vocab_list ( [ type ] ) : [ description ]
decoding_method ( [ type ] ) : [ description ]
lang_model_path ( [ type ] ) : [ description ]
beam_alpha ( [ type ] ) : [ description ]
beam_beta ( [ type ] ) : [ description ]
beam_size ( [ type ] ) : [ description ]
cutoff_prob ( [ type ] ) : [ description ]
cutoff_top_n ( [ type ] ) : [ description ]
num_processes ( [ type ] ) : [ description ]
vocab_list ( list ) : List of tokens in the vocabulary , for decoding
decoding_method ( str ) : ctc_beam_search
lang_model_path ( str ) : language model path
beam_alpha ( float ) : beam_alpha
beam_beta ( float ) : beam_beta
beam_size ( int ) : beam_size
cutoff_prob ( float ) : cutoff probability in beam search
cutoff_top_n ( int ) : cutoff_top_n
num_processes ( int ) : num_processes
Raises :
ValueError : when decoding_method not support .
@ -270,13 +315,14 @@ class CTCDecoder(CTCDecoderBase):
Returns :
List [ str ] : transcripts .
"""
logger . warn (
" This function will be deprecated in future: decode_probs_offline " )
probs_split = [ probs [ i , : l , : ] for i , l in enumerate ( logits_lens ) ]
if decoding_method == " ctc_greedy " :
result_transcripts = self . _decode_batch_greedy (
result_transcripts = self . _decode_batch_greedy _offline (
probs_split = probs_split , vocab_list = vocab_list )
elif decoding_method == " ctc_beam_search " :
result_transcripts = self . _decode_batch_beam_search (
result_transcripts = self . _decode_batch_beam_search _offline (
probs_split = probs_split ,
beam_alpha = beam_alpha ,
beam_beta = beam_beta ,
@ -288,3 +334,136 @@ class CTCDecoder(CTCDecoderBase):
else :
raise ValueError ( f " Not support: { decoding_method } " )
return result_transcripts
def get_decoder ( self , vocab_list , batch_size , beam_alpha , beam_beta ,
beam_size , num_processes , cutoff_prob , cutoff_top_n ) :
"""
init get ctc decoder
Args :
vocab_list ( list ) : List of tokens in the vocabulary , for decoding .
batch_size ( int ) : Batch size for input data
beam_alpha ( float ) : beam_alpha
beam_beta ( float ) : beam_beta
beam_size ( int ) : beam_size
num_processes ( int ) : num_processes
cutoff_prob ( float ) : cutoff probability in beam search
cutoff_top_n ( int ) : cutoff_top_n
Raises :
ValueError : when decoding_method not support .
Returns :
CTCBeamSearchDecoder
"""
num_processes = min ( num_processes , batch_size )
if self . _ext_scorer is not None :
self . _ext_scorer . reset_params ( beam_alpha , beam_beta )
if self . decoding_method == " ctc_beam_search " :
beam_search_decoder = CTCBeamSearchDecoder (
vocab_list , batch_size , beam_size , num_processes , cutoff_prob ,
cutoff_top_n , self . _ext_scorer , self . blank_id )
else :
raise ValueError ( f " Not support: { decoding_method } " )
return beam_search_decoder
def next ( self , probs , logits_lens ) :
"""
Input probs into ctc decoder
Args :
probs ( list ( list ( float ) ) ) : probs for a batch of data
logits_lens ( list ( int ) ) : logits lens for a batch of data
Raises :
Exception : when the ctc decoder is not initialized
ValueError : when decoding_method not support .
"""
if self . beam_search_decoder is None :
raise Exception (
" You need to initialize the beam_search_decoder firstly " )
beam_search_decoder = self . beam_search_decoder
has_value = ( logits_lens > 0 ) . tolist ( )
has_value = [
" true " if has_value [ i ] is True else " false "
for i in range ( len ( has_value ) )
]
probs_split = [
probs [ i , : l , : ] . tolist ( ) if has_value [ i ] else probs [ i ] . tolist ( )
for i , l in enumerate ( logits_lens )
]
if self . decoding_method == " ctc_beam_search " :
beam_search_decoder . next ( probs_split , has_value )
else :
raise ValueError ( f " Not support: { decoding_method } " )
return
def decode ( self ) :
"""
Get the decoding result
Raises :
Exception : when the ctc decoder is not initialized
ValueError : when decoding_method not support .
Returns :
results_best ( list ( str ) ) : The best result for a batch of data
results_beam ( list ( list ( str ) ) ) : The beam search result for a batch of data
"""
if self . beam_search_decoder is None :
raise Exception (
" You need to initialize the beam_search_decoder firstly " )
beam_search_decoder = self . beam_search_decoder
if self . decoding_method == " ctc_beam_search " :
batch_beam_results = beam_search_decoder . decode ( )
batch_beam_results = [ [ ( res [ 0 ] , res [ 1 ] ) for res in beam_results ]
for beam_results in batch_beam_results ]
results_best = [ result [ 0 ] [ 1 ] for result in batch_beam_results ]
results_beam = [ [ trans [ 1 ] for trans in result ]
for result in batch_beam_results ]
else :
raise ValueError ( f " Not support: { decoding_method } " )
return results_best , results_beam
def reset_decoder ( self ,
batch_size = - 1 ,
beam_size = - 1 ,
num_processes = - 1 ,
cutoff_prob = - 1.0 ,
cutoff_top_n = - 1 ) :
if batch_size > 0 :
self . batch_size = batch_size
if beam_size > 0 :
self . beam_size = beam_size
if num_processes > 0 :
self . num_processes = num_processes
if cutoff_prob > 0 :
self . cutoff_prob = cutoff_prob
if cutoff_top_n > 0 :
self . cutoff_top_n = cutoff_top_n
"""
Reset the decoder state
Args :
batch_size ( int ) : Batch size for input data
beam_size ( int ) : beam_size
num_processes ( int ) : num_processes
cutoff_prob ( float ) : cutoff probability in beam search
cutoff_top_n ( int ) : cutoff_top_n
Raises :
Exception : when the ctc decoder is not initialized
"""
if self . beam_search_decoder is None :
raise Exception (
" You need to initialize the beam_search_decoder firstly " )
self . beam_search_decoder . reset_state (
self . batch_size , self . beam_size , self . num_processes ,
self . cutoff_prob , self . cutoff_top_n )
def del_decoder ( self ) :
"""
Delete the decoder
"""
if self . beam_search_decoder is not None :
del self . beam_search_decoder
self . beam_search_decoder = None