fix some typos

pull/1285/head
billishyahao 4 years ago
parent 3568bb62b5
commit ddf184be60

@@ -149,13 +149,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss
 
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
 
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
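For context, a minimal sketch of driving this forward pass with correctly shaped inputs. All shapes and values are made up, and the model call is left commented because the constructor arguments are not shown in this hunk:

```python
import paddle

# Hypothetical shapes: B=4 utterances, T=100 frames, D=161 feature bins, U=20 tokens.
B, T, D, U = 4, 100, 161, 20
audio = paddle.randn([B, T, D])                                 # audio (Tensor): [B, T, D]
audio_len = paddle.to_tensor([100, 90, 80, 70], dtype='int64')  # audio_len (Tensor): [B]
text = paddle.randint(low=1, high=29, shape=[B, U])             # text (Tensor): [B, U]
text_len = paddle.to_tensor([20, 18, 15, 12], dtype='int64')    # text_len (Tensor): [B]

# model = DeepSpeech2Model(...)                   # constructor args omitted here
# loss = model(audio, audio_len, text, text_len)  # loss (Tensor): [1]
```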

@@ -62,7 +62,7 @@ class Scorer(object):
         """Evaluation function, gathering all the different scores
         and return the final one.
 
-        :param sentence: The input sentence for evalutation
+        :param sentence: The input sentence for evaluation
         :type sentence: str
         :param log: Whether return the score in log representation.
         :type log: bool
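For orientation, a minimal sketch of how such a scorer typically combines an external LM probability with a word-count bonus, in both the linear and log domains. `alpha`, `beta`, and `lm_prob` are illustrative placeholders, not the class's actual attributes:

```python
import math

def score(sentence: str, alpha: float, beta: float, lm_prob: float, log: bool = False):
    word_count = len(sentence.split())
    if log:
        # log domain: weighted sum of log LM probability and log word count
        return alpha * math.log(lm_prob) + beta * math.log(word_count)
    # linear domain: weighted product of the two scores
    return (lm_prob ** alpha) * (word_count ** beta)
```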

@@ -183,7 +183,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
   std::sort(
       prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);
 
-  // compute aproximate ctc score as the return score, without affecting the
+  // compute approximate ctc score as the return score, without affecting the
   // return order of decoding result. To delete when decoder gets stable.
   for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
     double approx_ctc = prefixes[i]->score;
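A Python sketch of what this loop does: after sorting, the accumulated beam-search score of each surviving prefix is reported as an approximate CTC score. The `score` and `text` fields are assumed stand-ins for the C++ prefix members, not their real names:

```python
# A sketch, not the C++ implementation: report each top prefix's accumulated
# log-domain beam-search score as its approximate CTC score.
def top_beam_results(prefixes, beam_size):
    prefixes.sort(key=lambda p: p.score, reverse=True)  # what prefix_compare achieves
    results = []
    for p in prefixes[:beam_size]:
        approx_ctc = p.score  # approximation; exact CTC rescoring would go here
        results.append((approx_ctc, p.text))
    return results
```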

@@ -26,7 +26,7 @@ std::vector<std::pair<size_t, float>> get_pruned_log_probs(
   for (size_t i = 0; i < prob_step.size(); ++i) {
     prob_idx.push_back(std::pair<int, double>(i, prob_step[i]));
   }
-  // pruning of vacobulary
+  // pruning of vocabulary
   size_t cutoff_len = prob_step.size();
   if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
     std::sort(prob_idx.begin(),
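A Python sketch of the pruning logic, mirroring the C++ names: keep only the most probable symbols, stopping once their cumulative probability reaches `cutoff_prob` or once `cutoff_top_n` symbols have been taken (sorting is done unconditionally here for brevity):

```python
import math

def get_pruned_log_probs(prob_step, cutoff_prob=1.0, cutoff_top_n=40):
    # pair each vocabulary index with its probability, most probable first
    prob_idx = sorted(enumerate(prob_step), key=lambda kv: kv[1], reverse=True)
    cutoff_len = len(prob_idx)
    if cutoff_prob < 1.0 or cutoff_top_n < cutoff_len:
        cum_prob, count = 0.0, 0
        for _, p in prob_idx:
            cum_prob += p
            count += 1
            if cum_prob >= cutoff_prob or count >= cutoff_top_n:
                break
        cutoff_len = count
    # return the surviving (index, log-probability) pairs
    return [(i, math.log(p)) for i, p in prob_idx[:cutoff_len]]

print(get_pruned_log_probs([0.5, 0.3, 0.15, 0.05], cutoff_prob=0.9))
```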

@@ -223,7 +223,7 @@ void Scorer::fill_dictionary(bool add_space) {
    * This gets rid of "epsilon" transitions in the FST.
    * These are transitions that don't require a string input to be taken.
-   * Getting rid of them is necessary to make the FST determinisitc, but
+   * Getting rid of them is necessary to make the FST deterministic, but
    * can greatly increase the size of the FST
    */
   fst::RmEpsilon(&dictionary);
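As a toy illustration of the comment above, here is a textbook epsilon-closure on a dict-based automaton, with `None` playing the role of the FST's epsilon label. This is the general idea, not OpenFst's RmEpsilon algorithm:

```python
# transitions: {state: [(symbol_or_None, next_state), ...]}
def epsilon_closure(transitions, state):
    stack, closure = [state], {state}
    while stack:
        s = stack.pop()
        for sym, nxt in transitions.get(s, []):
            if sym is None and nxt not in closure:  # follow epsilon edges only
                closure.add(nxt)
                stack.append(nxt)
    return closure

toy = {0: [(None, 1)], 1: [('a', 2)], 2: []}
print(epsilon_closure(toy, 0))  # {0, 1}: state 1 is reachable without consuming input
```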

@@ -154,7 +154,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
 
         Args:
             state: The states of hyps
 
-        Returns: exteded state
+        Returns: extended state
         """
         new_state = []

@@ -11,7 +11,7 @@ class CTCPrefixScorePD():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the label probablities for multiple
+    but extended to efficiently compute the label probabilities for multiple
     hypotheses simultaneously
 
     See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
     Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.
@@ -272,7 +272,7 @@ class CTCPrefixScore():
     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the probablities of multiple labels
+    but extended to efficiently compute the probabilities of multiple labels
     simultaneously
     """

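A heavily simplified sketch of that Algorithm 2 recursion for a single hypothesis, in the linear domain for readability. The real classes vectorize over many hypotheses and work in the log domain, and the initialization of a non-empty prefix is glossed over here:

```python
import numpy as np

def ctc_prefix_score(y, g_last, c, r_prev_n, r_prev_b, blank=0):
    """y: (T, V) frame posteriors; r_prev_n/r_prev_b: (T,) non-blank/blank-ending
    probabilities of the old prefix g; c: the label extending g; g_last: last
    label of g, or None if g is empty."""
    T = y.shape[0]
    r_n, r_b = np.zeros(T), np.zeros(T)
    # phi[t]: mass of the old prefix g that label c can still extend after frame t
    phi = r_prev_b + (0.0 if g_last == c else r_prev_n)
    r_n[0] = y[0, c] if g_last is None else 0.0  # non-zero start only for an empty g
    psi = r_n[0]
    for t in range(1, T):
        r_n[t] = y[t, c] * (r_n[t - 1] + phi[t - 1])
        r_b[t] = y[t, blank] * (r_b[t - 1] + r_n[t - 1])
        psi += y[t, c] * phi[t - 1]
    return psi, (r_n, r_b)  # psi: prefix score of g + [c]; (r_n, r_b): its new state
```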
@@ -151,13 +151,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss
 
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
 
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)

@@ -279,13 +279,13 @@ class DeepSpeech2ModelOnline(nn.Layer):
         """Compute Model loss
 
         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]
 
         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len, final_state_h_box, final_state_c_box = self.encoder(
             audio, audio_len, None, None)

@@ -680,8 +680,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """u2 decoding.
 
         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'attention', 'ctc_greedy_search',
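One of the named modes, 'ctc_greedy_search', is simple enough to sketch end to end. This is the standard collapse-repeats-then-drop-blanks rule, not the class's own implementation, and blank id 0 is an assumption:

```python
import paddle

def ctc_greedy_search(ctc_log_probs: paddle.Tensor, blank: int = 0):
    """ctc_log_probs: (T, V) frame-level log posteriors for one utterance."""
    best = paddle.argmax(ctc_log_probs, axis=-1).numpy().tolist()  # best label per frame
    hyp, prev = [], None
    for tok in best:
        if tok != blank and tok != prev:  # collapse repeats, then drop blanks
            hyp.append(tok)
        prev = tok
    return hyp

probs = paddle.to_tensor(
    [[0.6, 0.3, 0.1], [0.2, 0.7, 0.1], [0.2, 0.7, 0.1], [0.8, 0.1, 0.1]])
print(ctc_greedy_search(paddle.log(probs)))  # [1]: blank,1,1,blank collapses to [1]
```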

@@ -478,8 +478,8 @@ class U2STBaseModel(nn.Layer):
         """u2 decoding.
 
         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'fullsentence',

@@ -81,10 +81,10 @@ class CTCDecoderBase(nn.Layer):
         Args:
             hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
             hlens (Tensor): batch of lengths of hidden state sequences (B)
-            ys_pad (Tenosr): batch of padded character id sequence tensor (B, Lmax)
+            ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax)
             ys_lens (Tensor): batch of lengths of character sequence (B)
         Returns:
-            loss (Tenosr): ctc loss value, scalar.
+            loss (Tensor): ctc loss value, scalar.
         """
         logits = self.ctc_lo(self.dropout(hs_pad))
         loss = self.criterion(logits, ys_pad, hlens, ys_lens)
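A self-contained sketch of those two lines using paddle.nn.CTCLoss in place of the class's own criterion. Shapes, the dropout rate, and blank id 0 are assumptions, and the normalization convention should be checked against the Paddle docs:

```python
import paddle
import paddle.nn as nn
import paddle.nn.functional as F

# Assumed dimensions: B=4, Tmax=50 encoder frames, D=256 hidden size, V=30 vocab.
B, Tmax, D, V = 4, 50, 256, 30
hs_pad = paddle.randn([B, Tmax, D])
hlens = paddle.to_tensor([50, 45, 40, 35], dtype='int64')
ys_pad = paddle.randint(low=1, high=V, shape=[B, 10], dtype='int32')
ys_lens = paddle.to_tensor([10, 9, 8, 7], dtype='int64')

ctc_lo = nn.Linear(D, V)                  # plays the role of self.ctc_lo
logits = ctc_lo(nn.Dropout(0.1)(hs_pad))  # (B, Tmax, V)
# paddle.nn.CTCLoss consumes time-major log probabilities: (Tmax, B, V)
log_probs = F.log_softmax(logits.transpose([1, 0, 2]), axis=-1)
loss = nn.CTCLoss(blank=0)(log_probs, ys_pad, hlens, ys_lens)  # scalar CTC loss
```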
@@ -252,8 +252,8 @@ class CTCDecoder(CTCDecoderBase):
         """ctc decoding with probs.
 
         Args:
-            probs (Tenosr): activation after softmax
-            logits_lens (Tenosr): audio output lens
+            probs (Tensor): activation after softmax
+            logits_lens (Tensor): audio output lens
             vocab_list ([type]): [description]
             decoding_method ([type]): [description]
             lang_model_path ([type]): [description]

@@ -54,7 +54,7 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
                  [0, 0, 0, 1, 1],
                  [0, 0, 1, 1, 1]]
     """
-    # (TODO: Hui Zhang): jit not support Tenosr.dim() and Tensor.ndim
+    # (TODO: Hui Zhang): jit not support Tensor.dim() and Tensor.ndim
     # assert lengths.dim() == 1
     batch_size = int(lengths.shape[0])
     max_len = int(lengths.max())
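A minimal sketch of how make_pad_mask can be built from shape arithmetic alone, which is why the code above avoids Tensor.dim()/Tensor.ndim under jit: positions at or beyond each utterance's length are marked 1 (padding):

```python
import paddle

def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
    batch_size = int(lengths.shape[0])
    max_len = int(lengths.max())
    seq_range = paddle.arange(0, max_len, dtype='int64')             # (max_len,)
    seq_range = seq_range.unsqueeze(0).expand([batch_size, max_len])  # (B, max_len)
    seq_length = lengths.unsqueeze(-1)                                # (B, 1)
    return seq_range >= seq_length  # bool (B, max_len), True at padded positions

print(make_pad_mask(paddle.to_tensor([5, 3, 2])).astype('int64'))
# [[0, 0, 0, 0, 0],
#  [0, 0, 0, 1, 1],
#  [0, 0, 1, 1, 1]]
```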

@@ -57,7 +57,7 @@ def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
     return new_args
 
 
-def filter_out_tenosr(args: Dict[Text, Any]):
+def filter_out_tensor(args: Dict[Text, Any]):
     return {key: val for key, val in args.items() if not has_tensor(val)}
 
 
@@ -65,5 +65,5 @@ def instance_class(module_class, args: Dict[Text, Any]):
     valid_keys = inspect.signature(module_class).parameters.keys()
     new_args = filter_valid_args(args, valid_keys)
     logger.info(
-        f"Instance: {module_class.__name__} {filter_out_tenosr(new_args)}.")
+        f"Instance: {module_class.__name__} {filter_out_tensor(new_args)}.")
     return module_class(**new_args)
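How these helpers fit together, with a simplified stand-in for has_tensor (the real one may handle more container types) and a hypothetical Toy class:

```python
import inspect
from typing import Any

def has_tensor(val: Any) -> bool:
    import paddle
    if isinstance(val, paddle.Tensor):
        return True
    if isinstance(val, (list, tuple)):
        return any(has_tensor(v) for v in val)
    if isinstance(val, dict):
        return any(has_tensor(v) for v in val.values())
    return False

class Toy:
    def __init__(self, alpha: float, beta: float):
        self.alpha, self.beta = alpha, beta

args = {'alpha': 0.5, 'beta': 0.3, 'unused': 'dropped'}
valid_keys = inspect.signature(Toy).parameters.keys()
new_args = {k: v for k, v in args.items() if k in valid_keys}  # filter_valid_args
print(new_args)  # {'alpha': 0.5, 'beta': 0.3}: 'unused' is filtered out
obj = Toy(**new_args)                                          # instance_class
```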
