diff --git a/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py b/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
index e25e4030..fb8b321c 100644
--- a/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
+++ b/examples/other/1xt2x/src_deepspeech2x/models/ds2/deepspeech2.py
@@ -149,13 +149,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss

         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]

         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py b/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
index d81fb2e3..362098fe 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
+++ b/paddlespeech/s2t/decoders/ctcdecoder/scorer_deprecated.py
@@ -62,7 +62,7 @@ class Scorer(object):
         """Evaluation function, gathering all the different scores
         and return the final one.

-        :param sentence: The input sentence for evalutation
+        :param sentence: The input sentence for evaluation
        :type sentence: str
        :param log: Whether return the score in log representation.
        :type log: bool
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp b/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
index 8469a194..663c52bb 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
+++ b/paddlespeech/s2t/decoders/ctcdecoder/swig/ctc_beam_search_decoder.cpp
@@ -183,7 +183,7 @@ std::vector<std::pair<double, std::string>> ctc_beam_search_decoder(
     std::sort(
         prefixes.begin(), prefixes.begin() + num_prefixes, prefix_compare);

-    // compute aproximate ctc score as the return score, without affecting the
+    // compute approximate ctc score as the return score, without affecting the
     // return order of decoding result. To delete when decoder gets stable.
     for (size_t i = 0; i < beam_size && i < prefixes.size(); ++i) {
         double approx_ctc = prefixes[i]->score;
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp b/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
index 5d69ad03..e86c2240 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
+++ b/paddlespeech/s2t/decoders/ctcdecoder/swig/decoder_utils.cpp
@@ -26,7 +26,7 @@ std::vector<std::pair<size_t, float>> get_pruned_log_probs(
     for (size_t i = 0; i < prob_step.size(); ++i) {
         prob_idx.push_back(std::pair<int, double>(i, prob_step[i]));
     }
-    // pruning of vacobulary
+    // pruning of vocabulary
     size_t cutoff_len = prob_step.size();
     if (cutoff_prob < 1.0 || cutoff_top_n < cutoff_len) {
         std::sort(prob_idx.begin(),
diff --git a/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp b/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
index 7bd6542d..7c9a75d5 100644
--- a/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
+++ b/paddlespeech/s2t/decoders/ctcdecoder/swig/scorer.cpp
@@ -223,7 +223,7 @@ void Scorer::fill_dictionary(bool add_space) {

     * This gets rid of "epsilon" transitions in the FST.
     * These are transitions that don't require a string input to be taken.
-    * Getting rid of them is necessary to make the FST determinisitc, but
+    * Getting rid of them is necessary to make the FST deterministic, but
     * can greatly increase the size of the FST
     */
    fst::RmEpsilon(&dictionary);
diff --git a/paddlespeech/s2t/decoders/scorers/ctc.py b/paddlespeech/s2t/decoders/scorers/ctc.py
index ace80bd3..81d8b078 100644
--- a/paddlespeech/s2t/decoders/scorers/ctc.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc.py
@@ -154,7 +154,7 @@ class CTCPrefixScorer(BatchPartialScorerInterface):
         Args:
             state: The states of hyps

-        Returns: exteded state
+        Returns: extended state

         """
         new_state = []
diff --git a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
index 13429d49..78b8fe36 100644
--- a/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
+++ b/paddlespeech/s2t/decoders/scorers/ctc_prefix_score.py
@@ -11,7 +11,7 @@ class CTCPrefixScorePD():

     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the label probablities for multiple
+    but extended to efficiently compute the label probabilities for multiple
     hypotheses simultaneously
     See also Seki et al. "Vectorized Beam Search for CTC-Attention-Based
     Speech Recognition," In INTERSPEECH (pp. 3825-3829), 2019.
@@ -272,7 +272,7 @@ class CTCPrefixScore():

     which is based on Algorithm 2 in WATANABE et al.
     "HYBRID CTC/ATTENTION ARCHITECTURE FOR END-TO-END SPEECH RECOGNITION,"
-    but extended to efficiently compute the probablities of multiple labels
+    but extended to efficiently compute the probabilities of multiple labels
     simultaneously
     """

diff --git a/paddlespeech/s2t/models/ds2/deepspeech2.py b/paddlespeech/s2t/models/ds2/deepspeech2.py
index a478ba82..4a4d67ce 100644
--- a/paddlespeech/s2t/models/ds2/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2/deepspeech2.py
@@ -151,13 +151,13 @@ class DeepSpeech2Model(nn.Layer):
         """Compute Model loss

         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]

         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len = self.encoder(audio, audio_len)
         loss = self.decoder(eouts, eouts_len, text, text_len)
diff --git a/paddlespeech/s2t/models/ds2_online/deepspeech2.py b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
index 7d463755..5e4981c0 100644
--- a/paddlespeech/s2t/models/ds2_online/deepspeech2.py
+++ b/paddlespeech/s2t/models/ds2_online/deepspeech2.py
@@ -279,13 +279,13 @@ class DeepSpeech2ModelOnline(nn.Layer):
         """Compute Model loss

         Args:
-            audio (Tenosr): [B, T, D]
+            audio (Tensor): [B, T, D]
             audio_len (Tensor): [B]
             text (Tensor): [B, U]
             text_len (Tensor): [B]

         Returns:
-            loss (Tenosr): [1]
+            loss (Tensor): [1]
         """
         eouts, eouts_len, final_state_h_box, final_state_c_box = self.encoder(
             audio, audio_len, None, None)
diff --git a/paddlespeech/s2t/models/u2/u2.py b/paddlespeech/s2t/models/u2/u2.py
index d920a200..ff4012e8 100644
--- a/paddlespeech/s2t/models/u2/u2.py
+++ b/paddlespeech/s2t/models/u2/u2.py
@@ -680,8 +680,8 @@ class U2BaseModel(ASRInterface, nn.Layer):
         """u2 decoding.

         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'attention', 'ctc_greedy_search',
diff --git a/paddlespeech/s2t/models/u2_st/u2_st.py b/paddlespeech/s2t/models/u2_st/u2_st.py
index 2aa0b347..79ca423f 100644
--- a/paddlespeech/s2t/models/u2_st/u2_st.py
+++ b/paddlespeech/s2t/models/u2_st/u2_st.py
@@ -478,8 +478,8 @@ class U2STBaseModel(nn.Layer):
         """u2 decoding.

         Args:
-            feats (Tenosr): audio features, (B, T, D)
-            feats_lengths (Tenosr): (B)
+            feats (Tensor): audio features, (B, T, D)
+            feats_lengths (Tensor): (B)
             text_feature (TextFeaturizer): text feature object.
             decoding_method (str): decoding mode, e.g.
                 'fullsentence',
diff --git a/paddlespeech/s2t/modules/ctc.py b/paddlespeech/s2t/modules/ctc.py
index 6e965579..1f983807 100644
--- a/paddlespeech/s2t/modules/ctc.py
+++ b/paddlespeech/s2t/modules/ctc.py
@@ -81,10 +81,10 @@ class CTCDecoderBase(nn.Layer):
         Args:
             hs_pad (Tensor): batch of padded hidden state sequences (B, Tmax, D)
             hlens (Tensor): batch of lengths of hidden state sequences (B)
-            ys_pad (Tenosr): batch of padded character id sequence tensor (B, Lmax)
+            ys_pad (Tensor): batch of padded character id sequence tensor (B, Lmax)
             ys_lens (Tensor): batch of lengths of character sequence (B)
         Returns:
-            loss (Tenosr): ctc loss value, scalar.
+            loss (Tensor): ctc loss value, scalar.
         """
         logits = self.ctc_lo(self.dropout(hs_pad))
         loss = self.criterion(logits, ys_pad, hlens, ys_lens)
@@ -252,8 +252,8 @@ class CTCDecoder(CTCDecoderBase):
         """ctc decoding with probs.

         Args:
-            probs (Tenosr): activation after softmax
-            logits_lens (Tenosr): audio output lens
+            probs (Tensor): activation after softmax
+            logits_lens (Tensor): audio output lens
             vocab_list ([type]): [description]
             decoding_method ([type]): [description]
             lang_model_path ([type]): [description]
diff --git a/paddlespeech/s2t/modules/mask.py b/paddlespeech/s2t/modules/mask.py
index d6b63761..1f66c015 100644
--- a/paddlespeech/s2t/modules/mask.py
+++ b/paddlespeech/s2t/modules/mask.py
@@ -54,7 +54,7 @@ def make_pad_mask(lengths: paddle.Tensor) -> paddle.Tensor:
                  [0, 0, 0, 1, 1],
                  [0, 0, 1, 1, 1]]
     """
-    # (TODO: Hui Zhang): jit not support Tenosr.dim() and Tensor.ndim
+    # (TODO: Hui Zhang): jit not support Tensor.dim() and Tensor.ndim
     # assert lengths.dim() == 1
     batch_size = int(lengths.shape[0])
     max_len = int(lengths.max())
diff --git a/paddlespeech/s2t/utils/dynamic_import.py b/paddlespeech/s2t/utils/dynamic_import.py
index 50bd73a6..bd738edf 100644
--- a/paddlespeech/s2t/utils/dynamic_import.py
+++ b/paddlespeech/s2t/utils/dynamic_import.py
@@ -57,7 +57,7 @@ def filter_valid_args(args: Dict[Text, Any], valid_keys: List[Text]):
     return new_args


-def filter_out_tenosr(args: Dict[Text, Any]):
+def filter_out_tensor(args: Dict[Text, Any]):
     return {key: val for key, val in args.items() if not has_tensor(val)}


@@ -65,5 +65,5 @@ def instance_class(module_class, args: Dict[Text, Any]):
     valid_keys = inspect.signature(module_class).parameters.keys()
     new_args = filter_valid_args(args, valid_keys)
     logger.info(
-        f"Instance: {module_class.__name__} {filter_out_tenosr(new_args)}.")
+        f"Instance: {module_class.__name__} {filter_out_tensor(new_args)}.")
     return module_class(**new_args)
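
Note for reviewers: most hunks only touch comments and docstrings, but the last one renames a helper (`filter_out_tenosr` -> `filter_out_tensor`), so any out-of-tree caller of the old name will break. Below is a minimal, self-contained sketch of what the renamed helper does; the standalone `has_tensor` here is a simplified stand-in for illustration (the project's own version lives elsewhere in `paddlespeech.s2t.utils`), not the patched implementation.

```python
from typing import Any, Dict, Text

import paddle


def has_tensor(val: Any) -> bool:
    # Simplified stand-in: recursively look for a paddle.Tensor
    # leaf inside lists, tuples, and dicts.
    if isinstance(val, (list, tuple)):
        return any(has_tensor(item) for item in val)
    if isinstance(val, dict):
        return any(has_tensor(v) for v in val.values())
    return isinstance(val, paddle.Tensor)


def filter_out_tensor(args: Dict[Text, Any]) -> Dict[Text, Any]:
    # Drop Tensor-valued entries so the remaining kwargs log cleanly,
    # as in the logger.info call inside instance_class.
    return {key: val for key, val in args.items() if not has_tensor(val)}


if __name__ == "__main__":
    args = {
        "blank_id": 0,
        "dropout_rate": 0.1,
        "enc_out": paddle.zeros([2, 8, 4]),  # would clutter the log
    }
    print(filter_out_tensor(args))  # {'blank_id': 0, 'dropout_rate': 0.1}
```

The helper exists purely so `instance_class` can log the constructor arguments it received without dumping full tensor contents into the log; the rename changes nothing about that behavior.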