* Fix
pull/4029/head
co63oc authored 6 months ago, committed by GitHub
parent c2dc4dae2d
commit 50ef94b68c

@@ -233,7 +233,7 @@ def spectrogram(waveform: Tensor,
round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
to FFT. Defaults to True.
sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey".
@@ -443,7 +443,7 @@ def fbank(waveform: Tensor,
round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
to FFT. Defaults to True.
sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.
@@ -566,7 +566,7 @@ def mfcc(waveform: Tensor,
round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input
to FFT. Defaults to True.
sr (int, optional): Sample rate of input waveform. Defaults to 16000.
-snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it
+snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it
is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True.
subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False.
use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False.

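These three hunks fix the same sentence in the docstrings of the Kaldi-compatible spectrogram, fbank and mfcc helpers. As a rough illustration of what snip_edges controls, here is a plain-NumPy sketch of the framing arithmetic only (not the library's implementation; the 400/160-sample frame length and shift are assumed values):

```python
import numpy as np

def frame_signal(waveform, frame_length=400, frame_shift=160, snip_edges=True):
    # Illustrative framing only: shows how snip_edges changes the frame count.
    n = len(waveform)
    if snip_edges:
        # drop trailing samples that can't fill a whole frame
        num_frames = 0 if n < frame_length else 1 + (n - frame_length) // frame_shift
    else:
        # reflect-pad the tail so every sample falls inside some frame
        num_frames = (n + frame_shift // 2) // frame_shift
        pad = max((num_frames - 1) * frame_shift + frame_length - n, 0)
        waveform = np.pad(waveform, (0, pad), mode="reflect")
    return np.stack([waveform[i * frame_shift:i * frame_shift + frame_length]
                     for i in range(num_frames)])

frame_signal(np.zeros(16000), snip_edges=True).shape   # (98, 400)
frame_signal(np.zeros(16000), snip_edges=False).shape  # (100, 400)
```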
@@ -527,7 +527,7 @@ def melspectrogram(x: np.ndarray,
if fmax is None:
fmax = sr // 2
if fmin < 0 or fmin >= fmax:
-raise ParameterError('fmin and fmax must statisfy 0<fmin<fmax')
+raise ParameterError('fmin and fmax must satisfy 0<fmin<fmax')
s = stft(
x,

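For reference, the surrounding melspectrogram code defaults fmax to the Nyquist frequency before running this guard; an equivalent standalone sketch (ValueError stands in for the library's ParameterError):

```python
def check_mel_band(sr, fmin=50.0, fmax=None):
    if fmax is None:
        fmax = sr // 2  # default to the Nyquist frequency
    if fmin < 0 or fmin >= fmax:
        raise ValueError('fmin and fmax must satisfy 0<fmin<fmax')
    return fmin, fmax

check_mel_band(16000)             # (50.0, 8000)
check_mel_band(16000, fmin=9000)  # raises ValueError
```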
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

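The partition names in these comments come from conf/slurm.conf. An illustrative configuration (assumed content; each recipe's actual slurm.conf may differ) that maps the --gpu option onto the cpu/gpu partitions looks like:

```
command sbatch --export=PATH
option name=* --job-name $0
option num_threads=* --cpus-per-task $0
default gpu=0
option gpu=0 -p cpu
option gpu=* -p gpu --gres=gpu:$0
```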
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -97,7 +97,7 @@ def test_full_scores_words():
if w not in model:
print('"{0}" is an OOV'.format(w))
oov.append(w)
-# zh_giga.no_cna_cmn.prune01244.klm is chinese charactor LM
+# zh_giga.no_cna_cmn.prune01244.klm is chinese character LM
assert oov == ["盘点", "不怕", "网站", "", "", "海淘", "向来", "便宜", "保真",
""], 'error oov'

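The test walks each word of the sentence and collects out-of-vocabulary entries from the character-level KenLM model. With the kenlm Python bindings this looks roughly like the sketch below (the model path is the one named in the comment; the word list is illustrative):

```python
import kenlm

model = kenlm.Model('zh_giga.no_cna_cmn.prune01244.klm')
words = ['盘点', '不怕', '网站', '海淘', '向来', '便宜', '保真']
# membership test against the model vocabulary, as in the test above
oov = [w for w in words if w not in model]
# full_scores() additionally yields (log10 prob, ngram length, is_oov) per token
scores = list(model.full_scores(' '.join(words)))
```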
@@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then
# "sbatch" (Slurm)
elif [ "${cmd_backend}" = slurm ]; then
# The default setting is written in conf/slurm.conf.
-# You must change "-p cpu" and "-p gpu" for the "partion" for your environment.
-# To know the "partion" names, type "sinfo".
+# You must change "-p cpu" and "-p gpu" for the "partition" for your environment.
+# To know the "partition" names, type "sinfo".
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*"
# The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".

@@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix):
if line == '':
continue
audio_id, text = line.split(' ', 1)
-# remove withespace, charactor text
+# remove withespace, character text
text = ''.join(text.split())
transcript_dict[audio_id] = text

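The ''.join(text.split()) idiom right after this comment strips every run of whitespace, which is the behaviour wanted for Chinese character-level transcripts; for example:

```python
text = '甚 至 出 现 交 易 停 滞 的 情 况'
''.join(text.split())  # '甚至出现交易停滞的情况'
```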
@@ -65,7 +65,7 @@ def create_manifest(data_dir, manifest_path_prefix):
if line == '':
continue
audio_id, text = line.split(' ', 1)
-# remove withespace, charactor text
+# remove withespace, character text
text = ''.join(text.split())
transcript_dict[audio_id] = text
@@ -159,7 +159,7 @@ def check_dataset(data_dir):
if line == '':
continue
audio_id, text = line.split(' ', 1)
-# remove withespace, charactor text
+# remove withespace, character text
text = ''.join(text.split())
transcript_dict[audio_id] = text

@@ -171,7 +171,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
def cer(reference, hypothesis, ignore_case=False, remove_space=False):
-"""Calculate charactor error rate (CER). CER compares reference text and
+"""Calculate character error rate (CER). CER compares reference text and
hypothesis text in char-level. CER is defined as:
.. math::

@@ -523,7 +523,7 @@ class Frontend():
initials = []
finals = []
-# to charactor list
+# to character list
words = self._split_word_to_char(words[0])
for pinyin, char in zip(pinyin_spec, words):

@@ -159,7 +159,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '):
def cer(reference, hypothesis, ignore_case=False, remove_space=False):
-"""Calculate charactor error rate (CER). CER compares reference text and
+"""Calculate character error rate (CER). CER compares reference text and
hypothesis text in char-level. CER is defined as:
.. math::
CER = (Sc + Dc + Ic) / Nc

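The docstring's formula counts character-level substitutions, deletions and insertions against the reference length, i.e. an edit distance divided by len(reference). A minimal self-contained sketch of that computation (not the project's actual utils implementation):

```python
def char_errors(reference, hypothesis):
    # Levenshtein distance between two strings at character level.
    prev = list(range(len(hypothesis) + 1))
    for i, r in enumerate(reference, 1):
        cur = [i]
        for j, h in enumerate(hypothesis, 1):
            cur.append(min(prev[j] + 1,              # deletion
                           cur[j - 1] + 1,           # insertion
                           prev[j - 1] + (r != h)))  # substitution
        prev = cur
    return prev[-1]

def cer(reference, hypothesis, remove_space=False):
    if remove_space:
        reference = ''.join(reference.split())
        hypothesis = ''.join(hypothesis.split())
    return char_errors(reference, hypothesis) / len(reference)

cer('今天天气很好', '今天天汽很好')  # 1/6
```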
@@ -157,7 +157,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
next_score.v_b = prefix_score.ViterbiScore() + prob;
next_score.times_b = prefix_score.Times();
-// Prefix not changed, copy the context from pefix
+// Prefix not changed, copy the context from prefix
if (context_graph_ && !next_score.has_context) {
next_score.CopyContext(prefix_score);
next_score.has_context = true;
@@ -183,7 +183,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding(
}
}
-// Prefix not changed, copy the context from pefix
+// Prefix not changed, copy the context from prefix
if (context_graph_ && !next_score1.has_context) {
next_score1.CopyContext(prefix_score);
next_score1.has_context = true;

@@ -72,7 +72,7 @@ bool CMVN::Read(std::vector<BaseFloat>* feats) {
return false;
}
-// appply cmvn
+// apply cmvn
kaldi::Timer timer;
Compute(feats);
VLOG(1) << "CMVN::Read cost: " << timer.Elapsed() << " sec.";

@@ -29,7 +29,7 @@ class CMVN : public FrontendInterface {
// the length of feats = feature_row * feature_dim,
// the Matrix is squashed into Vector
virtual bool Read(std::vector<kaldi::BaseFloat>* feats);
-// the dim_ is the feautre dim.
+// the dim_ is the feature dim.
virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

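CMVN here is cepstral mean and variance normalization: each feature dimension is shifted (and optionally scaled) using statistics accumulated over frames, with the frame matrix squashed into a flat vector as the comment above describes. A rough NumPy sketch under that layout assumption (not the C++ Compute() code):

```python
import numpy as np

def apply_cmvn(feats, dim, var_norm=True, eps=1e-20):
    # feats: flat buffer of length num_frames * dim, row-major frames
    frames = np.asarray(feats, dtype=np.float64).reshape(-1, dim)
    frames = frames - frames.mean(axis=0)
    if var_norm:
        frames = frames / np.maximum(frames.std(axis=0), eps)
    return frames.reshape(-1)
```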
@@ -47,7 +47,7 @@ class DecibelNormalizer : public FrontendInterface {
std::unique_ptr<FrontendInterface> base_extractor);
virtual void Accept(const kaldi::VectorBase<kaldi::BaseFloat>& waves);
virtual bool Read(kaldi::Vector<kaldi::BaseFloat>* waves);
-// noramlize audio, the dim is 1.
+// normalize audio, the dim is 1.
virtual size_t Dim() const { return dim_; }
virtual void SetFinished() { base_extractor_->SetFinished(); }
virtual bool IsFinished() const { return base_extractor_->IsFinished(); }

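DecibelNormalizer rescales the incoming waveform so its RMS level sits at a target decibel value, which is why Dim() is 1: it consumes and emits a single stream of samples. A hedged NumPy sketch of this kind of gain normalization (target_db and the gain cap are illustrative, not the class's actual defaults):

```python
import numpy as np

def normalize_decibels(samples, target_db=-20.0, max_gain_db=300.0):
    rms_db = 10.0 * np.log10(np.mean(samples ** 2) + 1e-20)  # current RMS level in dB
    gain_db = min(target_db - rms_db, max_gain_db)
    return samples * (10.0 ** (gain_db / 20.0))
```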
@@ -244,8 +244,8 @@ void MatrixBase<Real>::SymAddMat2(const Real alpha,
/// function will produce NaN in the output. This is a bug in the
/// ATLAS library. To overcome this, the AddMatMat function, which calls
/// cblas_Xgemm(...) rather than cblas_Xsyrk(...), is used in this special
-/// sitation.
-/// Wei Shi: Note this bug is observerd for single precision matrix
+/// situation.
+/// Wei Shi: Note this bug is observed for single precision matrix
/// on a 64-bit machine
#ifdef HAVE_ATLAS
if (transA == kTrans && num_rows_ >= 56) {
@@ -683,7 +683,7 @@ empty.
if (V_in == NULL) tmpV.Resize(1, this->num_cols_); // work-space if V_in
empty.
-/// Impementation notes:
+/// Implementation notes:
/// Lapack works in column-order, therefore the dimensions of *this are
/// swapped as well as the U and V matrices.
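Because LAPACK is column-major, the routine effectively sees the transpose of a row-major matrix, so U and V come back with their roles exchanged; the identity A = U S V^T implies A^T = V S U^T, which is easy to confirm in NumPy:

```python
import numpy as np

A = np.random.randn(4, 3)
U, s, Vt = np.linalg.svd(A, full_matrices=False)
U2, s2, Vt2 = np.linalg.svd(A.T, full_matrices=False)
assert np.allclose(s, s2)                     # same singular values
assert np.allclose(np.abs(U), np.abs(Vt2.T))  # left/right singular vectors swap roles
assert np.allclose(np.abs(Vt), np.abs(U2.T))  # (up to per-column sign flips)
```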
@@ -2378,7 +2378,7 @@ bool ReadHtk(std::istream &is, Matrix<Real> *M_ptr, HtkHeader *header_ptr)
Matrix<Real> &M = *M_ptr;
HtkHeader htk_hdr;
-// TODO(arnab): this fails if the HTK file has CRC cheksum or is compressed.
+// TODO(arnab): this fails if the HTK file has CRC checksum or is compressed.
is.read((char*)&htk_hdr, sizeof(htk_hdr)); // we're being really POSIX here!
if (is.fail()) {
KALDI_WARN << "Could not read header from HTK feature file ";

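HtkHeader is the standard 12-byte header of an HTK feature file: sample count, sample period in 100 ns units, bytes per sample and parameter kind, all big-endian. A minimal Python sketch of the same read (the CRC/compression caveat from the TODO applies here too):

```python
import struct

def read_htk_header(path):
    with open(path, 'rb') as f:
        n_samples, samp_period, samp_size, parm_kind = struct.unpack('>iihh', f.read(12))
    return n_samples, samp_period, samp_size, parm_kind
```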
@@ -235,7 +235,7 @@ void VectorBase<Real>::CopyRowsFromMat(const MatrixBase<Real> &mat) {
memcpy(inc_data, mat.Data(), cols * rows * sizeof(Real));
} else {
for (MatrixIndexT i = 0; i < rows; i++) {
-// copy the data to the propper position
+// copy the data to the proper position
memcpy(inc_data, mat.RowData(i), cols * sizeof(Real));
// set new copy position
inc_data += cols;

@@ -44,7 +44,7 @@ std::string ReadFile2String(const std::string& path) {
}
bool FileExists(const std::string& strFilename) {
-// this funciton if from:
+// this function if from:
// https://github.com/kaldi-asr/kaldi/blob/master/src/fstext/deterministic-fst-test.cc
struct stat stFileInfo;
bool blnReturn;

@@ -407,7 +407,7 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) {
if (os.fail())
KALDI_WARN << "Stream failure detected.";
// Write another newline as a terminating character. The read routine will
-// detect this [this is a Kaldi mechanism, not somethig in the original
+// detect this [this is a Kaldi mechanism, not something in the original
// OpenFst code].
os << '\n';
return os.good();

@@ -34,7 +34,7 @@ bash run.sh --stop_stage 4
## Display Model with [Netron](https://github.com/lutzroeder/netron)
-If you have a model, we can using this commnd to show model graph.
+If you have a model, we can using this commend to show model graph.
For example:
```

@@ -74,7 +74,7 @@ includes/
#### set path
push resource into android phone
-1. change resource path in conf to gloabal path, such as:
+1. change resource path in conf to global path, such as:
[CONF]
wav_normal=true
@@ -92,9 +92,9 @@ push resource into android phone
high_freq=14000
dither=0.0
2. adb push conf label_list scp test.wav /data/local/tmp/
-3. set reource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:
+3. set resource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as:
std::string conf_path = "/data/local/tmp/conf";
std::string wav_path = "/data/local/tmp/test.wav";
-4. excecute android_demo in android studio
+4. execute android_demo in android studio

@@ -253,7 +253,6 @@ class Analysis:
# print(word3.length, word3.text)
if word3.length == -1:
chunk = Chunk(word1, word2)
-# print("Ture")
else:
chunk = Chunk(word1, word2, word3)
chunks.append(chunk)

@@ -181,8 +181,8 @@ template <typename T>
class FlagRegisterer {
public:
FlagRegisterer(const string &name, const FlagDescription<T> &desc) {
-auto registr = FlagRegister<T>::GetRegister();
-registr->SetDescription(name, desc);
+auto r = FlagRegister<T>::GetRegister();
+r->SetDescription(name, desc);
}
private:

@@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix):
if line == '':
continue
audio_id, text = line.split(' ', 1)
-# remove withespace, charactor text
+# remove withespace, character text
text = ''.join(text.split())
transcript_dict[audio_id] = text

@@ -63,7 +63,7 @@ def create_manifest(data_dir, manifest_path_prefix):
if line == '':
continue
audio_id, text = line.split(' ', 1)
-# remove withespace, charactor text
+# remove withespace, character text
text = ''.join(text.split())
transcript_dict[audio_id] = text

@@ -30,7 +30,7 @@ def _test_snapshot():
# use a simplest iterable object as dataloader
dataloader = count()
-# hack the training proecss: training does nothing except increse iteration
+# hack the training proecss: training does nothing except increase iteration
updater = StandardUpdater(model, optimizer, dataloader=dataloader)
updater.update_core = lambda x: None

@@ -17,7 +17,7 @@ cd liblbfgs-$VER
./configure --prefix=`pwd`
make
# due to the liblbfgs project directory structure, we have to use -i
-# but the erros are completely harmless
+# but the errors are completely harmless
make -i install
cd ..

@@ -44,7 +44,7 @@ add_arg('manifest_paths', str,
# bpe
add_arg('spm_model_prefix', str, None,
"spm model prefix, spm_model_%(bpe_mode)_%(count_threshold), only need when `unit_type` is spm")
-add_arg('output_path', str, None, "filepath of formated manifest.", required=True)
+add_arg('output_path', str, None, "filepath of formatted manifest.", required=True)
# yapf: disable
args = parser.parse_args()

@@ -79,7 +79,7 @@ if ($HELP)
print " -b ... disable Perl buffering.\n";
print " -time ... enable processing time calculation.\n";
print " -penn ... use Penn treebank-like tokenization.\n";
-print " -protected FILE ... specify file with patters to be protected in tokenisation.\n";
+print " -protected FILE ... specify file with patterns to be protected in tokenisation.\n";
print " -no-escape ... don't perform HTML escaping on apostrophy, quotes, etc.\n";
exit;
}
