Fix typos (#4021)

* Fix * Fix * Fix
9 months ago · c2dc4dae2d
parent de3851336f
commit c2dc4dae2d
55 changed files with 136 additions and 136 deletions
--- a/audio/paddleaudio/datasets/esc50.py
+++ b/audio/paddleaudio/datasets/esc50.py
@ -35,7 +35,7 @@ class ESC50(AudioClassificationDataset):
        http://dx.doi.org/10.1145/2733373.2806390
    """

-    archieves = [
+    archives = [
        {
            'url':
            'https://paddleaudio.bj.bcebos.com/datasets/ESC-50-master.zip',
@ -133,7 +133,7 @@ class ESC50(AudioClassificationDataset):
    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-            download_and_decompress(self.archieves, DATA_HOME)
+            download_and_decompress(self.archives, DATA_HOME)

        meta_info = self._get_meta_info()

--- a/audio/paddleaudio/datasets/gtzan.py
+++ b/audio/paddleaudio/datasets/gtzan.py
@ -35,7 +35,7 @@ class GTZAN(AudioClassificationDataset):
        https://ieeexplore.ieee.org/document/1021072/
    """

-    archieves = [
+    archives = [
        {
            'url': 'http://opihi.cs.uvic.ca/sound/genres.tar.gz',
            'md5': '5b3d6dddb579ab49814ab86dba69e7c7',
@ -85,7 +85,7 @@ class GTZAN(AudioClassificationDataset):
                  split) -> Tuple[List[str], List[int]]:
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-            download_and_decompress(self.archieves, DATA_HOME)
+            download_and_decompress(self.archives, DATA_HOME)

        meta_info = self._get_meta_info()
        random.seed(seed)  # shuffle samples to split data
--- a/audio/paddleaudio/datasets/rirs_noises.py
+++ b/audio/paddleaudio/datasets/rirs_noises.py
@ -30,7 +30,7 @@ __all__ = ['OpenRIRNoise']


 class OpenRIRNoise(Dataset):
-    archieves = [
+    archives = [
        {
            'url': 'http://www.openslr.org/resources/28/rirs_noises.zip',
            'md5': 'e6f48e257286e05de56413b4779d8ffb',
@ -76,7 +76,7 @@ class OpenRIRNoise(Dataset):
        print(f"rirs noises base path: {self.base_path}")
        if not os.path.isdir(self.base_path):
            download_and_decompress(
-                self.archieves, self.base_path, decompress=True)
+                self.archives, self.base_path, decompress=True)
        else:
            print(
                f"{self.base_path} already exists, we will not download and decompress again"
--- a/audio/paddleaudio/datasets/tess.py
+++ b/audio/paddleaudio/datasets/tess.py
@ -37,7 +37,7 @@ class TESS(AudioClassificationDataset):
        https://doi.org/10.5683/SP2/E8H2MF
    """

-    archieves = [
+    archives = [
        {
            'url':
            'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip',
@ -93,7 +93,7 @@ class TESS(AudioClassificationDataset):
    def _get_data(self, mode, seed, n_folds,
                  split) -> Tuple[List[str], List[int]]:
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)):
-            download_and_decompress(self.archieves, DATA_HOME)
+            download_and_decompress(self.archives, DATA_HOME)

        wav_files = []
        for root, _, files in os.walk(os.path.join(DATA_HOME, self.audio_path)):
--- a/audio/paddleaudio/datasets/urban_sound.py
+++ b/audio/paddleaudio/datasets/urban_sound.py
@ -35,7 +35,7 @@ class UrbanSound8K(AudioClassificationDataset):
        https://dl.acm.org/doi/10.1145/2647868.2655045
    """

-    archieves = [
+    archives = [
        {
            'url':
            'https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz',
@ -81,7 +81,7 @@ class UrbanSound8K(AudioClassificationDataset):
    def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]:
        if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \
            not os.path.isfile(os.path.join(DATA_HOME, self.meta)):
-            download_and_decompress(self.archieves, DATA_HOME)
+            download_and_decompress(self.archives, DATA_HOME)

        meta_info = self._get_meta_info()

--- a/audio/paddleaudio/datasets/voxceleb.py
+++ b/audio/paddleaudio/datasets/voxceleb.py
@ -34,7 +34,7 @@ __all__ = ['VoxCeleb']

 class VoxCeleb(Dataset):
    source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/'
-    archieves_audio_dev = [
+    archives_audio_dev = [
        {
            'url': source_url + 'vox1_dev_wav_partaa',
            'md5': 'e395d020928bc15670b570a21695ed96',
@ -52,13 +52,13 @@ class VoxCeleb(Dataset):
            'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19',
        },
    ]
-    archieves_audio_test = [
+    archives_audio_test = [
        {
            'url': source_url + 'vox1_test_wav.zip',
            'md5': '185fdc63c3c739954633d50379a3d102',
        },
    ]
-    archieves_meta = [
+    archives_meta = [
        {
            'url':
            'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt',
@ -135,11 +135,11 @@ class VoxCeleb(Dataset):
        if not os.path.isdir(self.wav_path):
            print("start to download the voxceleb1 dataset")
            download_and_decompress(  # multi-zip parts concatenate to vox1_dev_wav.zip
-                self.archieves_audio_dev,
+                self.archives_audio_dev,
                self.base_path,
                decompress=False)
            download_and_decompress(  # download the vox1_test_wav.zip and unzip
-                self.archieves_audio_test,
+                self.archives_audio_test,
                self.base_path,
                decompress=True)

@ -157,7 +157,7 @@ class VoxCeleb(Dataset):
        if not os.path.isdir(self.meta_path):
            print("prepare the meta data")
            download_and_decompress(
-                self.archieves_meta, self.meta_path, decompress=False)
+                self.archives_meta, self.meta_path, decompress=False)

        # Data preparation.
        if not os.path.isdir(self.csv_path):
--- a/dataset/chime3_background/chime3_background.py
+++ b/dataset/chime3_background/chime3_background.py
@ -109,7 +109,7 @@ def create_manifest(data_dir, manifest_path):


 def prepare_chime3(url, md5sum, target_dir, manifest_path):
-    """Download, unpack and create summmary manifest file."""
+    """Download, unpack and create summary manifest file."""
    if not os.path.exists(os.path.join(target_dir, "CHiME3")):
        # download
        filepath = download(url, md5sum, target_dir,
--- a/dataset/timit/timit.py
+++ b/dataset/timit/timit.py
@ -210,7 +210,7 @@ def create_manifest(data_dir, manifest_path_prefix):


 def prepare_dataset(url, md5sum, target_dir, manifest_path):
-    """Download, unpack and create summmary manifest file.
+    """Download, unpack and create summary manifest file.
    """
    filepath = os.path.join(target_dir, "TIMIT.zip")
    if not os.path.exists(filepath):
--- a/demos/TTSCppFrontend/src/front/front_interface.cpp
+++ b/demos/TTSCppFrontend/src/front/front_interface.cpp
@ -115,27 +115,27 @@ int FrontEngineInterface::init() {

    // 生成词典（词到音素的映射）
    if (0 != GenDict(_word2phone_path, &word_phone_map)) {
-        LOG(ERROR) << "Genarate word2phone dict failed";
+        LOG(ERROR) << "Generate word2phone dict failed";
        return -1;
    }

    // 生成音素字典（音素到音素id的映射）
    if (0 != GenDict(_phone2id_path, &phone_id_map)) {
-        LOG(ERROR) << "Genarate phone2id dict failed";
+        LOG(ERROR) << "Generate phone2id dict failed";
        return -1;
    }

    // 生成音调字典（音调到音调id的映射）
    if (_separate_tone == "true") {
        if (0 != GenDict(_tone2id_path, &tone_id_map)) {
-            LOG(ERROR) << "Genarate tone2id dict failed";
+            LOG(ERROR) << "Generate tone2id dict failed";
            return -1;
        }
    }

    // 生成繁简字典（繁体到简体id的映射）
    if (0 != GenDict(_trand2simp_path, &trand_simp_map)) {
-        LOG(ERROR) << "Genarate trand2simp dict failed";
+        LOG(ERROR) << "Generate trand2simp dict failed";
        return -1;
    }

@ -263,7 +263,7 @@ int FrontEngineInterface::GetWordsIds(
                if (0 !=
                    GetInitialsFinals(word, &word_initials, &word_finals)) {
                    LOG(ERROR)
-                        << "Genarate the word_initials and word_finals of "
+                        << "Generate the word_initials and word_finals of "
                        << word << " failed";
                    return -1;
                }
@ -304,7 +304,7 @@ int FrontEngineInterface::GetWordsIds(

            // 音素到音素id
            if (0 != Phone2Phoneid(phone, phoneids, toneids)) {
-                LOG(ERROR) << "Genarate the phone id of " << word << " failed";
+                LOG(ERROR) << "Generate the phone id of " << word << " failed";
                return -1;
            }
        }
@ -916,11 +916,11 @@ int FrontEngineInterface::NeuralSandhi(const std::string &word,
        if (find(must_neural_tone_words.begin(),
                 must_neural_tone_words.end(),
                 word) != must_neural_tone_words.end() ||
-            (word_num >= 2 &&
-             find(must_neural_tone_words.begin(),
-                  must_neural_tone_words.end(),
-                  ppspeech::wstring2utf8string(word_wstr.substr(
-                      word_num - 2))) != must_neural_tone_words.end())) {
+            (word_num >= 2 && find(must_neural_tone_words.begin(),
+                                   must_neural_tone_words.end(),
+                                   ppspeech::wstring2utf8string(
+                                       word_wstr.substr(word_num - 2))) !=
+                                  must_neural_tone_words.end())) {
            (*finals).back() =
                (*finals).back().replace((*finals).back().length() - 1, 1, "5");
        }
--- a/demos/audio_searching/src/milvus_helpers.py
+++ b/demos/audio_searching/src/milvus_helpers.py
@ -77,13 +77,13 @@ class MilvusHelper:
                field1 = FieldSchema(
                    name="id",
                    dtype=DataType.INT64,
-                    descrition="int64",
+                    description="int64",
                    is_primary=True,
                    auto_id=True)
                field2 = FieldSchema(
                    name="embedding",
                    dtype=DataType.FLOAT_VECTOR,
-                    descrition="speaker embeddings",
+                    description="speaker embeddings",
                    dim=VECTOR_DIMENSION,
                    is_primary=False)
                schema = CollectionSchema(
--- a/demos/speech_web/speech_server/main.py
+++ b/demos/speech_web/speech_server/main.py
@ -225,7 +225,7 @@ async def websocket_endpoint_online(websocket: WebSocket):
        websocket (WebSocket): the websocket instance
    """

-    #1. the interface wait to accept the websocket protocal header
+    #1. the interface wait to accept the websocket protocol header
    #   and only we receive the header, it establish the connection with specific thread
    await websocket.accept()

@ -238,7 +238,7 @@ async def websocket_endpoint_online(websocket: WebSocket):
    connection_handler = None

    try:
-        #4. we do a loop to process the audio package by package according the protocal
+        #4. we do a loop to process the audio package by package according the protocol
        #   and only if the client send finished signal, we will break the loop
        while True:
            # careful here, changed the source code from starlette.websockets
--- a/demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py
+++ b/demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py
@ -75,7 +75,7 @@ class TritonPythonModel:
    def initialize(self, args):
        """`initialize` is called only once when the model is being loaded.
        Implementing `initialize` function is optional. This function allows
-        the model to intialize any state associated with this model.
+        the model to initialize any state associated with this model.
        Parameters
        ----------
        args : dict
--- a/examples/aishell3/tts3/conf/conformer.yaml
+++ b/examples/aishell3/tts3/conf/conformer.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    encoder_normalize_before: True    # whether to perform layer normalization before the input
@ -66,14 +66,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: true   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/aishell3/tts3/conf/default.yaml
+++ b/examples/aishell3/tts3/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/aishell3/vc1/conf/default.yaml
+++ b/examples/aishell3/vc1/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/aishell3/vc2/conf/default.yaml
+++ b/examples/aishell3/vc2/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/aishell3/voc5/conf/default.yaml
+++ b/examples/aishell3/voc5/conf/default.yaml
@ -39,7 +39,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -77,7 +77,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/canton/tts3/conf/default.yaml
+++ b/examples/canton/tts3/conf/default.yaml
@ -45,7 +45,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -63,14 +63,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/csmsc/jets/conf/default.yaml
+++ b/examples/csmsc/jets/conf/default.yaml
@ -60,14 +60,14 @@ model:
        transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
        pitch_predictor_layers: 5                    # number of conv layers in pitch predictor
        pitch_predictor_chans: 256                   # number of channels of conv layers in pitch predictor
-        pitch_predictor_kernel_size: 5               # kernel size of conv leyers in pitch predictor
+        pitch_predictor_kernel_size: 5               # kernel size of conv layers in pitch predictor
        pitch_predictor_dropout: 0.5                 # dropout rate in pitch predictor
        pitch_embed_kernel_size: 1                   # kernel size of conv embedding layer for pitch
        pitch_embed_dropout: 0.0                     # dropout rate after conv embedding layer for pitch
        stop_gradient_from_pitch_predictor: true     # whether to stop the gradient from pitch predictor to encoder
        energy_predictor_layers: 2                   # number of conv layers in energy predictor
        energy_predictor_chans: 256                  # number of channels of conv layers in energy predictor
-        energy_predictor_kernel_size: 3              # kernel size of conv leyers in energy predictor
+        energy_predictor_kernel_size: 3              # kernel size of conv layers in energy predictor
        energy_predictor_dropout: 0.5                # dropout rate in energy predictor
        energy_embed_kernel_size: 1                  # kernel size of conv embedding layer for energy
        energy_embed_dropout: 0.0                    # dropout rate after conv embedding layer for energy
--- a/examples/csmsc/tts3/conf/cnndecoder.yaml
+++ b/examples/csmsc/tts3/conf/cnndecoder.yaml
@ -43,7 +43,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -65,14 +65,14 @@ model:
    cnn_decoder_embedding_dim: 256
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/csmsc/tts3/conf/conformer.yaml
+++ b/examples/csmsc/tts3/conf/conformer.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    encoder_normalize_before: True    # whether to perform layer normalization before the input
@ -66,14 +66,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/csmsc/tts3/conf/default.yaml
+++ b/examples/csmsc/tts3/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/csmsc/voc5/conf/default.yaml
+++ b/examples/csmsc/voc5/conf/default.yaml
@ -38,7 +38,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -76,7 +76,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/csmsc/voc5/conf/finetune.yaml
+++ b/examples/csmsc/voc5/conf/finetune.yaml
@ -38,7 +38,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -76,7 +76,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/csmsc/voc5/conf/iSTFT.yaml
+++ b/examples/csmsc/voc5/conf/iSTFT.yaml
@ -42,7 +42,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -83,7 +83,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/ljspeech/tts1/conf/default.yaml
+++ b/examples/ljspeech/tts1/conf/default.yaml
@ -34,7 +34,7 @@ model:                     # keyword arguments for the selected model
    dunits: 1024           # number of decoder ff units
    positionwise_layer_type: conv1d  # type of position-wise layer
    positionwise_conv_kernel_size: 1 # kernel size of position wise conv layer
-    postnet_layers: 5                # number of layers of postnset
+    postnet_layers: 5                # number of layers of postnet
    postnet_filts: 5                 # filter size of conv layers in postnet
    postnet_chans: 256               # number of channels of conv layers in postnet
    use_scaled_pos_enc: True         # whether to use scaled positional encoding
--- a/examples/ljspeech/tts3/conf/default.yaml
+++ b/examples/ljspeech/tts3/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/ljspeech/voc5/conf/default.yaml
+++ b/examples/ljspeech/voc5/conf/default.yaml
@ -38,7 +38,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -76,7 +76,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/opencpop/svs1/conf/default.yaml
+++ b/examples/opencpop/svs1/conf/default.yaml
@ -68,14 +68,14 @@ model:
        duration_predictor_dropout_rate: 0.5          # dropout rate in energy predictor
        pitch_predictor_layers: 5                     # number of conv layers in pitch predictor
        pitch_predictor_chans: 256                    # number of channels of conv layers in pitch predictor
-        pitch_predictor_kernel_size: 5                # kernel size of conv leyers in pitch predictor
+        pitch_predictor_kernel_size: 5                # kernel size of conv layers in pitch predictor
        pitch_predictor_dropout: 0.5                  # dropout rate in pitch predictor
        pitch_embed_kernel_size: 1                    # kernel size of conv embedding layer for pitch
        pitch_embed_dropout: 0.0                      # dropout rate after conv embedding layer for pitch
        stop_gradient_from_pitch_predictor: True      # whether to stop the gradient from pitch predictor to encoder
        energy_predictor_layers: 2                    # number of conv layers in energy predictor
        energy_predictor_chans: 256                   # number of channels of conv layers in energy predictor
-        energy_predictor_kernel_size: 3               # kernel size of conv leyers in energy predictor
+        energy_predictor_kernel_size: 3               # kernel size of conv layers in energy predictor
        energy_predictor_dropout: 0.5                 # dropout rate in energy predictor
        energy_embed_kernel_size: 1                   # kernel size of conv embedding layer for energy
        energy_embed_dropout: 0.0                     # dropout rate after conv embedding layer for energy
--- a/examples/opencpop/voc5/conf/default.yaml
+++ b/examples/opencpop/voc5/conf/default.yaml
@ -38,7 +38,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -76,7 +76,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/opencpop/voc5/conf/finetune.yaml
+++ b/examples/opencpop/voc5/conf/finetune.yaml
@ -38,7 +38,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -76,7 +76,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/vctk/tts3/conf/default.yaml
+++ b/examples/vctk/tts3/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/examples/vctk/voc5/conf/default.yaml
+++ b/examples/vctk/voc5/conf/default.yaml
@ -39,7 +39,7 @@ generator_params:
    use_additional_convs: True            # Whether to use additional conv layer in residual blocks.
    bias: True                            # Whether to use bias parameter in conv.
    nonlinear_activation: "leakyrelu"     # Nonlinear activation type.
-    nonlinear_activation_params:          # Nonlinear activation paramters.
+    nonlinear_activation_params:          # Nonlinear activation parameters.
        negative_slope: 0.1
    use_weight_norm: True                 # Whether to apply weight normalization.

@ -77,7 +77,7 @@ discriminator_params:
        max_downsample_channels: 1024      # Maximum number of channels in downsampling conv layers.
        bias: True                         # Whether to use bias parameter in conv layer."
        nonlinear_activation: "leakyrelu"  # Nonlinear activation.
-        nonlinear_activation_params:       # Nonlinear activation paramters.
+        nonlinear_activation_params:       # Nonlinear activation parameters.
            negative_slope: 0.1
        use_weight_norm: True              # Whether to apply weight normalization.
        use_spectral_norm: False           # Whether to apply spectral normalization.
--- a/examples/voxceleb/sv0/local/data_prepare.py
+++ b/examples/voxceleb/sv0/local/data_prepare.py
@ -32,8 +32,8 @@ def main(args, config):
    seed_everything(config.seed)

    # stage 1: generate the voxceleb csv file
-    # Note: this may occurs c++ execption, but the program will execute fine
-    # so we ignore the execption 
+    # Note: this may raise a C++ exception, but the program will execute fine
+    # so we ignore the exception
    # we explicitly pass the vox2 base path to data prepare and generate the audio info
    logger.info("start to generate the voxceleb dataset info")
    train_dataset = VoxCeleb(
--- a/examples/zh_en_tts/tts3/conf/default.yaml
+++ b/examples/zh_en_tts/tts3/conf/default.yaml
@ -42,7 +42,7 @@ model:
    duration_predictor_layers: 2      # number of layers of duration predictor
    duration_predictor_chans: 256     # number of channels of duration predictor
    duration_predictor_kernel_size: 3 # filter size of duration predictor
-    postnet_layers: 5                 # number of layers of postnset
+    postnet_layers: 5                 # number of layers of postnet
    postnet_filts: 5                  # filter size of conv layers in postnet
    postnet_chans: 256                # number of channels of conv layers in postnet
    use_scaled_pos_enc: True          # whether to use scaled positional encoding
@ -60,14 +60,14 @@ model:
    transformer_dec_attn_dropout_rate: 0.2       # dropout rate for transformer decoder attention layer
    pitch_predictor_layers: 5                  # number of conv layers in pitch predictor
    pitch_predictor_chans: 256                 # number of channels of conv layers in pitch predictor
-    pitch_predictor_kernel_size: 5             # kernel size of conv leyers in pitch predictor
+    pitch_predictor_kernel_size: 5             # kernel size of conv layers in pitch predictor
    pitch_predictor_dropout: 0.5               # dropout rate in pitch predictor
    pitch_embed_kernel_size: 1                 # kernel size of conv embedding layer for pitch
    pitch_embed_dropout: 0.0                   # dropout rate after conv embedding layer for pitch
    stop_gradient_from_pitch_predictor: True   # whether to stop the gradient from pitch predictor to encoder
    energy_predictor_layers: 2                 # number of conv layers in energy predictor
    energy_predictor_chans: 256                # number of channels of conv layers in energy predictor
-    energy_predictor_kernel_size: 3            # kernel size of conv leyers in energy predictor
+    energy_predictor_kernel_size: 3            # kernel size of conv layers in energy predictor
    energy_predictor_dropout: 0.5              # dropout rate in energy predictor
    energy_embed_kernel_size: 1                # kernel size of conv embedding layer for energy
    energy_embed_dropout: 0.0                  # dropout rate after conv embedding layer for energy
--- a/paddlespeech/audio/utils/tensor_utils.py
+++ b/paddlespeech/audio/utils/tensor_utils.py
@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
    max_size = paddle.shape(sequences[0])
-    # (TODO Hui Zhang): slice not supprot `end==start`
+    # (TODO Hui Zhang): slice not support `end==start`
    # trailing_dims = max_size[1:]
    trailing_dims = tuple(
        max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@ -93,7 +93,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
        length = tensor.shape[0]
        # use index notation to prevent duplicate references to the tensor
        if batch_first:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
            # TODO (Hui Zhang): set_value op not support int16
            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
            # out_tensor[i, :length, ...] = tensor
@ -102,7 +102,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
            else:
                out_tensor[i, length] = tensor
        else:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
            # out_tensor[:length, i, ...] = tensor
            if length != 0:
                out_tensor[:length, i] = tensor
--- a/paddlespeech/s2t/exps/hubert/model.py
+++ b/paddlespeech/s2t/exps/hubert/model.py
@ -362,7 +362,7 @@ class HubertASRTrainer(Trainer):
        scratch = None
        if self.args.resume:
            # just restore ckpt
-            # lr will resotre from optimizer ckpt
+            # lr will restore from optimizer ckpt
            resume_json_path = os.path.join(self.checkpoint_dir,
                                            self.args.resume + '.json')
            with open(resume_json_path, 'r', encoding='utf8') as f:
@ -370,20 +370,20 @@ class HubertASRTrainer(Trainer):
            self.iteration = 0
            self.epoch = resume_json["epoch"]

-            # resotre model from *.pdparams
+            # restore model from *.pdparams
            params_path = os.path.join(self.checkpoint_dir,
                                       "{}".format(self.epoch)) + '.pdparams'
            model_dict = paddle.load(params_path)
            self.model.set_state_dict(model_dict)

-            # resotre optimizer from *.pdopt
+            # restore optimizer from *.pdopt
            optimizer_path = os.path.join(self.checkpoint_dir,
                                          "{}".format(self.epoch)) + '.pdopt'
            optimizer_dict = paddle.load(optimizer_path)
            self.model_optimizer.set_state_dict(optimizer_dict['model'])
            self.hubert_optimizer.set_state_dict(optimizer_dict['hubert'])

-            # resotre lr_scheduler from *.pdlrs
+            # restore lr_scheduler from *.pdlrs
            scheduler_path = os.path.join(self.checkpoint_dir,
                                          "{}".format(self.epoch)) + '.pdlrs'
            if os.path.isfile(os.path.join(scheduler_path)):
--- a/paddlespeech/s2t/exps/wav2vec2/model.py
+++ b/paddlespeech/s2t/exps/wav2vec2/model.py
@ -361,7 +361,7 @@ class Wav2Vec2ASRTrainer(Trainer):
        scratch = None
        if self.args.resume:
            # just restore ckpt
-            # lr will resotre from optimizer ckpt
+            # lr will restore from optimizer ckpt
            resume_json_path = os.path.join(self.checkpoint_dir,
                                            self.args.resume + '.json')
            with open(resume_json_path, 'r', encoding='utf8') as f:
@ -369,20 +369,20 @@ class Wav2Vec2ASRTrainer(Trainer):
            self.iteration = 0
            self.epoch = resume_json["epoch"]

-            # resotre model from *.pdparams
+            # restore model from *.pdparams
            params_path = os.path.join(self.checkpoint_dir,
                                       "{}".format(self.epoch)) + '.pdparams'
            model_dict = paddle.load(params_path)
            self.model.set_state_dict(model_dict)

-            # resotre optimizer from *.pdopt
+            # restore optimizer from *.pdopt
            optimizer_path = os.path.join(self.checkpoint_dir,
                                          "{}".format(self.epoch)) + '.pdopt'
            optimizer_dict = paddle.load(optimizer_path)
            self.model_optimizer.set_state_dict(optimizer_dict['model'])
            self.wav2vec2_optimizer.set_state_dict(optimizer_dict['wav2vec2'])

-            # resotre lr_scheduler from *.pdlrs
+            # restore lr_scheduler from *.pdlrs
            scheduler_path = os.path.join(self.checkpoint_dir,
                                          "{}".format(self.epoch)) + '.pdlrs'
            if os.path.isfile(os.path.join(scheduler_path)):
--- a/paddlespeech/s2t/exps/wavlm/model.py
+++ b/paddlespeech/s2t/exps/wavlm/model.py
@ -361,7 +361,7 @@ class WavLMASRTrainer(Trainer):
        scratch = None
        if self.args.resume:
            # just restore ckpt
-            # lr will resotre from optimizer ckpt
+            # lr will restore from optimizer ckpt
            resume_json_path = os.path.join(self.checkpoint_dir,
                                            self.args.resume + '.json')
            with open(resume_json_path, 'r', encoding='utf8') as f:
@ -369,20 +369,20 @@ class WavLMASRTrainer(Trainer):
            self.iteration = 0
            self.epoch = resume_json["epoch"]

-            # resotre model from *.pdparams
+            # restore model from *.pdparams
            params_path = os.path.join(self.checkpoint_dir,
                                       "{}".format(self.epoch)) + '.pdparams'
            model_dict = paddle.load(params_path)
            self.model.set_state_dict(model_dict)

-            # resotre optimizer from *.pdopt
+            # restore optimizer from *.pdopt
            optimizer_path = os.path.join(self.checkpoint_dir,
                                          "{}".format(self.epoch)) + '.pdopt'
            optimizer_dict = paddle.load(optimizer_path)
            self.model_optimizer.set_state_dict(optimizer_dict['model'])
            self.wavlm_optimizer.set_state_dict(optimizer_dict['wavlm'])

-            # resotre lr_scheduler from *.pdlrs
+            # restore lr_scheduler from *.pdlrs
            scheduler_path = os.path.join(self.checkpoint_dir,
                                          "{}".format(self.epoch)) + '.pdlrs'
            if os.path.isfile(os.path.join(scheduler_path)):
--- a/paddlespeech/s2t/training/trainer.py
+++ b/paddlespeech/s2t/training/trainer.py
@ -215,7 +215,7 @@ class Trainer():
            checkpoint_path=self.args.checkpoint_path)
        if infos:
            # just restore ckpt
-            # lr will resotre from optimizer ckpt
+            # lr will restore from optimizer ckpt
            self.iteration = infos["step"]
            self.epoch = infos["epoch"]

--- a/paddlespeech/s2t/utils/tensor_utils.py
+++ b/paddlespeech/s2t/utils/tensor_utils.py
@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
    # assuming trailing dimensions and type of all the Tensors
    # in sequences are same and fetching those from sequences[0]
    max_size = paddle.shape(sequences[0])
-    # (TODO Hui Zhang): slice not supprot `end==start`
+    # (TODO Hui Zhang): slice not support `end==start`
    # trailing_dims = max_size[1:]
    trailing_dims = tuple(
        max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else ()
@ -98,7 +98,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
            f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}"
        )
        if batch_first:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
            # TODO (Hui Zhang): set_value op not support int16
            # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...]
            # out_tensor[i, :length, ...] = tensor
@ -107,7 +107,7 @@ def pad_sequence(sequences: List[paddle.Tensor],
            else:
                out_tensor[i, length] = tensor
        else:
-            # TODO (Hui Zhang): set_value op not supprot `end==start`
+            # TODO (Hui Zhang): set_value op not support `end==start`
            # out_tensor[:length, i, ...] = tensor
            if length != 0:
                out_tensor[:length, i] = tensor
--- a/paddlespeech/server/utils/audio_handler.py
+++ b/paddlespeech/server/utils/audio_handler.py
@ -79,7 +79,7 @@ class ASRWsAudioHandler:
                 punc_server_ip=None,
                 punc_server_port=None):
        """PaddleSpeech Online ASR Server Client  audio handler
-           Online asr server use the websocket protocal
+           Online asr server uses the websocket protocol
        Args:
            url (str, optional): the server ip. Defaults to None.
            port (int, optional): the server port. Defaults to None.
@ -144,10 +144,10 @@ class ASRWsAudioHandler:
            logger.error("No asr server, please input valid ip and port")
            return ""

-        # 1. send websocket handshake protocal
+        # 1. send websocket handshake protocol
        start_time = time.time()
        async with websockets.connect(self.url) as ws:
-            # 2. server has already received handshake protocal
+            # 2. server has already received handshake protocol
            # client start to send the command
            audio_info = json.dumps(
                {
@ -255,7 +255,7 @@ class ASRHttpHandler:
 class TTSWsHandler:
    def __init__(self, server="127.0.0.1", port=8092, play: bool=False):
        """PaddleSpeech Online TTS Server Client  audio handler
-           Online tts server use the websocket protocal
+           Online tts server uses the websocket protocol
        Args:
            server (str, optional): the server ip. Defaults to "127.0.0.1".
            port (int, optional): the server port. Defaults to 8092.
@ -405,7 +405,7 @@ class TTSWsHandler:
 class TTSHttpHandler:
    def __init__(self, server="127.0.0.1", port=8092, play: bool=False):
        """PaddleSpeech Online TTS Server Client  audio handler
-           Online tts server use the websocket protocal
+           Online tts server uses the http protocol
        Args:
            server (str, optional): the server ip. Defaults to "127.0.0.1".
            port (int, optional): the server port. Defaults to 8092.
--- a/paddlespeech/server/ws/asr_api.py
+++ b/paddlespeech/server/ws/asr_api.py
@ -31,7 +31,7 @@ async def websocket_endpoint(websocket: WebSocket):
        websocket (WebSocket): the websocket instance
    """

-    #1. the interface wait to accept the websocket protocal header
+    #1. the interface waits to accept the websocket protocol header
    #   and only we receive the header, it establish the connection with specific thread
    await websocket.accept()

@ -45,7 +45,7 @@ async def websocket_endpoint(websocket: WebSocket):
    connection_handler = None

    try:
-        #4. we do a loop to process the audio package by package according the protocal
+        #4. we do a loop to process the audio package by package according to the protocol
        #   and only if the client send finished signal, we will break the loop
        while True:
            # careful here, changed the source code from starlette.websockets
--- a/paddlespeech/server/ws/tts_api.py
+++ b/paddlespeech/server/ws/tts_api.py
@ -32,7 +32,7 @@ async def websocket_endpoint(websocket: WebSocket):
        websocket (WebSocket): the websocket instance
    """

-    #1. the interface wait to accept the websocket protocal header
+    #1. the interface wait to accept the websocket protocol header
    #   and only we receive the header, it establish the connection with specific thread
    await websocket.accept()

--- a/paddlespeech/t2s/models/vits/residual_coupling.py
+++ b/paddlespeech/t2s/models/vits/residual_coupling.py
@ -76,7 +76,7 @@ class ResidualAffineCouplingBlock(nn.Layer):
            use_weight_norm (bool):
                Whether to use weight normalization in WaveNet.
            bias (bool):
-                Whether to use bias paramters in WaveNet.
+                Whether to use bias parameters in WaveNet.
            use_only_mean (bool):
                Whether to estimate only mean.

@ -169,7 +169,7 @@ class ResidualAffineCouplingLayer(nn.Layer):
            use_weight_norm (bool):
                Whether to use weight normalization in WaveNet.
            bias (bool):
-                Whether to use bias paramters in WaveNet.
+                Whether to use bias parameters in WaveNet.
            use_only_mean (bool):
                Whether to estimate only mean.

--- a/runtime/examples/text_lm/local/mmseg.py
+++ b/runtime/examples/text_lm/local/mmseg.py
@ -156,8 +156,8 @@ class Analysis:
        return self.text[self.pos]

    #判断该字符是否是中文字符（不包括中文标点）    
-    def isChineseChar(self, charater):
-        return 0x4e00 <= ord(charater) < 0x9fa6
+    def isChineseChar(self, character):
+        return 0x4e00 <= ord(character) < 0x9fa6

    #判断是否是ASCII码  
    def isASCIIChar(self, ch):
--- a/tests/unit/server/offline/test_server_client.sh
+++ b/tests/unit/server/offline/test_server_client.sh
@ -66,8 +66,8 @@ config_file=./conf/application.yaml
 server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
 port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')

-echo "Sevice ip: $server_ip" | tee ./log/test_result.log
-echo "Sevice port: $port" | tee -a ./log/test_result.log
+echo "Service ip: $server_ip" | tee ./log/test_result.log
+echo "Service port: $port" | tee -a ./log/test_result.log

 # whether a process is listening on $port
 pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
@ -190,7 +190,7 @@ echo "**************************************************************************

 echo "All tests completed."  | tee -a ./log/test_result.log

-# sohw all the test results
+# show all the test results
 echo "***************** Here are all the test results ********************"
 cat ./log/test_result.log

--- a/tests/unit/server/online/tts/check_server/test.sh
+++ b/tests/unit/server/online/tts/check_server/test.sh
@ -76,8 +76,8 @@ config_file=./conf/application.yaml
 server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}')
 port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}')

-echo "Sevice ip: $server_ip" | tee $log/test_result.log
-echo "Sevice port: $port" | tee -a $log/test_result.log
+echo "Service ip: $server_ip" | tee $log/test_result.log
+echo "Service port: $port" | tee -a $log/test_result.log

 # whether a process is listening on $port
 pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'`
@ -307,7 +307,7 @@ echo "**************************************************************************
 echo "All tests completed."  | tee -a $log/test_result.log


-# sohw all the test results
+# show all the test results
 echo "***************** Here are all the test results ********************"
 cat $log/test_result.log

--- a/tools/extras/install_liblbfgs.sh
+++ b/tools/extras/install_liblbfgs.sh
@ -23,7 +23,7 @@ cd ..

 (
  [ ! -z "${LIBLBFGS}" ] && \
-    echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \
+    echo >&2 "LIBLBFGS variable is already defined. Undefining..." && \
    unset LIBLBFGS

  [ -f ./env.sh ] && . ./env.sh
--- a/tools/extras/install_srilm.sh
+++ b/tools/extras/install_srilm.sh
@ -68,7 +68,7 @@ make || exit
 cd ..
 (
  [ ! -z "${SRILM}" ] && \
-    echo >&2 "SRILM variable is aleady defined. Undefining..." && \
+    echo >&2 "SRILM variable is already defined. Undefining..." && \
    unset SRILM

  [ -f ./env.sh ] && . ./env.sh
--- a/utils/fst/ctc_token_fst.py
+++ b/utils/fst/ctc_token_fst.py
@ -32,7 +32,7 @@ def main(args):
                # leaving `token`
                print('{} {} {} {}'.format(node, 2, '<eps>', '<eps>'))
            node += 1
-    # Fianl node
+    # Final node
    print('0')


--- a/utils/fst/make_tlg.sh
+++ b/utils/fst/make_tlg.sh
@ -21,7 +21,7 @@ cp -r $src_lang $tgt_lang
 # eps2disambig.pl: replace epsilons on the input side with the special disambiguation symbol #0. 
 # s2eps.pl: replaces <s> and </s> with <eps> (on both input and output sides), for the G.fst acceptor.
 # G.fst, the disambiguation symbol #0 only appears on the input side
-# do eps2disambig.pl and s2eps.pl maybe just for fallowing `fstrmepsilon`.
+# do eps2disambig.pl and s2eps.pl maybe just for following `fstrmepsilon`.
 cat $arpa_lm | \
   grep -v '<s> <s>' | \
   grep -v '</s> <s>' | \
--- a/utils/generate_infer_yaml.py
+++ b/utils/generate_infer_yaml.py
@ -3,7 +3,7 @@
 '''
    Merge training configs into a single inference config.
    The single inference config is for CLI, which only takes a single config to do inferencing.
-    The trainig configs includes: model config, preprocess config, decode config, vocab file and cmvn file.
+    The training configs include: model config, preprocess config, decode config, vocab file and cmvn file.

    Process:
    # step 1: prepare dir
@ -11,7 +11,7 @@
    cp -r exp conf data release_dir
    cd release_dir 
 
-    # step 2: get "model.yaml" which conatains all configuration info.
+    # step 2: get "model.yaml" which contains all configuration info.
    # if does not contain preprocess.yaml file. e.g ds2:
    python generate_infer_yaml.py --cfg_pth conf/deepspeech2_online.yaml --dcd_pth conf/tuning/chunk_decode.yaml --vb_pth data/lang_char/vocab.txt --cmvn_pth data/mean_std.json --save_pth model.yaml --pre_pth null        
    # if contains preprocess.yaml file. e.g  u2:
--- a/utils/train_arpa_with_kenlm.sh
+++ b/utils/train_arpa_with_kenlm.sh
@ -37,7 +37,7 @@ fi
 # the text should be properly pre-processed, e.g:
 #   cleand, normalized and possibly word-segmented

-# get rid off irrelavent symbols
+# get rid of irrelevant symbols
 grep -v '<eps>' $symbol_table \
  | grep -v '#0' \
  | grep -v '<unk>' | grep -v '<UNK>' \
@ -51,7 +51,7 @@ grep -v '<eps>' $symbol_table \
 # 
 # TL;DR reason:
 # Unlike SRILM's -limit-vocab, kenlm's --limit_vocab_file option 
-# spcifies a *valid* set of vocabulary, whereas *valid but unseen* 
+# specifies a *valid* set of vocabulary, whereas *valid but unseen* 
 # words are discarded in final arpa.
 # So the trick is, 
 # we explicitly add kaldi's vocab(one word per line) to training text, 
--- a/utils/zh_tn.py
+++ b/utils/zh_tn.py
@ -1288,7 +1288,7 @@ def normalize_corpus(corpus,


 def char_token(s: Text) -> List[Text]:
-    """chinese charactor
+    """chinese character
    Args:
        s (Text): "我爱中国“