diff --git a/audio/paddleaudio/compliance/kaldi.py b/audio/paddleaudio/compliance/kaldi.py index a94ec4053..d2184ebe3 100644 --- a/audio/paddleaudio/compliance/kaldi.py +++ b/audio/paddleaudio/compliance/kaldi.py @@ -233,7 +233,7 @@ def spectrogram(waveform: Tensor, round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input to FFT. Defaults to True. sr (int, optional): Sample rate of input waveform. Defaults to 16000. - snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it + snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. window_type (str, optional): Choose type of window for FFT computation. Defaults to "povey". @@ -443,7 +443,7 @@ def fbank(waveform: Tensor, round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input to FFT. Defaults to True. sr (int, optional): Sample rate of input waveform. Defaults to 16000. - snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it + snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. @@ -566,7 +566,7 @@ def mfcc(waveform: Tensor, round_to_power_of_two (bool, optional): If True, round window size to power of two by zero-padding input to FFT. Defaults to True. sr (int, optional): Sample rate of input waveform. Defaults to 16000. 
- snip_edges (bool, optional): Drop samples in the end of waveform that cann't fit a signal frame when it + snip_edges (bool, optional): Drop samples in the end of waveform that can't fit a signal frame when it is set True. Otherwise performs reflect padding to the end of waveform. Defaults to True. subtract_mean (bool, optional): Whether to subtract mean of feature files. Defaults to False. use_energy (bool, optional): Add an dimension with energy of spectrogram to the output. Defaults to False. diff --git a/audio/paddleaudio/compliance/librosa.py b/audio/paddleaudio/compliance/librosa.py index c24d6d497..d0cee642a 100644 --- a/audio/paddleaudio/compliance/librosa.py +++ b/audio/paddleaudio/compliance/librosa.py @@ -527,7 +527,7 @@ def melspectrogram(x: np.ndarray, if fmax is None: fmax = sr // 2 if fmin < 0 or fmin >= fmax: - raise ParameterError('fmin and fmax must statisfy 0 Tuple[List[str], List[int]]: if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ not os.path.isfile(os.path.join(DATA_HOME, self.meta)): - download_and_decompress(self.archieves, DATA_HOME) + download_and_decompress(self.archives, DATA_HOME) meta_info = self._get_meta_info() diff --git a/audio/paddleaudio/datasets/gtzan.py b/audio/paddleaudio/datasets/gtzan.py index a76e9208e..6146c4b98 100644 --- a/audio/paddleaudio/datasets/gtzan.py +++ b/audio/paddleaudio/datasets/gtzan.py @@ -35,7 +35,7 @@ class GTZAN(AudioClassificationDataset): https://ieeexplore.ieee.org/document/1021072/ """ - archieves = [ + archives = [ { 'url': 'http://opihi.cs.uvic.ca/sound/genres.tar.gz', 'md5': '5b3d6dddb579ab49814ab86dba69e7c7', @@ -85,7 +85,7 @@ class GTZAN(AudioClassificationDataset): split) -> Tuple[List[str], List[int]]: if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ not os.path.isfile(os.path.join(DATA_HOME, self.meta)): - download_and_decompress(self.archieves, DATA_HOME) + download_and_decompress(self.archives, DATA_HOME) meta_info = self._get_meta_info() 
random.seed(seed) # shuffle samples to split data diff --git a/audio/paddleaudio/datasets/rirs_noises.py b/audio/paddleaudio/datasets/rirs_noises.py index 74418daa2..e5d193537 100644 --- a/audio/paddleaudio/datasets/rirs_noises.py +++ b/audio/paddleaudio/datasets/rirs_noises.py @@ -30,7 +30,7 @@ __all__ = ['OpenRIRNoise'] class OpenRIRNoise(Dataset): - archieves = [ + archives = [ { 'url': 'http://www.openslr.org/resources/28/rirs_noises.zip', 'md5': 'e6f48e257286e05de56413b4779d8ffb', @@ -76,7 +76,7 @@ class OpenRIRNoise(Dataset): print(f"rirs noises base path: {self.base_path}") if not os.path.isdir(self.base_path): download_and_decompress( - self.archieves, self.base_path, decompress=True) + self.archives, self.base_path, decompress=True) else: print( f"{self.base_path} already exists, we will not download and decompress again" diff --git a/audio/paddleaudio/datasets/tess.py b/audio/paddleaudio/datasets/tess.py index e34eaea37..e311a8df6 100644 --- a/audio/paddleaudio/datasets/tess.py +++ b/audio/paddleaudio/datasets/tess.py @@ -37,7 +37,7 @@ class TESS(AudioClassificationDataset): https://doi.org/10.5683/SP2/E8H2MF """ - archieves = [ + archives = [ { 'url': 'https://bj.bcebos.com/paddleaudio/datasets/TESS_Toronto_emotional_speech_set.zip', @@ -93,7 +93,7 @@ class TESS(AudioClassificationDataset): def _get_data(self, mode, seed, n_folds, split) -> Tuple[List[str], List[int]]: if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)): - download_and_decompress(self.archieves, DATA_HOME) + download_and_decompress(self.archives, DATA_HOME) wav_files = [] for root, _, files in os.walk(os.path.join(DATA_HOME, self.audio_path)): diff --git a/audio/paddleaudio/datasets/urban_sound.py b/audio/paddleaudio/datasets/urban_sound.py index 43d1b36c4..4c4467588 100644 --- a/audio/paddleaudio/datasets/urban_sound.py +++ b/audio/paddleaudio/datasets/urban_sound.py @@ -35,7 +35,7 @@ class UrbanSound8K(AudioClassificationDataset): 
https://dl.acm.org/doi/10.1145/2647868.2655045 """ - archieves = [ + archives = [ { 'url': 'https://zenodo.org/record/1203745/files/UrbanSound8K.tar.gz', @@ -81,7 +81,7 @@ class UrbanSound8K(AudioClassificationDataset): def _get_data(self, mode: str, split: int) -> Tuple[List[str], List[int]]: if not os.path.isdir(os.path.join(DATA_HOME, self.audio_path)) or \ not os.path.isfile(os.path.join(DATA_HOME, self.meta)): - download_and_decompress(self.archieves, DATA_HOME) + download_and_decompress(self.archives, DATA_HOME) meta_info = self._get_meta_info() diff --git a/audio/paddleaudio/datasets/voxceleb.py b/audio/paddleaudio/datasets/voxceleb.py index 1fafb5176..225859700 100644 --- a/audio/paddleaudio/datasets/voxceleb.py +++ b/audio/paddleaudio/datasets/voxceleb.py @@ -34,7 +34,7 @@ __all__ = ['VoxCeleb'] class VoxCeleb(Dataset): source_url = 'https://thor.robots.ox.ac.uk/~vgg/data/voxceleb/vox1a/' - archieves_audio_dev = [ + archives_audio_dev = [ { 'url': source_url + 'vox1_dev_wav_partaa', 'md5': 'e395d020928bc15670b570a21695ed96', @@ -52,13 +52,13 @@ class VoxCeleb(Dataset): 'md5': '7bb1e9f70fddc7a678fa998ea8b3ba19', }, ] - archieves_audio_test = [ + archives_audio_test = [ { 'url': source_url + 'vox1_test_wav.zip', 'md5': '185fdc63c3c739954633d50379a3d102', }, ] - archieves_meta = [ + archives_meta = [ { 'url': 'https://www.robots.ox.ac.uk/~vgg/data/voxceleb/meta/veri_test2.txt', @@ -135,11 +135,11 @@ class VoxCeleb(Dataset): if not os.path.isdir(self.wav_path): print("start to download the voxceleb1 dataset") download_and_decompress( # multi-zip parts concatenate to vox1_dev_wav.zip - self.archieves_audio_dev, + self.archives_audio_dev, self.base_path, decompress=False) download_and_decompress( # download the vox1_test_wav.zip and unzip - self.archieves_audio_test, + self.archives_audio_test, self.base_path, decompress=True) @@ -157,7 +157,7 @@ class VoxCeleb(Dataset): if not os.path.isdir(self.meta_path): print("prepare the meta data") 
download_and_decompress( - self.archieves_meta, self.meta_path, decompress=False) + self.archives_meta, self.meta_path, decompress=False) # Data preparation. if not os.path.isdir(self.csv_path): diff --git a/dataset/chime3_background/chime3_background.py b/dataset/chime3_background/chime3_background.py index 4f081e6c3..6ec8fdfed 100644 --- a/dataset/chime3_background/chime3_background.py +++ b/dataset/chime3_background/chime3_background.py @@ -109,7 +109,7 @@ def create_manifest(data_dir, manifest_path): def prepare_chime3(url, md5sum, target_dir, manifest_path): - """Download, unpack and create summmary manifest file.""" + """Download, unpack and create summary manifest file.""" if not os.path.exists(os.path.join(target_dir, "CHiME3")): # download filepath = download(url, md5sum, target_dir, diff --git a/dataset/timit/timit.py b/dataset/timit/timit.py index 2943ff548..492c6702d 100644 --- a/dataset/timit/timit.py +++ b/dataset/timit/timit.py @@ -210,7 +210,7 @@ def create_manifest(data_dir, manifest_path_prefix): def prepare_dataset(url, md5sum, target_dir, manifest_path): - """Download, unpack and create summmary manifest file. + """Download, unpack and create summary manifest file. 
""" filepath = os.path.join(target_dir, "TIMIT.zip") if not os.path.exists(filepath): diff --git a/demos/TTSCppFrontend/src/front/front_interface.cpp b/demos/TTSCppFrontend/src/front/front_interface.cpp index e7b08c798..dd6329ff7 100644 --- a/demos/TTSCppFrontend/src/front/front_interface.cpp +++ b/demos/TTSCppFrontend/src/front/front_interface.cpp @@ -115,27 +115,27 @@ int FrontEngineInterface::init() { // 生成词典(词到音素的映射) if (0 != GenDict(_word2phone_path, &word_phone_map)) { - LOG(ERROR) << "Genarate word2phone dict failed"; + LOG(ERROR) << "Generate word2phone dict failed"; return -1; } // 生成音素字典(音素到音素id的映射) if (0 != GenDict(_phone2id_path, &phone_id_map)) { - LOG(ERROR) << "Genarate phone2id dict failed"; + LOG(ERROR) << "Generate phone2id dict failed"; return -1; } // 生成音调字典(音调到音调id的映射) if (_separate_tone == "true") { if (0 != GenDict(_tone2id_path, &tone_id_map)) { - LOG(ERROR) << "Genarate tone2id dict failed"; + LOG(ERROR) << "Generate tone2id dict failed"; return -1; } } // 生成繁简字典(繁体到简体id的映射) if (0 != GenDict(_trand2simp_path, &trand_simp_map)) { - LOG(ERROR) << "Genarate trand2simp dict failed"; + LOG(ERROR) << "Generate trand2simp dict failed"; return -1; } @@ -263,7 +263,7 @@ int FrontEngineInterface::GetWordsIds( if (0 != GetInitialsFinals(word, &word_initials, &word_finals)) { LOG(ERROR) - << "Genarate the word_initials and word_finals of " + << "Generate the word_initials and word_finals of " << word << " failed"; return -1; } @@ -304,7 +304,7 @@ int FrontEngineInterface::GetWordsIds( // 音素到音素id if (0 != Phone2Phoneid(phone, phoneids, toneids)) { - LOG(ERROR) << "Genarate the phone id of " << word << " failed"; + LOG(ERROR) << "Generate the phone id of " << word << " failed"; return -1; } } @@ -916,11 +916,11 @@ int FrontEngineInterface::NeuralSandhi(const std::string &word, if (find(must_neural_tone_words.begin(), must_neural_tone_words.end(), word) != must_neural_tone_words.end() || - (word_num >= 2 && - find(must_neural_tone_words.begin(), - 
must_neural_tone_words.end(), - ppspeech::wstring2utf8string(word_wstr.substr( - word_num - 2))) != must_neural_tone_words.end())) { + (word_num >= 2 && find(must_neural_tone_words.begin(), + must_neural_tone_words.end(), + ppspeech::wstring2utf8string( + word_wstr.substr(word_num - 2))) != + must_neural_tone_words.end())) { (*finals).back() = (*finals).back().replace((*finals).back().length() - 1, 1, "5"); } diff --git a/demos/audio_searching/src/milvus_helpers.py b/demos/audio_searching/src/milvus_helpers.py index 1699e892e..801284ffd 100644 --- a/demos/audio_searching/src/milvus_helpers.py +++ b/demos/audio_searching/src/milvus_helpers.py @@ -77,13 +77,13 @@ class MilvusHelper: field1 = FieldSchema( name="id", dtype=DataType.INT64, - descrition="int64", + description="int64", is_primary=True, auto_id=True) field2 = FieldSchema( name="embedding", dtype=DataType.FLOAT_VECTOR, - descrition="speaker embeddings", + description="speaker embeddings", dim=VECTOR_DIMENSION, is_primary=False) schema = CollectionSchema( diff --git a/demos/speech_web/speech_server/main.py b/demos/speech_web/speech_server/main.py index f4678628f..1e4d5ed3d 100644 --- a/demos/speech_web/speech_server/main.py +++ b/demos/speech_web/speech_server/main.py @@ -225,7 +225,7 @@ async def websocket_endpoint_online(websocket: WebSocket): websocket (WebSocket): the websocket instance """ - #1. the interface wait to accept the websocket protocal header + #1. the interface wait to accept the websocket protocol header # and only we receive the header, it establish the connection with specific thread await websocket.accept() @@ -238,7 +238,7 @@ async def websocket_endpoint_online(websocket: WebSocket): connection_handler = None try: - #4. we do a loop to process the audio package by package according the protocal + #4. 
we do a loop to process the audio package by package according the protocol # and only if the client send finished signal, we will break the loop while True: # careful here, changed the source code from starlette.websockets diff --git a/demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py b/demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py index 46473fdb2..324cbf978 100644 --- a/demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py +++ b/demos/streaming_tts_serving_fastdeploy/streaming_tts_serving/1/model.py @@ -75,7 +75,7 @@ class TritonPythonModel: def initialize(self, args): """`initialize` is called only once when the model is being loaded. Implementing `initialize` function is optional. This function allows - the model to intialize any state associated with this model. + the model to initialize any state associated with this model. Parameters ---------- args : dict diff --git a/examples/aishell/asr3/cmd.sh b/examples/aishell/asr3/cmd.sh index 7b70ef5e0..c87b0f233 100755 --- a/examples/aishell/asr3/cmd.sh +++ b/examples/aishell/asr3/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". 
diff --git a/examples/aishell3/tts3/conf/conformer.yaml b/examples/aishell3/tts3/conf/conformer.yaml index 0834bfe3f..ae305ac81 100644 --- a/examples/aishell3/tts3/conf/conformer.yaml +++ b/examples/aishell3/tts3/conf/conformer.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet encoder_normalize_before: True # whether to perform layer normalization before the input @@ -66,14 +66,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: true # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy 
energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/aishell3/tts3/conf/default.yaml b/examples/aishell3/tts3/conf/default.yaml index e65b5d0ec..68a3c7fb9 100644 --- a/examples/aishell3/tts3/conf/default.yaml +++ b/examples/aishell3/tts3/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 
1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/aishell3/vc1/conf/default.yaml b/examples/aishell3/vc1/conf/default.yaml index ac4956742..bde47b6db 100644 --- a/examples/aishell3/vc1/conf/default.yaml +++ b/examples/aishell3/vc1/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout 
rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/aishell3/vc2/conf/default.yaml b/examples/aishell3/vc2/conf/default.yaml index 5ef37f812..6374b4c87 100644 --- a/examples/aishell3/vc2/conf/default.yaml +++ b/examples/aishell3/vc2/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy 
predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/aishell3/voc5/conf/default.yaml b/examples/aishell3/voc5/conf/default.yaml index 728a90369..73953a1e7 100644 --- a/examples/aishell3/voc5/conf/default.yaml +++ b/examples/aishell3/voc5/conf/default.yaml @@ -39,7 +39,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -77,7 +77,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. use_spectral_norm: False # Whether to apply spectral normalization. 
diff --git a/examples/canton/tts3/conf/default.yaml b/examples/canton/tts3/conf/default.yaml index a101e6eea..ce1536abb 100644 --- a/examples/canton/tts3/conf/default.yaml +++ b/examples/canton/tts3/conf/default.yaml @@ -45,7 +45,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -63,14 +63,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout 
rate after conv embedding layer for energy diff --git a/examples/csmsc/jets/conf/default.yaml b/examples/csmsc/jets/conf/default.yaml index 1dafd20c1..0999b5bc0 100644 --- a/examples/csmsc/jets/conf/default.yaml +++ b/examples/csmsc/jets/conf/default.yaml @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: true # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/csmsc/tts3/conf/cnndecoder.yaml b/examples/csmsc/tts3/conf/cnndecoder.yaml index 8b46fea44..d0bfe5079 100644 --- a/examples/csmsc/tts3/conf/cnndecoder.yaml +++ b/examples/csmsc/tts3/conf/cnndecoder.yaml @@ -43,7 +43,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of 
duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -65,14 +65,14 @@ model: cnn_decoder_embedding_dim: 256 pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/csmsc/tts3/conf/conformer.yaml b/examples/csmsc/tts3/conf/conformer.yaml index fcad86150..f184450c2 100644 --- a/examples/csmsc/tts3/conf/conformer.yaml +++ b/examples/csmsc/tts3/conf/conformer.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration 
predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet encoder_normalize_before: True # whether to perform layer normalization before the input @@ -66,14 +66,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/csmsc/tts3/conf/default.yaml b/examples/csmsc/tts3/conf/default.yaml index 08b6f75ba..3a034cffc 100644 --- a/examples/csmsc/tts3/conf/default.yaml +++ b/examples/csmsc/tts3/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor 
duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/csmsc/voc5/conf/default.yaml b/examples/csmsc/voc5/conf/default.yaml index 38b94cf5c..4dc95ac32 100644 --- a/examples/csmsc/voc5/conf/default.yaml +++ b/examples/csmsc/voc5/conf/default.yaml @@ -38,7 +38,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. 
bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -76,7 +76,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. use_spectral_norm: False # Whether to apply spectral normalization. diff --git a/examples/csmsc/voc5/conf/finetune.yaml b/examples/csmsc/voc5/conf/finetune.yaml index 110ae052b..51be0706e 100644 --- a/examples/csmsc/voc5/conf/finetune.yaml +++ b/examples/csmsc/voc5/conf/finetune.yaml @@ -38,7 +38,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -76,7 +76,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. 
use_spectral_norm: False # Whether to apply spectral normalization. diff --git a/examples/csmsc/voc5/conf/iSTFT.yaml b/examples/csmsc/voc5/conf/iSTFT.yaml index 06677d796..10b69f917 100644 --- a/examples/csmsc/voc5/conf/iSTFT.yaml +++ b/examples/csmsc/voc5/conf/iSTFT.yaml @@ -42,7 +42,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -83,7 +83,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. use_spectral_norm: False # Whether to apply spectral normalization. diff --git a/examples/librispeech/asr1/cmd.sh b/examples/librispeech/asr1/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/librispeech/asr1/cmd.sh +++ b/examples/librispeech/asr1/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". 
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". diff --git a/examples/librispeech/asr2/cmd.sh b/examples/librispeech/asr2/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/librispeech/asr2/cmd.sh +++ b/examples/librispeech/asr2/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". diff --git a/examples/librispeech/asr3/cmd.sh b/examples/librispeech/asr3/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/librispeech/asr3/cmd.sh +++ b/examples/librispeech/asr3/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". 
diff --git a/examples/librispeech/asr4/cmd.sh b/examples/librispeech/asr4/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/librispeech/asr4/cmd.sh +++ b/examples/librispeech/asr4/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". diff --git a/examples/librispeech/asr5/cmd.sh b/examples/librispeech/asr5/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/librispeech/asr5/cmd.sh +++ b/examples/librispeech/asr5/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". 
diff --git a/examples/ljspeech/tts1/conf/default.yaml b/examples/ljspeech/tts1/conf/default.yaml index 456b6a1e3..fecdd140e 100644 --- a/examples/ljspeech/tts1/conf/default.yaml +++ b/examples/ljspeech/tts1/conf/default.yaml @@ -34,7 +34,7 @@ model: # keyword arguments for the selected model dunits: 1024 # number of decoder ff units positionwise_layer_type: conv1d # type of position-wise layer positionwise_conv_kernel_size: 1 # kernel size of position wise conv layer - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding diff --git a/examples/ljspeech/tts3/conf/default.yaml b/examples/ljspeech/tts3/conf/default.yaml index 5305c912f..eab4af03c 100644 --- a/examples/ljspeech/tts3/conf/default.yaml +++ b/examples/ljspeech/tts3/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate 
in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/ljspeech/voc5/conf/default.yaml b/examples/ljspeech/voc5/conf/default.yaml index 97c512204..e45d2d770 100644 --- a/examples/ljspeech/voc5/conf/default.yaml +++ b/examples/ljspeech/voc5/conf/default.yaml @@ -38,7 +38,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -76,7 +76,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. 
use_spectral_norm: False # Whether to apply spectral normalization. diff --git a/examples/mustc/st1/cmd.sh b/examples/mustc/st1/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/mustc/st1/cmd.sh +++ b/examples/mustc/st1/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". # You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". diff --git a/examples/opencpop/svs1/conf/default.yaml b/examples/opencpop/svs1/conf/default.yaml index 5d8060630..a1693923f 100644 --- a/examples/opencpop/svs1/conf/default.yaml +++ b/examples/opencpop/svs1/conf/default.yaml @@ -68,14 +68,14 @@ model: duration_predictor_dropout_rate: 0.5 # dropout rate in energy predictor pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in 
energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/opencpop/voc5/conf/default.yaml b/examples/opencpop/voc5/conf/default.yaml index 10449f860..18822f310 100644 --- a/examples/opencpop/voc5/conf/default.yaml +++ b/examples/opencpop/voc5/conf/default.yaml @@ -38,7 +38,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -76,7 +76,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. use_spectral_norm: False # Whether to apply spectral normalization. diff --git a/examples/opencpop/voc5/conf/finetune.yaml b/examples/opencpop/voc5/conf/finetune.yaml index 0022a67aa..331f99755 100644 --- a/examples/opencpop/voc5/conf/finetune.yaml +++ b/examples/opencpop/voc5/conf/finetune.yaml @@ -38,7 +38,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. 
- nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -76,7 +76,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. use_spectral_norm: False # Whether to apply spectral normalization. diff --git a/examples/other/ngram_lm/s0/local/kenlm_score_test.py b/examples/other/ngram_lm/s0/local/kenlm_score_test.py index 30bc1e4b1..4f388cd00 100644 --- a/examples/other/ngram_lm/s0/local/kenlm_score_test.py +++ b/examples/other/ngram_lm/s0/local/kenlm_score_test.py @@ -97,7 +97,7 @@ def test_full_scores_words(): if w not in model: print('"{0}" is an OOV'.format(w)) oov.append(w) - # zh_giga.no_cna_cmn.prune01244.klm is chinese charactor LM + # zh_giga.no_cna_cmn.prune01244.klm is chinese character LM assert oov == ["盘点", "不怕", "网站", "❗", "️", "海淘", "向来", "便宜", "保真", "!"], 'error oov' diff --git a/examples/ted_en_zh/st1/cmd.sh b/examples/ted_en_zh/st1/cmd.sh index 7b70ef5e0..c87b0f233 100644 --- a/examples/ted_en_zh/st1/cmd.sh +++ b/examples/ted_en_zh/st1/cmd.sh @@ -54,8 +54,8 @@ elif [ "${cmd_backend}" = sge ]; then # "sbatch" (Slurm) elif [ "${cmd_backend}" = slurm ]; then # The default setting is written in conf/slurm.conf. - # You must change "-p cpu" and "-p gpu" for the "partion" for your environment. - # To know the "partion" names, type "sinfo". + # You must change "-p cpu" and "-p gpu" for the "partition" for your environment. + # To know the "partition" names, type "sinfo". 
# You can use "--gpu * " by default for slurm and it is interpreted as "--gres gpu:*" # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}". diff --git a/examples/vctk/tts3/conf/default.yaml b/examples/vctk/tts3/conf/default.yaml index a75658d3d..d0d520e82 100644 --- a/examples/vctk/tts3/conf/default.yaml +++ b/examples/vctk/tts3/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 
0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/examples/vctk/voc5/conf/default.yaml b/examples/vctk/voc5/conf/default.yaml index 6361e01b2..ecf0c1775 100644 --- a/examples/vctk/voc5/conf/default.yaml +++ b/examples/vctk/voc5/conf/default.yaml @@ -39,7 +39,7 @@ generator_params: use_additional_convs: True # Whether to use additional conv layer in residual blocks. bias: True # Whether to use bias parameter in conv. nonlinear_activation: "leakyrelu" # Nonlinear activation type. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. @@ -77,7 +77,7 @@ discriminator_params: max_downsample_channels: 1024 # Maximum number of channels in downsampling conv layers. bias: True # Whether to use bias parameter in conv layer." nonlinear_activation: "leakyrelu" # Nonlinear activation. - nonlinear_activation_params: # Nonlinear activation paramters. + nonlinear_activation_params: # Nonlinear activation parameters. negative_slope: 0.1 use_weight_norm: True # Whether to apply weight normalization. use_spectral_norm: False # Whether to apply spectral normalization. 
diff --git a/examples/voxceleb/sv0/local/data_prepare.py b/examples/voxceleb/sv0/local/data_prepare.py index e5a5dff7b..e7fc8a120 100644 --- a/examples/voxceleb/sv0/local/data_prepare.py +++ b/examples/voxceleb/sv0/local/data_prepare.py @@ -32,8 +32,8 @@ def main(args, config): seed_everything(config.seed) # stage 1: generate the voxceleb csv file - # Note: this may occurs c++ execption, but the program will execute fine - # so we ignore the execption + # Note: this may occurs c++ exception, but the program will execute fine + # so we ignore the exception # we explicitly pass the vox2 base path to data prepare and generate the audio info logger.info("start to generate the voxceleb dataset info") train_dataset = VoxCeleb( diff --git a/examples/zh_en_tts/tts3/conf/default.yaml b/examples/zh_en_tts/tts3/conf/default.yaml index efa8b3ea2..be509432d 100644 --- a/examples/zh_en_tts/tts3/conf/default.yaml +++ b/examples/zh_en_tts/tts3/conf/default.yaml @@ -42,7 +42,7 @@ model: duration_predictor_layers: 2 # number of layers of duration predictor duration_predictor_chans: 256 # number of channels of duration predictor duration_predictor_kernel_size: 3 # filter size of duration predictor - postnet_layers: 5 # number of layers of postnset + postnet_layers: 5 # number of layers of postnet postnet_filts: 5 # filter size of conv layers in postnet postnet_chans: 256 # number of channels of conv layers in postnet use_scaled_pos_enc: True # whether to use scaled positional encoding @@ -60,14 +60,14 @@ model: transformer_dec_attn_dropout_rate: 0.2 # dropout rate for transformer decoder attention layer pitch_predictor_layers: 5 # number of conv layers in pitch predictor pitch_predictor_chans: 256 # number of channels of conv layers in pitch predictor - pitch_predictor_kernel_size: 5 # kernel size of conv leyers in pitch predictor + pitch_predictor_kernel_size: 5 # kernel size of conv layers in pitch predictor pitch_predictor_dropout: 0.5 # dropout rate in pitch predictor 
pitch_embed_kernel_size: 1 # kernel size of conv embedding layer for pitch pitch_embed_dropout: 0.0 # dropout rate after conv embedding layer for pitch stop_gradient_from_pitch_predictor: True # whether to stop the gradient from pitch predictor to encoder energy_predictor_layers: 2 # number of conv layers in energy predictor energy_predictor_chans: 256 # number of channels of conv layers in energy predictor - energy_predictor_kernel_size: 3 # kernel size of conv leyers in energy predictor + energy_predictor_kernel_size: 3 # kernel size of conv layers in energy predictor energy_predictor_dropout: 0.5 # dropout rate in energy predictor energy_embed_kernel_size: 1 # kernel size of conv embedding layer for energy energy_embed_dropout: 0.0 # dropout rate after conv embedding layer for energy diff --git a/paddlespeech/audio/utils/tensor_utils.py b/paddlespeech/audio/utils/tensor_utils.py index b246a6459..b67b2dd81 100644 --- a/paddlespeech/audio/utils/tensor_utils.py +++ b/paddlespeech/audio/utils/tensor_utils.py @@ -79,7 +79,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # assuming trailing dimensions and type of all the Tensors # in sequences are same and fetching those from sequences[0] max_size = paddle.shape(sequences[0]) - # (TODO Hui Zhang): slice not supprot `end==start` + # (TODO Hui Zhang): slice not support `end==start` # trailing_dims = max_size[1:] trailing_dims = tuple( max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else () @@ -93,7 +93,7 @@ def pad_sequence(sequences: List[paddle.Tensor], length = tensor.shape[0] # use index notation to prevent duplicate references to the tensor if batch_first: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # TODO (Hui Zhang): set_value op not support int16 # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] # out_tensor[i, :length, ...] 
= tensor @@ -102,7 +102,7 @@ def pad_sequence(sequences: List[paddle.Tensor], else: out_tensor[i, length] = tensor else: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # out_tensor[:length, i, ...] = tensor if length != 0: out_tensor[:length, i] = tensor diff --git a/paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py b/paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py index 5d914a438..7250a50b9 100644 --- a/paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py +++ b/paddlespeech/dataset/aidatatang_200zh/aidatatang_200zh.py @@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix): if line == '': continue audio_id, text = line.split(' ', 1) - # remove withespace, charactor text + # remove withespace, character text text = ''.join(text.split()) transcript_dict[audio_id] = text diff --git a/paddlespeech/dataset/aishell/aishell.py b/paddlespeech/dataset/aishell/aishell.py index 7ea4d6766..f227cfbd5 100644 --- a/paddlespeech/dataset/aishell/aishell.py +++ b/paddlespeech/dataset/aishell/aishell.py @@ -65,7 +65,7 @@ def create_manifest(data_dir, manifest_path_prefix): if line == '': continue audio_id, text = line.split(' ', 1) - # remove withespace, charactor text + # remove withespace, character text text = ''.join(text.split()) transcript_dict[audio_id] = text @@ -159,7 +159,7 @@ def check_dataset(data_dir): if line == '': continue audio_id, text = line.split(' ', 1) - # remove withespace, charactor text + # remove withespace, character text text = ''.join(text.split()) transcript_dict[audio_id] = text diff --git a/paddlespeech/s2t/exps/hubert/model.py b/paddlespeech/s2t/exps/hubert/model.py index bc05921dd..c2bd63583 100644 --- a/paddlespeech/s2t/exps/hubert/model.py +++ b/paddlespeech/s2t/exps/hubert/model.py @@ -362,7 +362,7 @@ class HubertASRTrainer(Trainer): scratch = None if self.args.resume: # just restore ckpt - # lr will resotre from optimizer ckpt + # lr 
will restore from optimizer ckpt resume_json_path = os.path.join(self.checkpoint_dir, self.args.resume + '.json') with open(resume_json_path, 'r', encoding='utf8') as f: @@ -370,20 +370,20 @@ class HubertASRTrainer(Trainer): self.iteration = 0 self.epoch = resume_json["epoch"] - # resotre model from *.pdparams + # restore model from *.pdparams params_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdparams' model_dict = paddle.load(params_path) self.model.set_state_dict(model_dict) - # resotre optimizer from *.pdopt + # restore optimizer from *.pdopt optimizer_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdopt' optimizer_dict = paddle.load(optimizer_path) self.model_optimizer.set_state_dict(optimizer_dict['model']) self.hubert_optimizer.set_state_dict(optimizer_dict['hubert']) - # resotre lr_scheduler from *.pdlrs + # restore lr_scheduler from *.pdlrs scheduler_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdlrs' if os.path.isfile(os.path.join(scheduler_path)): diff --git a/paddlespeech/s2t/exps/wav2vec2/model.py b/paddlespeech/s2t/exps/wav2vec2/model.py index 6c90f99e1..7ba86c774 100644 --- a/paddlespeech/s2t/exps/wav2vec2/model.py +++ b/paddlespeech/s2t/exps/wav2vec2/model.py @@ -361,7 +361,7 @@ class Wav2Vec2ASRTrainer(Trainer): scratch = None if self.args.resume: # just restore ckpt - # lr will resotre from optimizer ckpt + # lr will restore from optimizer ckpt resume_json_path = os.path.join(self.checkpoint_dir, self.args.resume + '.json') with open(resume_json_path, 'r', encoding='utf8') as f: @@ -369,20 +369,20 @@ class Wav2Vec2ASRTrainer(Trainer): self.iteration = 0 self.epoch = resume_json["epoch"] - # resotre model from *.pdparams + # restore model from *.pdparams params_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdparams' model_dict = paddle.load(params_path) self.model.set_state_dict(model_dict) - # resotre optimizer from *.pdopt + # restore optimizer from 
*.pdopt optimizer_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdopt' optimizer_dict = paddle.load(optimizer_path) self.model_optimizer.set_state_dict(optimizer_dict['model']) self.wav2vec2_optimizer.set_state_dict(optimizer_dict['wav2vec2']) - # resotre lr_scheduler from *.pdlrs + # restore lr_scheduler from *.pdlrs scheduler_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdlrs' if os.path.isfile(os.path.join(scheduler_path)): diff --git a/paddlespeech/s2t/exps/wavlm/model.py b/paddlespeech/s2t/exps/wavlm/model.py index 606867eae..5dfbf3b2e 100644 --- a/paddlespeech/s2t/exps/wavlm/model.py +++ b/paddlespeech/s2t/exps/wavlm/model.py @@ -361,7 +361,7 @@ class WavLMASRTrainer(Trainer): scratch = None if self.args.resume: # just restore ckpt - # lr will resotre from optimizer ckpt + # lr will restore from optimizer ckpt resume_json_path = os.path.join(self.checkpoint_dir, self.args.resume + '.json') with open(resume_json_path, 'r', encoding='utf8') as f: @@ -369,20 +369,20 @@ class WavLMASRTrainer(Trainer): self.iteration = 0 self.epoch = resume_json["epoch"] - # resotre model from *.pdparams + # restore model from *.pdparams params_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdparams' model_dict = paddle.load(params_path) self.model.set_state_dict(model_dict) - # resotre optimizer from *.pdopt + # restore optimizer from *.pdopt optimizer_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdopt' optimizer_dict = paddle.load(optimizer_path) self.model_optimizer.set_state_dict(optimizer_dict['model']) self.wavlm_optimizer.set_state_dict(optimizer_dict['wavlm']) - # resotre lr_scheduler from *.pdlrs + # restore lr_scheduler from *.pdlrs scheduler_path = os.path.join(self.checkpoint_dir, "{}".format(self.epoch)) + '.pdlrs' if os.path.isfile(os.path.join(scheduler_path)): diff --git a/paddlespeech/s2t/training/trainer.py b/paddlespeech/s2t/training/trainer.py index 
a8f36f91b..3facddc0e 100644 --- a/paddlespeech/s2t/training/trainer.py +++ b/paddlespeech/s2t/training/trainer.py @@ -215,7 +215,7 @@ class Trainer(): checkpoint_path=self.args.checkpoint_path) if infos: # just restore ckpt - # lr will resotre from optimizer ckpt + # lr will restore from optimizer ckpt self.iteration = infos["step"] self.epoch = infos["epoch"] diff --git a/paddlespeech/s2t/utils/error_rate.py b/paddlespeech/s2t/utils/error_rate.py index 548376aa2..9e3357b91 100644 --- a/paddlespeech/s2t/utils/error_rate.py +++ b/paddlespeech/s2t/utils/error_rate.py @@ -171,7 +171,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): def cer(reference, hypothesis, ignore_case=False, remove_space=False): - """Calculate charactor error rate (CER). CER compares reference text and + """Calculate character error rate (CER). CER compares reference text and hypothesis text in char-level. CER is defined as: .. math:: diff --git a/paddlespeech/s2t/utils/tensor_utils.py b/paddlespeech/s2t/utils/tensor_utils.py index 0d91b9cfb..15f4abdda 100644 --- a/paddlespeech/s2t/utils/tensor_utils.py +++ b/paddlespeech/s2t/utils/tensor_utils.py @@ -80,7 +80,7 @@ def pad_sequence(sequences: List[paddle.Tensor], # assuming trailing dimensions and type of all the Tensors # in sequences are same and fetching those from sequences[0] max_size = paddle.shape(sequences[0]) - # (TODO Hui Zhang): slice not supprot `end==start` + # (TODO Hui Zhang): slice not support `end==start` # trailing_dims = max_size[1:] trailing_dims = tuple( max_size[1:].numpy().tolist()) if sequences[0].ndim >= 2 else () @@ -98,7 +98,7 @@ def pad_sequence(sequences: List[paddle.Tensor], f"length {length}, out_tensor {out_tensor.shape}, tensor {tensor.shape}" ) if batch_first: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # TODO (Hui Zhang): set_value op not support int16 # TODO (Hui Zhang): set_varbase 2 rank not support [0,0,...] 
# out_tensor[i, :length, ...] = tensor @@ -107,7 +107,7 @@ def pad_sequence(sequences: List[paddle.Tensor], else: out_tensor[i, length] = tensor else: - # TODO (Hui Zhang): set_value op not supprot `end==start` + # TODO (Hui Zhang): set_value op not support `end==start` # out_tensor[:length, i, ...] = tensor if length != 0: out_tensor[:length, i] = tensor diff --git a/paddlespeech/server/utils/audio_handler.py b/paddlespeech/server/utils/audio_handler.py index 4df651337..08f15fe70 100644 --- a/paddlespeech/server/utils/audio_handler.py +++ b/paddlespeech/server/utils/audio_handler.py @@ -79,7 +79,7 @@ class ASRWsAudioHandler: punc_server_ip=None, punc_server_port=None): """PaddleSpeech Online ASR Server Client audio handler - Online asr server use the websocket protocal + Online asr server use the websocket protocol Args: url (str, optional): the server ip. Defaults to None. port (int, optional): the server port. Defaults to None. @@ -144,10 +144,10 @@ class ASRWsAudioHandler: logger.error("No asr server, please input valid ip and port") return "" - # 1. send websocket handshake protocal + # 1. send websocket handshake protocol start_time = time.time() async with websockets.connect(self.url) as ws: - # 2. server has already received handshake protocal + # 2. server has already received handshake protocol # client start to send the command audio_info = json.dumps( { @@ -255,7 +255,7 @@ class ASRHttpHandler: class TTSWsHandler: def __init__(self, server="127.0.0.1", port=8092, play: bool=False): """PaddleSpeech Online TTS Server Client audio handler - Online tts server use the websocket protocal + Online tts server use the websocket protocol Args: server (str, optional): the server ip. Defaults to "127.0.0.1". port (int, optional): the server port. Defaults to 8092. 
@@ -405,7 +405,7 @@ class TTSWsHandler: class TTSHttpHandler: def __init__(self, server="127.0.0.1", port=8092, play: bool=False): """PaddleSpeech Online TTS Server Client audio handler - Online tts server use the websocket protocal + Online tts server use the websocket protocol Args: server (str, optional): the server ip. Defaults to "127.0.0.1". port (int, optional): the server port. Defaults to 8092. diff --git a/paddlespeech/server/ws/asr_api.py b/paddlespeech/server/ws/asr_api.py index b3ad0b7c5..3f90ac3b4 100644 --- a/paddlespeech/server/ws/asr_api.py +++ b/paddlespeech/server/ws/asr_api.py @@ -31,7 +31,7 @@ async def websocket_endpoint(websocket: WebSocket): websocket (WebSocket): the websocket instance """ - #1. the interface wait to accept the websocket protocal header + #1. the interface wait to accept the websocket protocol header # and only we receive the header, it establish the connection with specific thread await websocket.accept() @@ -45,7 +45,7 @@ async def websocket_endpoint(websocket: WebSocket): connection_handler = None try: - #4. we do a loop to process the audio package by package according the protocal + #4. we do a loop to process the audio package by package according the protocol # and only if the client send finished signal, we will break the loop while True: # careful here, changed the source code from starlette.websockets diff --git a/paddlespeech/server/ws/tts_api.py b/paddlespeech/server/ws/tts_api.py index 275711f58..11194958c 100644 --- a/paddlespeech/server/ws/tts_api.py +++ b/paddlespeech/server/ws/tts_api.py @@ -32,7 +32,7 @@ async def websocket_endpoint(websocket: WebSocket): websocket (WebSocket): the websocket instance """ - #1. the interface wait to accept the websocket protocal header + #1. 
the interface wait to accept the websocket protocol header # and only we receive the header, it establish the connection with specific thread await websocket.accept() diff --git a/paddlespeech/t2s/frontend/zh_frontend.py b/paddlespeech/t2s/frontend/zh_frontend.py index 95c75a7f0..3c87c6827 100644 --- a/paddlespeech/t2s/frontend/zh_frontend.py +++ b/paddlespeech/t2s/frontend/zh_frontend.py @@ -523,7 +523,7 @@ class Frontend(): initials = [] finals = [] - # to charactor list + # to character list words = self._split_word_to_char(words[0]) for pinyin, char in zip(pinyin_spec, words): diff --git a/paddlespeech/t2s/models/vits/residual_coupling.py b/paddlespeech/t2s/models/vits/residual_coupling.py index afa6d1fa7..09916484d 100644 --- a/paddlespeech/t2s/models/vits/residual_coupling.py +++ b/paddlespeech/t2s/models/vits/residual_coupling.py @@ -76,7 +76,7 @@ class ResidualAffineCouplingBlock(nn.Layer): use_weight_norm (bool): Whether to use weight normalization in WaveNet. bias (bool): - Whether to use bias paramters in WaveNet. + Whether to use bias parameters in WaveNet. use_only_mean (bool): Whether to estimate only mean. @@ -169,7 +169,7 @@ class ResidualAffineCouplingLayer(nn.Layer): use_weight_norm (bool): Whether to use weight normalization in WaveNet. bias (bool): - Whether to use bias paramters in WaveNet. + Whether to use bias parameters in WaveNet. use_only_mean (bool): Whether to estimate only mean. diff --git a/paddlespeech/t2s/utils/error_rate.py b/paddlespeech/t2s/utils/error_rate.py index 76a4f45be..1298680ed 100644 --- a/paddlespeech/t2s/utils/error_rate.py +++ b/paddlespeech/t2s/utils/error_rate.py @@ -159,7 +159,7 @@ def wer(reference, hypothesis, ignore_case=False, delimiter=' '): def cer(reference, hypothesis, ignore_case=False, remove_space=False): - """Calculate charactor error rate (CER). CER compares reference text and + """Calculate character error rate (CER). CER compares reference text and hypothesis text in char-level. CER is defined as: .. 
math:: CER = (Sc + Dc + Ic) / Nc diff --git a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc index bf912af2e..1bda492c8 100644 --- a/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc +++ b/runtime/engine/asr/decoder/ctc_prefix_beam_search_decoder.cc @@ -157,7 +157,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding( next_score.v_b = prefix_score.ViterbiScore() + prob; next_score.times_b = prefix_score.Times(); - // Prefix not changed, copy the context from pefix + // Prefix not changed, copy the context from prefix if (context_graph_ && !next_score.has_context) { next_score.CopyContext(prefix_score); next_score.has_context = true; @@ -183,7 +183,7 @@ void CTCPrefixBeamSearch::AdvanceDecoding( } } - // Prefix not changed, copy the context from pefix + // Prefix not changed, copy the context from prefix if (context_graph_ && !next_score1.has_context) { next_score1.CopyContext(prefix_score); next_score1.has_context = true; diff --git a/runtime/engine/common/frontend/cmvn.cc b/runtime/engine/common/frontend/cmvn.cc index 0f1108208..bfb02a840 100644 --- a/runtime/engine/common/frontend/cmvn.cc +++ b/runtime/engine/common/frontend/cmvn.cc @@ -72,7 +72,7 @@ bool CMVN::Read(std::vector* feats) { return false; } - // appply cmvn + // apply cmvn kaldi::Timer timer; Compute(feats); VLOG(1) << "CMVN::Read cost: " << timer.Elapsed() << " sec."; diff --git a/runtime/engine/common/frontend/cmvn.h b/runtime/engine/common/frontend/cmvn.h index c515b6aeb..2d8917d95 100644 --- a/runtime/engine/common/frontend/cmvn.h +++ b/runtime/engine/common/frontend/cmvn.h @@ -29,7 +29,7 @@ class CMVN : public FrontendInterface { // the length of feats = feature_row * feature_dim, // the Matrix is squashed into Vector virtual bool Read(std::vector* feats); - // the dim_ is the feautre dim. + // the dim_ is the feature dim. 
virtual size_t Dim() const { return dim_; } virtual void SetFinished() { base_extractor_->SetFinished(); } virtual bool IsFinished() const { return base_extractor_->IsFinished(); } diff --git a/runtime/engine/common/frontend/db_norm.h b/runtime/engine/common/frontend/db_norm.h index 425971437..e9f8b6995 100644 --- a/runtime/engine/common/frontend/db_norm.h +++ b/runtime/engine/common/frontend/db_norm.h @@ -47,7 +47,7 @@ class DecibelNormalizer : public FrontendInterface { std::unique_ptr base_extractor); virtual void Accept(const kaldi::VectorBase& waves); virtual bool Read(kaldi::Vector* waves); - // noramlize audio, the dim is 1. + // normalize audio, the dim is 1. virtual size_t Dim() const { return dim_; } virtual void SetFinished() { base_extractor_->SetFinished(); } virtual bool IsFinished() const { return base_extractor_->IsFinished(); } diff --git a/runtime/engine/common/matrix/kaldi-matrix.cc b/runtime/engine/common/matrix/kaldi-matrix.cc index 6f65fb0a0..65e8e09a6 100644 --- a/runtime/engine/common/matrix/kaldi-matrix.cc +++ b/runtime/engine/common/matrix/kaldi-matrix.cc @@ -244,8 +244,8 @@ void MatrixBase::SymAddMat2(const Real alpha, /// function will produce NaN in the output. This is a bug in the /// ATLAS library. To overcome this, the AddMatMat function, which calls /// cblas_Xgemm(...) rather than cblas_Xsyrk(...), is used in this special - /// sitation. - /// Wei Shi: Note this bug is observerd for single precision matrix + /// situation. + /// Wei Shi: Note this bug is observed for single precision matrix /// on a 64-bit machine #ifdef HAVE_ATLAS if (transA == kTrans && num_rows_ >= 56) { @@ -683,7 +683,7 @@ empty. if (V_in == NULL) tmpV.Resize(1, this->num_cols_); // work-space if V_in empty. - /// Impementation notes: + /// Implementation notes: /// Lapack works in column-order, therefore the dimensions of *this are /// swapped as well as the U and V matrices. 
@@ -2378,7 +2378,7 @@ bool ReadHtk(std::istream &is, Matrix *M_ptr, HtkHeader *header_ptr) Matrix &M = *M_ptr; HtkHeader htk_hdr; - // TODO(arnab): this fails if the HTK file has CRC cheksum or is compressed. + // TODO(arnab): this fails if the HTK file has CRC checksum or is compressed. is.read((char*)&htk_hdr, sizeof(htk_hdr)); // we're being really POSIX here! if (is.fail()) { KALDI_WARN << "Could not read header from HTK feature file "; diff --git a/runtime/engine/common/matrix/kaldi-vector.cc b/runtime/engine/common/matrix/kaldi-vector.cc index 3ab9a7ffa..790ebe128 100644 --- a/runtime/engine/common/matrix/kaldi-vector.cc +++ b/runtime/engine/common/matrix/kaldi-vector.cc @@ -235,7 +235,7 @@ void VectorBase::CopyRowsFromMat(const MatrixBase &mat) { memcpy(inc_data, mat.Data(), cols * rows * sizeof(Real)); } else { for (MatrixIndexT i = 0; i < rows; i++) { - // copy the data to the propper position + // copy the data to the proper position memcpy(inc_data, mat.RowData(i), cols * sizeof(Real)); // set new copy position inc_data += cols; diff --git a/runtime/engine/common/utils/file_utils.cc b/runtime/engine/common/utils/file_utils.cc index 385f2b656..59bb64482 100644 --- a/runtime/engine/common/utils/file_utils.cc +++ b/runtime/engine/common/utils/file_utils.cc @@ -44,7 +44,7 @@ std::string ReadFile2String(const std::string& path) { } bool FileExists(const std::string& strFilename) { - // this funciton if from: + // this function is from: // https://github.com/kaldi-asr/kaldi/blob/master/src/fstext/deterministic-fst-test.cc struct stat stFileInfo; bool blnReturn; diff --git a/runtime/engine/kaldi/lat/kaldi-lattice.cc b/runtime/engine/kaldi/lat/kaldi-lattice.cc index 744cc5384..0bd291ee1 100644 --- a/runtime/engine/kaldi/lat/kaldi-lattice.cc +++ b/runtime/engine/kaldi/lat/kaldi-lattice.cc @@ -407,7 +407,7 @@ bool WriteLattice(std::ostream &os, bool binary, const Lattice &t) { if (os.fail()) KALDI_WARN << "Stream failure detected."; // Write another newline as a
terminating character. The read routine will - // detect this [this is a Kaldi mechanism, not somethig in the original + // detect this [this is a Kaldi mechanism, not something in the original // OpenFst code]. os << '\n'; return os.good(); diff --git a/runtime/examples/README.md b/runtime/examples/README.md index de27bd94b..6d316d649 100644 --- a/runtime/examples/README.md +++ b/runtime/examples/README.md @@ -34,7 +34,7 @@ bash run.sh --stop_stage 4 ## Display Model with [Netron](https://github.com/lutzroeder/netron) -If you have a model, we can using this commnd to show model graph. +If you have a model, we can use this command to show model graph. For example: ``` diff --git a/runtime/examples/audio_classification/README.md b/runtime/examples/audio_classification/README.md index 6d7a37423..7fb8d611a 100644 --- a/runtime/examples/audio_classification/README.md +++ b/runtime/examples/audio_classification/README.md @@ -74,7 +74,7 @@ includes/ #### set path push resource into android phone -1. change resource path in conf to gloabal path, such as: +1. change resource path in conf to global path, such as: [CONF] wav_normal=true @@ -92,9 +92,9 @@ push resource into android phone high_freq=14000 dither=0.0 2. adb push conf label_list scp test.wav /data/local/tmp/ -3. set reource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as: +3. set resource path in android demo(android_demo/app/src/main/cpp/native-lib.cpp) to actual path, such as: std::string conf_path = "/data/local/tmp/conf"; std::string wav_path = "/data/local/tmp/test.wav"; -4. excecute android_demo in android studio +4.
execute android_demo in android studio diff --git a/runtime/examples/text_lm/local/mmseg.py b/runtime/examples/text_lm/local/mmseg.py index 74295cd3c..4d72afd39 100755 --- a/runtime/examples/text_lm/local/mmseg.py +++ b/runtime/examples/text_lm/local/mmseg.py @@ -156,8 +156,8 @@ class Analysis: return self.text[self.pos] #判断该字符是否是中文字符(不包括中文标点) - def isChineseChar(self, charater): - return 0x4e00 <= ord(charater) < 0x9fa6 + def isChineseChar(self, character): + return 0x4e00 <= ord(character) < 0x9fa6 #判断是否是ASCII码 def isASCIIChar(self, ch): @@ -253,7 +253,6 @@ class Analysis: # print(word3.length, word3.text) if word3.length == -1: chunk = Chunk(word1, word2) - # print("Ture") else: chunk = Chunk(word1, word2, word3) chunks.append(chunk) diff --git a/runtime/patch/openfst/src/include/fst/flags.h b/runtime/patch/openfst/src/include/fst/flags.h index b5ec8ff74..54dd30cc2 100644 --- a/runtime/patch/openfst/src/include/fst/flags.h +++ b/runtime/patch/openfst/src/include/fst/flags.h @@ -181,8 +181,8 @@ template class FlagRegisterer { public: FlagRegisterer(const string &name, const FlagDescription &desc) { - auto registr = FlagRegister::GetRegister(); - registr->SetDescription(name, desc); + auto r = FlagRegister::GetRegister(); + r->SetDescription(name, desc); } private: diff --git a/tests/test_tipc/conformer/scripts/aishell_tiny.py b/tests/test_tipc/conformer/scripts/aishell_tiny.py index c87463b50..27b713a55 100644 --- a/tests/test_tipc/conformer/scripts/aishell_tiny.py +++ b/tests/test_tipc/conformer/scripts/aishell_tiny.py @@ -62,7 +62,7 @@ def create_manifest(data_dir, manifest_path_prefix): if line == '': continue audio_id, text = line.split(' ', 1) - # remove withespace, charactor text + # remove whitespace, character text text = ''.join(text.split()) transcript_dict[audio_id] = text diff --git a/tests/unit/cli/aishell_test_prepare.py b/tests/unit/cli/aishell_test_prepare.py index c364e4fd9..ef582426c 100644 --- a/tests/unit/cli/aishell_test_prepare.py +++
b/tests/unit/cli/aishell_test_prepare.py @@ -63,7 +63,7 @@ def create_manifest(data_dir, manifest_path_prefix): if line == '': continue audio_id, text = line.split(' ', 1) - # remove withespace, charactor text + # remove whitespace, character text text = ''.join(text.split()) transcript_dict[audio_id] = text diff --git a/tests/unit/server/offline/test_server_client.sh b/tests/unit/server/offline/test_server_client.sh index 6418c82fd..26fb100a3 100644 --- a/tests/unit/server/offline/test_server_client.sh +++ b/tests/unit/server/offline/test_server_client.sh @@ -66,8 +66,8 @@ config_file=./conf/application.yaml server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}') port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}') -echo "Sevice ip: $server_ip" | tee ./log/test_result.log -echo "Sevice port: $port" | tee -a ./log/test_result.log +echo "Service ip: $server_ip" | tee ./log/test_result.log +echo "Service port: $port" | tee -a ./log/test_result.log # whether a process is listening on $port pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'` @@ -190,7 +190,7 @@ echo "************************************************************************** echo "All tests completed."
| tee -a ./log/test_result.log -# sohw all the test results +# show all the test results echo "***************** Here are all the test results ********************" cat ./log/test_result.log diff --git a/tests/unit/server/online/tts/check_server/test.sh b/tests/unit/server/online/tts/check_server/test.sh index c62c54c76..998a07b3f 100644 --- a/tests/unit/server/online/tts/check_server/test.sh +++ b/tests/unit/server/online/tts/check_server/test.sh @@ -76,8 +76,8 @@ config_file=./conf/application.yaml server_ip=$(cat $config_file | grep "host" | awk -F " " '{print $2}') port=$(cat $config_file | grep "port" | awk '/port:/ {print $2}') -echo "Sevice ip: $server_ip" | tee $log/test_result.log -echo "Sevice port: $port" | tee -a $log/test_result.log +echo "Service ip: $server_ip" | tee $log/test_result.log +echo "Service port: $port" | tee -a $log/test_result.log # whether a process is listening on $port pid=`lsof -i :"$port"|grep -v "PID" | awk '{print $2}'` @@ -307,7 +307,7 @@ echo "************************************************************************** echo "All tests completed." 
| tee -a $log/test_result.log -# sohw all the test results +# show all the test results echo "***************** Here are all the test results ********************" cat $log/test_result.log diff --git a/tests/unit/tts/test_snapshot.py b/tests/unit/tts/test_snapshot.py index fb18c7d78..750e6b68d 100644 --- a/tests/unit/tts/test_snapshot.py +++ b/tests/unit/tts/test_snapshot.py @@ -30,7 +30,7 @@ def _test_snapshot(): # use a simplest iterable object as dataloader dataloader = count() - # hack the training proecss: training does nothing except increse iteration + # hack the training process: training does nothing except increase iteration updater = StandardUpdater(model, optimizer, dataloader=dataloader) updater.update_core = lambda x: None diff --git a/tools/extras/install_liblbfgs.sh b/tools/extras/install_liblbfgs.sh index 8d6ae4ab7..0148bd841 100755 --- a/tools/extras/install_liblbfgs.sh +++ b/tools/extras/install_liblbfgs.sh @@ -17,13 +17,13 @@ cd liblbfgs-$VER ./configure --prefix=`pwd` make # due to the liblbfgs project directory structure, we have to use -i -# but the erros are completely harmless +# but the errors are completely harmless make -i install cd .. ( [ ! -z "${LIBLBFGS}" ] && \ - echo >&2 "LIBLBFGS variable is aleady defined. Undefining..." && \ + echo >&2 "LIBLBFGS variable is already defined. Undefining..." && \ unset LIBLBFGS [ -f ./env.sh ] && . ./env.sh diff --git a/tools/extras/install_srilm.sh b/tools/extras/install_srilm.sh index f359e70ce..fdbcf5d97 100755 --- a/tools/extras/install_srilm.sh +++ b/tools/extras/install_srilm.sh @@ -68,7 +68,7 @@ make || exit cd .. ( [ ! -z "${SRILM}" ] && \ - echo >&2 "SRILM variable is aleady defined. Undefining..." && \ + echo >&2 "SRILM variable is already defined. Undefining..." && \ unset SRILM [ -f ./env.sh ] && .
./env.sh diff --git a/utils/format_triplet_data.py b/utils/format_triplet_data.py index e9a0cf54c..029ea2d9b 100755 --- a/utils/format_triplet_data.py +++ b/utils/format_triplet_data.py @@ -44,7 +44,7 @@ add_arg('manifest_paths', str, # bpe add_arg('spm_model_prefix', str, None, "spm model prefix, spm_model_%(bpe_mode)_%(count_threshold), only need when `unit_type` is spm") -add_arg('output_path', str, None, "filepath of formated manifest.", required=True) +add_arg('output_path', str, None, "filepath of formatted manifest.", required=True) # yapf: disable args = parser.parse_args() diff --git a/utils/fst/ctc_token_fst.py b/utils/fst/ctc_token_fst.py index f63e9cdac..85974f27f 100755 --- a/utils/fst/ctc_token_fst.py +++ b/utils/fst/ctc_token_fst.py @@ -32,7 +32,7 @@ def main(args): # leaving `token` print('{} {} {} {}'.format(node, 2, '', '')) node += 1 - # Fianl node + # Final node print('0') diff --git a/utils/fst/make_tlg.sh b/utils/fst/make_tlg.sh index c68387af9..944b8b1f3 100755 --- a/utils/fst/make_tlg.sh +++ b/utils/fst/make_tlg.sh @@ -21,7 +21,7 @@ cp -r $src_lang $tgt_lang # eps2disambig.pl: replace epsilons on the input side with the special disambiguation symbol #0. # s2eps.pl: replaces and with (on both input and output sides), for the G.fst acceptor. # G.fst, the disambiguation symbol #0 only appears on the input side -# do eps2disambig.pl and s2eps.pl maybe just for fallowing `fstrmepsilon`. +# do eps2disambig.pl and s2eps.pl maybe just for following `fstrmepsilon`. cat $arpa_lm | \ grep -v ' ' | \ grep -v ' ' | \ diff --git a/utils/generate_infer_yaml.py b/utils/generate_infer_yaml.py index ca8d6b60d..bd45a1bbd 100755 --- a/utils/generate_infer_yaml.py +++ b/utils/generate_infer_yaml.py @@ -3,7 +3,7 @@ ''' Merge training configs into a single inference config. The single inference config is for CLI, which only takes a single config to do inferencing. 
- The trainig configs includes: model config, preprocess config, decode config, vocab file and cmvn file. + The training configs includes: model config, preprocess config, decode config, vocab file and cmvn file. Process: # step 1: prepare dir @@ -11,7 +11,7 @@ cp -r exp conf data release_dir cd release_dir - # step 2: get "model.yaml" which conatains all configuration info. + # step 2: get "model.yaml" which contains all configuration info. # if does not contain preprocess.yaml file. e.g ds2: python generate_infer_yaml.py --cfg_pth conf/deepspeech2_online.yaml --dcd_pth conf/tuning/chunk_decode.yaml --vb_pth data/lang_char/vocab.txt --cmvn_pth data/mean_std.json --save_pth model.yaml --pre_pth null # if contains preprocess.yaml file. e.g u2: diff --git a/utils/tokenizer.perl b/utils/tokenizer.perl index 836fe19c6..babf81886 100644 --- a/utils/tokenizer.perl +++ b/utils/tokenizer.perl @@ -79,7 +79,7 @@ if ($HELP) print " -b ... disable Perl buffering.\n"; print " -time ... enable processing time calculation.\n"; print " -penn ... use Penn treebank-like tokenization.\n"; - print " -protected FILE ... specify file with patters to be protected in tokenisation.\n"; + print " -protected FILE ... specify file with patterns to be protected in tokenisation.\n"; print " -no-escape ... 
don't perform HTML escaping on apostrophy, quotes, etc.\n"; exit; } diff --git a/utils/train_arpa_with_kenlm.sh b/utils/train_arpa_with_kenlm.sh index 8af646ceb..b435239af 100755 --- a/utils/train_arpa_with_kenlm.sh +++ b/utils/train_arpa_with_kenlm.sh @@ -37,7 +37,7 @@ fi # the text should be properly pre-processed, e.g: # cleand, normalized and possibly word-segmented -# get rid off irrelavent symbols +# get rid of irrelevant symbols grep -v '' $symbol_table \ | grep -v '#0' \ | grep -v '' | grep -v '' \ @@ -51,7 +51,7 @@ grep -v '' $symbol_table \ # # TL;DR reason: # Unlike SRILM's -limit-vocab, kenlm's --limit_vocab_file option -# spcifies a *valid* set of vocabulary, whereas *valid but unseen* +# specifies a *valid* set of vocabulary, whereas *valid but unseen* # words are discarded in final arpa. # So the trick is, # we explicitly add kaldi's vocab(one word per line) to training text, diff --git a/utils/zh_tn.py b/utils/zh_tn.py index 6fee626bd..4bb684a1e 100755 --- a/utils/zh_tn.py +++ b/utils/zh_tn.py @@ -1288,7 +1288,7 @@ def normalize_corpus(corpus, def char_token(s: Text) -> List[Text]: - """chinese charactor + """chinese character Args: s (Text): "我爱中国“