diff --git a/.notebook/python_test.ipynb b/.notebook/python_test.ipynb index af55de5a4..5617c8765 100644 --- a/.notebook/python_test.ipynb +++ b/.notebook/python_test.ipynb @@ -637,7 +637,7 @@ { "cell_type": "code", "execution_count": 59, - "id": "threaded-grove", + "id": "legitimate-overhead", "metadata": {}, "outputs": [ { @@ -660,7 +660,7 @@ { "cell_type": "code", "execution_count": 35, - "id": "equal-vanilla", + "id": "genuine-feeding", "metadata": {}, "outputs": [ { @@ -705,7 +705,7 @@ { "cell_type": "code", "execution_count": 36, - "id": "gorgeous-stanford", + "id": "bizarre-story", "metadata": {}, "outputs": [ { @@ -728,7 +728,7 @@ { "cell_type": "code", "execution_count": 37, - "id": "geological-actor", + "id": "appointed-brooklyn", "metadata": {}, "outputs": [ { @@ -748,7 +748,7 @@ { "cell_type": "code", "execution_count": 38, - "id": "miniature-ethnic", + "id": "occasional-utilization", "metadata": {}, "outputs": [], "source": [ @@ -758,7 +758,7 @@ { "cell_type": "code", "execution_count": 40, - "id": "honest-clarity", + "id": "trained-indonesian", "metadata": {}, "outputs": [ { @@ -776,7 +776,7 @@ { "cell_type": "code", "execution_count": 54, - "id": "environmental-stewart", + "id": "following-brave", "metadata": {}, "outputs": [ { @@ -809,7 +809,7 @@ { "cell_type": "code", "execution_count": 42, - "id": "trying-brazil", + "id": "prospective-blind", "metadata": {}, "outputs": [ { @@ -839,7 +839,7 @@ { "cell_type": "code", "execution_count": 43, - "id": "chronic-interval", + "id": "minus-ethernet", "metadata": {}, "outputs": [ { @@ -868,7 +868,7 @@ { "cell_type": "code", "execution_count": 44, - "id": "widespread-basin", + "id": "ordinary-closer", "metadata": {}, "outputs": [ { @@ -900,7 +900,7 @@ { "cell_type": "code", "execution_count": 45, - "id": "clinical-lighting", + "id": "demographic-mumbai", "metadata": {}, "outputs": [], "source": [ @@ -911,7 +911,7 @@ { "cell_type": "code", "execution_count": 46, - "id": "federal-supervision", + "id": "conscious-stuff", "metadata": {}, "outputs": [ { @@ -931,7 +931,7 @@ { "cell_type": "code", "execution_count": 30, - "id": "parallel-trademark", + "id": "virgin-dublin", "metadata": {}, "outputs": [], "source": [ @@ -941,7 +941,7 @@ { "cell_type": "code", "execution_count": 31, - "id": "extended-fishing", + "id": "sized-homework", "metadata": {}, "outputs": [], "source": [ @@ -951,7 +951,7 @@ { "cell_type": "code", "execution_count": 47, - "id": "baking-auckland", + "id": "disciplinary-headquarters", "metadata": {}, "outputs": [ { @@ -969,10 +969,103 @@ "np.allclose(x, samples)" ] }, + { + "cell_type": "code", + "execution_count": 57, + "id": "persistent-synthetic", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import random" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "hydraulic-reach", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.uniform?" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "suitable-house", + "metadata": {}, + "outputs": [], + "source": [ + "random.uniform?" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "printable-carter", + "metadata": {}, + "outputs": [], + "source": [ + "np.random.RandomState?" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "considered-interval", + "metadata": {}, + "outputs": [], + "source": [ + "random.sample?" 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
+   "id": "ideal-hurricane",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array(['3', '5'], dtype='
+        if adaptive_number_ratio > 0:
+            self.n_time_masks = 0
+            logger.info('n_time_masks is set to zero for adaptive SpecAugment.')
+        if adaptive_size_ratio > 0:
+            self.T = 0
+            logger.info('T is set to zero for adaptive SpecAugment.')
+
+        self._freq_mask = None
+        self._time_mask = None
+
+    def librispeech_basic(self):
+        self.W = 80
+        self.F = 27
+        self.T = 100
+        self.n_freq_masks = 1
+        self.n_time_masks = 1
+        self.p = 1.0
+
+    def librispeech_double(self):
+        self.W = 80
+        self.F = 27
+        self.T = 100
+        self.n_freq_masks = 2
+        self.n_time_masks = 2
+        self.p = 1.0
+
+    def switchboard_mild(self):
+        self.W = 40
+        self.F = 15
+        self.T = 70
+        self.n_freq_masks = 2
+        self.n_time_masks = 2
+        self.p = 0.2
+
+    def switchboard_strong(self):
+        self.W = 40
+        self.F = 27
+        self.T = 70
+        self.n_freq_masks = 2
+        self.n_time_masks = 2
+        self.p = 0.2
+
+    @property
+    def freq_mask(self):
+        return self._freq_mask
+
+    @property
+    def time_mask(self):
+        return self._time_mask
+
+    def time_warp(xs, W=40):
+        raise NotImplementedError
+
+    def mask_freq(self, xs, replace_with_zero=False):
+        n_bins = xs.shape[0]
+        for i in range(0, self.n_freq_masks):
+            f = int(self._rng.uniform(low=0, high=self.F))
+            f_0 = int(self._rng.uniform(low=0, high=n_bins - f))
+            xs[f_0:f_0 + f, :] = 0
+            assert f_0 <= f_0 + f
+            self._freq_mask = (f_0, f_0 + f)
+        return xs
+
+    def mask_time(self, xs, replace_with_zero=False):
+        n_frames = xs.shape[1]
+
+        if self.adaptive_number_ratio > 0:
+            n_masks = int(n_frames * self.adaptive_number_ratio)
+            n_masks = min(n_masks, self.max_n_time_masks)
+        else:
+            n_masks = self.n_time_masks
+
+        if self.adaptive_size_ratio > 0:
+            T = self.adaptive_size_ratio * n_frames
+        else:
+            T = self.T
+
+        for i in range(n_masks):
+            t = int(self._rng.uniform(low=0, high=T))
+            t = min(t, int(n_frames * self.p))
+            t_0 = int(self._rng.uniform(low=0, high=n_frames - t))
+            xs[:, t_0:t_0 + t] = 0
+            assert t_0 <= t_0 + t
+            self._time_mask = (t_0, t_0 + t)
+        return xs
+
+    def transform_feature(self, xs: np.ndarray):
+        """
+        Args:
+            xs (FloatTensor): `[F, T]`
+        Returns:
+            xs (FloatTensor): `[F, T]`
+        """
+        # xs = self.time_warp(xs)
+        xs = self.mask_freq(xs)
+        xs = self.mask_time(xs)
+        return xs
diff --git a/deepspeech/io/dataset.py b/deepspeech/io/dataset.py
index 4550b058a..2bd8ddb8a 100644
--- a/deepspeech/io/dataset.py
+++ b/deepspeech/io/dataset.py
@@ -192,7 +192,7 @@ class ManifestDataset(Dataset):
         self._normalizer = FeatureNormalizer(
             mean_std_filepath) if mean_std_filepath else None
 
-        self._audio_augmentation_pipeline = AugmentationPipeline(
+        self._augmentation_pipeline = AugmentationPipeline(
            augmentation_config=augmentation_config, random_seed=random_seed)
         self._speech_featurizer = SpeechFeaturizer(
            unit_type=unit_type,
@@ -295,11 +295,14 @@ class ManifestDataset(Dataset):
                self._subfile_from_tar(audio_file), transcript)
        else:
            speech_segment = SpeechSegment.from_file(audio_file, transcript)
-        self._audio_augmentation_pipeline.transform_audio(speech_segment)
+        # audio augment
+        self._augmentation_pipeline.transform_audio(speech_segment)
         specgram, transcript_part = self._speech_featurizer.featurize(
            speech_segment, self._keep_transcription_text)
         if self._normalizer:
            specgram = self._normalizer.apply(specgram)
+        # specgram augment
+        specgram = self._augmentation_pipeline.transform_feature(specgram)
         return specgram, transcript_part
 
     def _instance_reader_creator(self, manifest):
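Note: the masking logic added in `spec_augment.py` above is the core of this change. The sketch below (plain NumPy, illustrative names only, not the module's API) shows what a single frequency mask and a single time mask do to an `[F, T]` spectrogram, mirroring the loops in `mask_freq` and `mask_time`:

```python
import numpy as np

def specaug_sketch(xs, F=27, T=100, p=1.0, rng=None):
    """Zero out one random frequency band and one random time band of an [F, T] spectrogram."""
    rng = rng or np.random.RandomState(0)
    n_bins, n_frames = xs.shape

    # frequency mask: a band of f consecutive bins starting at f_0 is zeroed
    f = int(rng.uniform(low=0, high=F))
    f_0 = int(rng.uniform(low=0, high=n_bins - f))
    xs[f_0:f_0 + f, :] = 0

    # time mask: a band of t consecutive frames, capped at p * n_frames, starting at t_0
    t = int(rng.uniform(low=0, high=T))
    t = min(t, int(n_frames * p))
    t_0 = int(rng.uniform(low=0, high=n_frames - t))
    xs[:, t_0:t_0 + t] = 0
    return xs

# fake fbank features, 80 bins x 300 frames
spec = np.random.RandomState(0).rand(80, 300)
masked = specaug_sketch(spec.copy())
```

The time-mask width is capped at `p * n_frames`, which is why `p` appears alongside `F`, `T`, and the mask counts in the `specaug` config entries below.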
diff --git a/docs/src/augmentation.md b/docs/src/augmentation.md
index e4b7c0012..6df8061d7 100644
--- a/docs/src/augmentation.md
+++ b/docs/src/augmentation.md
@@ -5,6 +5,7 @@ Data augmentation has often been a highly effective technique to boost the deep
 
 Six optional augmentation components are provided to be selected, configured and inserted into the processing pipeline.
 
+* Audio
   - Volume Perturbation
   - Speed Perturbation
   - Shifting Perturbation
@@ -12,6 +13,10 @@ Six optional augmentation components are provided to be selected, configured and
   - Noise Perturbation (need background noise audio files)
   - Impulse Response (need impulse audio files)
 
+* Feature
+  - SpecAugment
+  - Adaptive SpecAugment
+
 In order to inform the trainer of what augmentation components are needed and what their processing orders are, it is required to prepare in advance an *augmentation configuration file* in [JSON](http://www.json.org/) format. For example:
 
 ```
@@ -31,6 +36,6 @@ In order to inform the trainer of what augmentation components are needed and wh
 
 When the `augment_conf_file` argument is set to the path of the above example configuration file, every audio clip in every epoch will be processed: with 60% of chance, it will first be speed perturbed with a uniformly random sampled speed-rate between 0.95 and 1.05, and then with 80% of chance it will be shifted in time with a random sampled offset between -5 ms and 5 ms. Finally this newly synthesized audio clip will be feed into the feature extractor for further training.
 
-For other configuration examples, please refer to `examples/conf/augmentation.config.example`.
+For other configuration examples, please refer to `examples/conf/augmentation.example.json`.
 
 Be careful when utilizing the data augmentation technique, as improper augmentation will do harm to the training, due to the enlarged train-test gap.
diff --git a/docs/src/data_preparation.md b/docs/src/data_preparation.md
index 7b6142bde..a3d1b3eb4 100644
--- a/docs/src/data_preparation.md
+++ b/docs/src/data_preparation.md
@@ -40,4 +40,4 @@ python3 utils/build_vocab.py \
 --manifest_paths examples/librispeech/data/manifest.train
 ```
 
-It will write a vocabuary file `examples/librispeech/data/eng_vocab.txt` with all transcription text in `examples/librispeech/data/manifest.train`, without vocabulary truncation (`--count_threshold 0`).
+It will write a vocabulary file `examples/librispeech/data/vocab.txt` with all transcription text in `examples/librispeech/data/manifest.train`, without vocabulary truncation (`--count_threshold 0`).
diff --git a/docs/src/install.md b/docs/src/install.md
index bd4d5a432..72b7b6988 100644
--- a/docs/src/install.md
+++ b/docs/src/install.md
@@ -8,10 +8,10 @@ To avoid the trouble of environment setup, [running in Docker container](#runnin
 
 ## Setup
 
-- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost` and `swig`, e.g. installing them via `apt-get`:
+- Make sure these libraries or tools installed: `pkg-config`, `flac`, `ogg`, `vorbis`, `boost`, `sox`, and `swig`, e.g.
installing them via `apt-get`: ```bash -sudo apt-get install -y pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev +sudo apt-get install -y sox pkg-config libflac-dev libogg-dev libvorbis-dev libboost-dev swig python3-dev ``` or, installing them via `yum`: diff --git a/examples/aishell/s0/conf/augmentation.config b/examples/aishell/s0/conf/augmentation.config deleted file mode 100644 index 6c24da549..000000000 --- a/examples/aishell/s0/conf/augmentation.config +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "type": "shift", - "params": {"min_shift_ms": -5, - "max_shift_ms": 5}, - "prob": 1.0 - } -] diff --git a/examples/aishell/s0/conf/augmentation.json b/examples/aishell/s0/conf/augmentation.json new file mode 100644 index 000000000..a1a759e67 --- /dev/null +++ b/examples/aishell/s0/conf/augmentation.json @@ -0,0 +1,10 @@ +[ + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + } +] diff --git a/examples/aishell/s0/conf/deepspeech2.yaml b/examples/aishell/s0/conf/deepspeech2.yaml index 821c183e5..e06ae0239 100644 --- a/examples/aishell/s0/conf/deepspeech2.yaml +++ b/examples/aishell/s0/conf/deepspeech2.yaml @@ -5,7 +5,7 @@ data: test_manifest: data/manifest.test mean_std_filepath: data/mean_std.npz vocab_filepath: data/vocab.txt - augmentation_config: conf/augmentation.config + augmentation_config: conf/augmentation.json batch_size: 64 # one gpu max_duration: 27.0 min_duration: 0.0 diff --git a/examples/aishell/s1/conf/augmentation.json b/examples/aishell/s1/conf/augmentation.json new file mode 100644 index 000000000..aa16afb2e --- /dev/null +++ b/examples/aishell/s1/conf/augmentation.json @@ -0,0 +1,34 @@ +[ + { + "type": "speed", + "params": { + "min_speed_rate": 0.9, + "max_speed_rate": 1.1, + "num_rates": 3 + }, + "prob": 1.0 + }, + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + }, + { + "type": "specaug", + "params": { + "F": 10, + "T": 50, + "n_freq_masks": 2, + "n_time_masks": 2, + "p": 1.0, + "W": 80, + "adaptive_number_ratio": 0, + "adaptive_size_ratio": 0, + "max_n_time_masks": 20 + }, + "prob": 0.0 + } +] diff --git a/examples/aishell/s1/conf/conformer.yaml b/examples/aishell/s1/conf/conformer.yaml new file mode 100644 index 000000000..fced75d7e --- /dev/null +++ b/examples/aishell/s1/conf/conformer.yaml @@ -0,0 +1,110 @@ +# https://yaml.org/type/float.html +data: + train_manifest: data/manifest.train + dev_manifest: data/manifest.dev + test_manifest: data/manifest.test + vocab_filepath: data/vocab.txt + unit_type: 'char' + spm_model_prefix: '' + mean_std_filepath: "" + augmentation_config: conf/augmentation.json + batch_size: 16 + min_input_len: 0.5 + max_input_len: 20.0 + min_output_len: 0.0 + max_output_len: 400 + min_output_input_ratio: 0.05 + max_output_input_ratio: 10.0 + raw_wav: True # use raw_wav or kaldi feature + specgram_type: fbank #linear, mfcc, fbank + feat_dim: 80 + delta_delta: False + target_sample_rate: 16000 + max_freq: None + n_fft: None + stride_ms: 10.0 + window_ms: 25.0 + use_dB_normalization: True + target_dB: -20 + random_seed: 0 + keep_transcription_text: False + sortagrad: True + shuffle_method: batch_shuffle + num_workers: 0 + + +# network architecture +model: + cmvn_file: "data/mean_std.npz" + cmvn_file_type: "npz" + # encoder related + encoder: conformer + encoder_conf: + output_size: 256 # dimension of attention + attention_heads: 4 + linear_units: 2048 # the number of units of position-wise feed forward + num_blocks: 12 # the number 
of encoder blocks + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + attention_dropout_rate: 0.0 + input_layer: conv2d # encoder input type, you can chose conv2d, conv2d6 and conv2d8 + normalize_before: True + use_cnn_module: True + cnn_module_kernel: 15 + activation_type: 'swish' + pos_enc_layer_type: 'rel_pos' + selfattention_layer_type: 'rel_selfattn' + + # decoder related + decoder: transformer + decoder_conf: + attention_heads: 4 + linear_units: 2048 + num_blocks: 6 + dropout_rate: 0.1 + positional_dropout_rate: 0.1 + self_attention_dropout_rate: 0.0 + src_attention_dropout_rate: 0.0 + + # hybrid CTC/attention + model_conf: + ctc_weight: 0.3 + lsm_weight: 0.1 # label smoothing option + length_normalized_loss: false + + +training: + n_epoch: 240 + accum_grad: 4 + global_grad_clip: 5.0 + optim: adam + optim_conf: + lr: 0.002 + weight_decay: 1e-06 + scheduler: warmuplr # pytorch v1.1.0+ required + scheduler_conf: + warmup_steps: 25000 + lr_decay: 1.0 + log_interval: 100 + + +decoding: + batch_size: 16 + error_rate_type: wer + decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring' + lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm + alpha: 2.5 + beta: 0.3 + beam_size: 10 + cutoff_prob: 1.0 + cutoff_top_n: 0 + num_proc_bsearch: 8 + ctc_weight: 0.0 # ctc weight for attention rescoring decode mode. + decoding_chunk_size: -1 # decoding chunk size. Defaults to -1. + # <0: for decoding, use full chunk. + # >0: for decoding, use fixed chunk size as set. + # 0: used for training, it's prohibited here. + num_decoding_left_chunks: -1 # number of left chunks for decoding. Defaults to -1. + simulate_streaming: False # simulate streaming inference. Defaults to False. + + diff --git a/examples/aug_conf/augmentation.config b/examples/aug_conf/augmentation.config deleted file mode 100644 index 6c24da549..000000000 --- a/examples/aug_conf/augmentation.config +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "type": "shift", - "params": {"min_shift_ms": -5, - "max_shift_ms": 5}, - "prob": 1.0 - } -] diff --git a/examples/aug_conf/augmentation.config.example b/examples/aug_conf/augmentation.config.example deleted file mode 100644 index 2902125ab..000000000 --- a/examples/aug_conf/augmentation.config.example +++ /dev/null @@ -1,40 +0,0 @@ -[ - { - "type": "noise", - "params": {"min_snr_dB": 40, - "max_snr_dB": 50, - "noise_manifest_path": "datasets/manifest.noise"}, - "prob": 0.6 - }, - { - "type": "impulse", - "params": {"impulse_manifest_path": "datasets/manifest.impulse"}, - "prob": 0.5 - }, - { - "type": "speed", - "params": {"min_speed_rate": 0.95, - "max_speed_rate": 1.05, - "num_rates": 3}, - "prob": 0.5 - }, - { - "type": "shift", - "params": {"min_shift_ms": -5, - "max_shift_ms": 5}, - "prob": 1.0 - }, - { - "type": "volume", - "params": {"min_gain_dBFS": -10, - "max_gain_dBFS": 10}, - "prob": 0.0 - }, - { - "type": "bayesian_normal", - "params": {"target_db": -20, - "prior_db": -20, - "prior_samples": 100}, - "prob": 0.0 - } -] diff --git a/examples/aug_conf/augmentation.example.json b/examples/aug_conf/augmentation.example.json new file mode 100644 index 000000000..efae2e5e3 --- /dev/null +++ b/examples/aug_conf/augmentation.example.json @@ -0,0 +1,67 @@ +[ + { + "type": "noise", + "params": { + "min_snr_dB": 40, + "max_snr_dB": 50, + "noise_manifest_path": "datasets/manifest.noise" + }, + "prob": 0.6 + }, + { + "type": "impulse", + "params": { + "impulse_manifest_path": "datasets/manifest.impulse" + }, + "prob": 0.5 + }, + { + "type": 
"speed", + "params": { + "min_speed_rate": 0.95, + "max_speed_rate": 1.05, + "num_rates": 3 + }, + "prob": 0.5 + }, + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + }, + { + "type": "volume", + "params": { + "min_gain_dBFS": -10, + "max_gain_dBFS": 10 + }, + "prob": 0.0 + }, + { + "type": "bayesian_normal", + "params": { + "target_db": -20, + "prior_db": -20, + "prior_samples": 100 + }, + "prob": 0.0 + }, + { + "type": "specaug", + "params": { + "F": 10, + "T": 50, + "n_freq_masks": 2, + "n_time_masks": 2, + "p": 1.0, + "W": 80, + "adaptive_number_ratio": 0, + "adaptive_size_ratio": 0, + "max_n_time_masks": 20 + }, + "prob": 0.0 + } +] diff --git a/examples/aug_conf/augmentation.json b/examples/aug_conf/augmentation.json new file mode 100644 index 000000000..a1a759e67 --- /dev/null +++ b/examples/aug_conf/augmentation.json @@ -0,0 +1,10 @@ +[ + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + } +] diff --git a/examples/librispeech/s0/conf/augmentation.config b/examples/librispeech/s0/conf/augmentation.config deleted file mode 100644 index 6c24da549..000000000 --- a/examples/librispeech/s0/conf/augmentation.config +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "type": "shift", - "params": {"min_shift_ms": -5, - "max_shift_ms": 5}, - "prob": 1.0 - } -] diff --git a/examples/librispeech/s0/conf/augmentation.json b/examples/librispeech/s0/conf/augmentation.json new file mode 100644 index 000000000..a1a759e67 --- /dev/null +++ b/examples/librispeech/s0/conf/augmentation.json @@ -0,0 +1,10 @@ +[ + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + } +] diff --git a/examples/librispeech/s0/conf/deepspeech2.yaml b/examples/librispeech/s0/conf/deepspeech2.yaml index 15fd4cbe3..81313e611 100644 --- a/examples/librispeech/s0/conf/deepspeech2.yaml +++ b/examples/librispeech/s0/conf/deepspeech2.yaml @@ -5,7 +5,7 @@ data: test_manifest: data/manifest.test-clean mean_std_filepath: data/mean_std.npz vocab_filepath: data/vocab.txt - augmentation_config: conf/augmentation.config + augmentation_config: conf/augmentation.json batch_size: 20 max_duration: 27.0 min_duration: 0.0 diff --git a/examples/tiny/s0/conf/augmentation.config b/examples/tiny/s0/conf/augmentation.config deleted file mode 100644 index 6c24da549..000000000 --- a/examples/tiny/s0/conf/augmentation.config +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "type": "shift", - "params": {"min_shift_ms": -5, - "max_shift_ms": 5}, - "prob": 1.0 - } -] diff --git a/examples/tiny/s0/conf/augmentation.json b/examples/tiny/s0/conf/augmentation.json new file mode 100644 index 000000000..a1a759e67 --- /dev/null +++ b/examples/tiny/s0/conf/augmentation.json @@ -0,0 +1,10 @@ +[ + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + } +] diff --git a/examples/tiny/s0/conf/deepspeech2.yaml b/examples/tiny/s0/conf/deepspeech2.yaml index c7dd83f3c..59d0d79e2 100644 --- a/examples/tiny/s0/conf/deepspeech2.yaml +++ b/examples/tiny/s0/conf/deepspeech2.yaml @@ -5,7 +5,7 @@ data: test_manifest: data/manifest.tiny mean_std_filepath: data/mean_std.npz vocab_filepath: data/vocab.txt - augmentation_config: conf/augmentation.config + augmentation_config: conf/augmentation.json batch_size: 4 max_duration: 27.0 min_duration: 0.0 diff --git a/examples/tiny/s1/conf/augmentation.config b/examples/tiny/s1/conf/augmentation.config deleted file mode 100644 index 6c24da549..000000000 --- 
a/examples/tiny/s1/conf/augmentation.config +++ /dev/null @@ -1,8 +0,0 @@ -[ - { - "type": "shift", - "params": {"min_shift_ms": -5, - "max_shift_ms": 5}, - "prob": 1.0 - } -] diff --git a/examples/tiny/s1/conf/augmentation.json b/examples/tiny/s1/conf/augmentation.json new file mode 100644 index 000000000..a1a759e67 --- /dev/null +++ b/examples/tiny/s1/conf/augmentation.json @@ -0,0 +1,10 @@ +[ + { + "type": "shift", + "params": { + "min_shift_ms": -5, + "max_shift_ms": 5 + }, + "prob": 1.0 + } +] diff --git a/examples/tiny/s1/conf/conformer.yaml b/examples/tiny/s1/conf/conformer.yaml index e4c6f33c1..b1101736d 100644 --- a/examples/tiny/s1/conf/conformer.yaml +++ b/examples/tiny/s1/conf/conformer.yaml @@ -7,7 +7,7 @@ data: unit_type: 'spm' spm_model_prefix: 'data/bpe_unigram_200' mean_std_filepath: "" - augmentation_config: conf/augmentation.config + augmentation_config: conf/augmentation.json batch_size: 4 min_input_len: 0.5 max_input_len: 20.0
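Note: each entry in the `augmentation.json` files added above carries a `type`, a `params` dict, and a gating `prob`. The sketch below is a hypothetical reader (not the repo's `AugmentationPipeline`) showing how such a list is typically consumed — each augmentor fires only when a random draw falls under its `prob`:

```python
import json
import random

# A config in the same shape as the augmentation.json examples above
# (values taken from those examples).
config_text = '''
[
  {"type": "shift", "params": {"min_shift_ms": -5, "max_shift_ms": 5}, "prob": 1.0},
  {"type": "specaug", "params": {"F": 10, "T": 50, "n_freq_masks": 2, "n_time_masks": 2,
                                 "p": 1.0, "W": 80, "adaptive_number_ratio": 0,
                                 "adaptive_size_ratio": 0, "max_n_time_masks": 20}, "prob": 0.0}
]
'''

for entry in json.loads(config_text):
    # each augmentor is applied only when a random draw falls under its "prob"
    if random.random() < entry["prob"]:
        print("apply", entry["type"], "with", entry["params"])
    else:
        print("skip", entry["type"])
```

With `"prob": 0.0`, as in the `specaug` entries above, the augmentor is configured but effectively disabled until that probability is raised.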