Set explicit length/ratio bounds on config.data before building the dev/test
datasets, and remove the merged data/manifest.{train,dev,test}.raw files
before the `cat >>` loops append to them.

Review notes:
- Every bound is written as an assignment (`=`). Spelled `target: value`, the
  line parses as an annotation-only statement (PEP 526) and the attribute is
  never set.
- Indentation and blank context lines were re-derived from the hunk line
  counts; verify them against the target files before applying.
- The post-image hashes on the `index` lines predate the `:` -> `=` change.

diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py
index 7f15e565b..326f6771b 100644
--- a/deepspeech/exps/deepspeech2/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -144,6 +144,12 @@ class DeepSpeech2Trainer(Trainer):
 
         config.data.manifest = config.data.dev_manifest
         config.data.augmentation_config = ""
+        config.data.min_input_len = 0.0  # second
+        config.data.max_input_len = 100.0  # second
+        config.data.min_output_len = 0.0  # tokens
+        config.data.max_output_len = 400.0  # tokens
+        config.data.min_output_input_ratio = 0.00
+        config.data.max_output_input_ratio = 100.0
         dev_dataset = ManifestDataset.from_config(config)
 
         if self.parallel:
@@ -320,9 +326,15 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
         config.defrost()
         # return raw text
 
+        config.data.manifest = config.data.test_manifest
         config.data.keep_transcription_text = True
         config.data.augmentation_config = ""
-        config.data.manifest = config.data.test_manifest
+        config.data.min_input_len = 0.0  # second
+        config.data.max_input_len = 100.0  # second
+        config.data.min_output_len = 0.0  # tokens
+        config.data.max_output_len = 400.0  # tokens
+        config.data.min_output_input_ratio = 0.00
+        config.data.max_output_input_ratio = 100.0
         test_dataset = ManifestDataset.from_config(config)
 
         # return text ord id
diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index ce0b0a0ae..bc51a8c1b 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -215,8 +215,14 @@ class U2Trainer(Trainer):
         config.data.manifest = config.data.train_manifest
         train_dataset = ManifestDataset.from_config(config)
 
-        config.data.manifest = config.data.dev_manifest
         config.data.augmentation_config = ""
+        config.data.min_input_len = 0.0  # second
+        config.data.max_input_len = 100.0  # second
+        config.data.min_output_len = 0.0  # tokens
+        config.data.max_output_len = 400.0  # tokens
+        config.data.min_output_input_ratio = 0.00
+        config.data.max_output_input_ratio = 100.0
+        config.data.manifest = config.data.dev_manifest
         dev_dataset = ManifestDataset.from_config(config)
 
         collate_fn = SpeechCollator(keep_transcription_text=False)
@@ -253,6 +259,12 @@ class U2Trainer(Trainer):
         # test dataset, return raw text
         config.data.keep_transcription_text = True
         config.data.augmentation_config = ""
+        config.data.min_input_len = 0.0  # second
+        config.data.max_input_len = 100.0  # second
+        config.data.min_output_len = 0.0  # tokens
+        config.data.max_output_len = 400.0  # tokens
+        config.data.min_output_input_ratio = 0.00
+        config.data.max_output_input_ratio = 100.0
         config.data.manifest = config.data.test_manifest
         test_dataset = ManifestDataset.from_config(config)
         # return text ord id
diff --git a/examples/librispeech/s0/local/data.sh b/examples/librispeech/s0/local/data.sh
index 99630ee53..9c3ddcfac 100755
--- a/examples/librispeech/s0/local/data.sh
+++ b/examples/librispeech/s0/local/data.sh
@@ -27,6 +27,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
         mv data/manifest.${set} data/manifest.${set}.raw
     done
 
+    rm -rf data/manifest.train.raw data/manifest.dev.raw data/manifest.test.raw
     for set in train-clean-100 train-clean-360 train-other-500; do
         cat data/manifest.${set}.raw >> data/manifest.train.raw
     done
diff --git a/examples/librispeech/s1/local/data.sh b/examples/librispeech/s1/local/data.sh
index 185f429ba..fbdd17d58 100755
--- a/examples/librispeech/s1/local/data.sh
+++ b/examples/librispeech/s1/local/data.sh
@@ -31,6 +31,7 @@ if [ ${stage} -le -1 ] && [ ${stop_stage} -ge -1 ]; then
         mv data/manifest.${set} data/manifest.${set}.raw
     done
 
+    rm -rf data/manifest.train.raw data/manifest.dev.raw data/manifest.test.raw
     for set in train-clean-100 train-clean-360 train-other-500; do
         cat data/manifest.${set}.raw >> data/manifest.train.raw
     done