diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 326f6771b..eea784a58 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -145,11 +145,11 @@ class DeepSpeech2Trainer(Trainer): config.data.manifest = config.data.dev_manifest config.data.augmentation_config = "" config.data.min_input_len = 0.0 # second - config.data.max_input_len: 100.0 # second - config.data.min_output_len: 0.0 # tokens - config.data.max_output_len: 400.0 # tokens - config.data.min_output_input_ratio: 0.00 - config.data.max_output_input_ratio: 100.0 + config.data.max_input_len = float('inf') # second + config.data.min_output_len = 0.0 # tokens + config.data.max_output_len = float('inf') # tokens + config.data.min_output_input_ratio = 0.00 + config.data.max_output_input_ratio = float('inf') dev_dataset = ManifestDataset.from_config(config) if self.parallel: @@ -330,11 +330,11 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): config.data.keep_transcription_text = True config.data.augmentation_config = "" config.data.min_input_len = 0.0 # second - config.data.max_input_len: 100.0 # second - config.data.min_output_len: 0.0 # tokens - config.data.max_output_len: 400.0 # tokens - config.data.min_output_input_ratio: 0.00 - config.data.max_output_input_ratio: 100.0 + config.data.max_input_len = float('inf') # second + config.data.min_output_len = 0.0 # tokens + config.data.max_output_len = float('inf') # tokens + config.data.min_output_input_ratio = 0.00 + config.data.max_output_input_ratio = float('inf') test_dataset = ManifestDataset.from_config(config) # return text ord id diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index bc51a8c1b..db3c34279 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -215,14 +215,14 @@ class U2Trainer(Trainer): config.data.manifest = config.data.train_manifest train_dataset = ManifestDataset.from_config(config) + config.data.manifest = config.data.dev_manifest config.data.augmentation_config = "" config.data.min_input_len = 0.0 # second - config.data.max_input_len: 100.0 # second - config.data.min_output_len: 0.0 # tokens - config.data.max_output_len: 400.0 # tokens - config.data.min_output_input_ratio: 0.00 - config.data.max_output_input_ratio: 100.0 - config.data.manifest = config.data.dev_manifest + config.data.max_input_len = float('inf') # second + config.data.min_output_len = 0.0 # tokens + config.data.max_output_len = float('inf') # tokens + config.data.min_output_input_ratio = 0.00 + config.data.max_output_input_ratio = float('inf') dev_dataset = ManifestDataset.from_config(config) collate_fn = SpeechCollator(keep_transcription_text=False) @@ -257,15 +257,15 @@ class U2Trainer(Trainer): collate_fn=collate_fn) # test dataset, return raw text + config.data.manifest = config.data.test_manifest config.data.keep_transcription_text = True config.data.augmentation_config = "" config.data.min_input_len = 0.0 # second - config.data.max_input_len: 100.0 # second - config.data.min_output_len: 0.0 # tokens - config.data.max_output_len: 400.0 # tokens - config.data.min_output_input_ratio: 0.00 - config.data.max_output_input_ratio: 100.0 - config.data.manifest = config.data.test_manifest + config.data.max_input_len = float('inf') # second + config.data.min_output_len = 0.0 # tokens + config.data.max_output_len = float('inf') # tokens + config.data.min_output_input_ratio = 0.00 + config.data.max_output_input_ratio = float('inf') test_dataset = ManifestDataset.from_config(config) # return text ord id self.test_loader = DataLoader(