diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index 4f6ff4cb..46e5b4d9 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -228,7 +228,7 @@ class U2Trainer(Trainer): maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, - mini_batch_size=1, + mini_batch_size=self.args.nprocs, batch_count='auto', batch_bins=0, batch_frames_in=0, @@ -247,7 +247,7 @@ class U2Trainer(Trainer): maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, - mini_batch_size=1, + mini_batch_size=self.args.nprocs, batch_count='auto', batch_bins=0, batch_frames_in=0, @@ -263,7 +263,7 @@ class U2Trainer(Trainer): json_file=config.data.test_manifest, train_mode=False, sortagrad=False, - batch_size=config.collator.batch_size, + batch_size=config.decoding.batch_size, maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, @@ -282,7 +282,7 @@ class U2Trainer(Trainer): json_file=config.data.test_manifest, train_mode=False, sortagrad=False, - batch_size=config.collator.batch_size, + batch_size=config.decoding.batch_size, maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, diff --git a/deepspeech/frontend/augmentor/spec_augment.py b/deepspeech/frontend/augmentor/spec_augment.py index 7c23b628..a9bb043d 100644 --- a/deepspeech/frontend/augmentor/spec_augment.py +++ b/deepspeech/frontend/augmentor/spec_augment.py @@ -151,6 +151,9 @@ class SpecAugmentor(AugmentorBase): np.ndarray: time warped spectrogram (time, freq) """ window = max_time_warp = self.W + if window == 0: + return x + if mode == "PIL": t = x.shape[0] if t - window <= window: diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py index 3d0683b0..72dfc98d 100644 --- a/deepspeech/frontend/utility.py +++ b/deepspeech/frontend/utility.py @@ -46,7 +46,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]: with open(dict_path, "r") as f: dictionary = f.readlines() - char_list = 
[entry.split(" ")[0] for entry in dictionary] + char_list = [entry.strip().split(" ")[0] for entry in dictionary] if BLANK not in char_list: char_list.insert(0, BLANK) if EOS not in char_list: diff --git a/examples/aishell/s0/README.md b/examples/aishell/s0/README.md index 6ce39b23..eedf92c9 100644 --- a/examples/aishell/s0/README.md +++ b/examples/aishell/s0/README.md @@ -1,10 +1,19 @@ # Aishell-1 +## Data +| Data Subset | Duration in Seconds | +| --- | --- | +| data/manifest.train | 1.23 ~ 14.53125 | +| data/manifest.dev | 1.645 ~ 12.533 | +| data/manifest.test | 1.859125 ~ 14.6999375 | + +`jq '.feat_shape[0]' data/manifest.train | sort -un` + ## Deepspeech2 | Model | Params | Release | Config | Test set | Loss | CER | | --- | --- | --- | --- | --- | --- | --- | -| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382,0.073507 | +| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 | | DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 | diff --git a/examples/aishell/s0/conf/augmentation.json b/examples/aishell/s0/conf/augmentation.json index ac8a1c53..6f249242 100644 --- a/examples/aishell/s0/conf/augmentation.json +++ b/examples/aishell/s0/conf/augmentation.json @@ -19,17 +19,17 @@ { "type": "specaug", "params": { - "W": 5, + "W": 0, "warp_mode": "PIL", - "F": 30, + "F": 10, "n_freq_masks": 2, - "T": 40, + "T": 50, "n_time_masks": 2, "p": 1.0, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": false + "replace_with_zero": true }, "prob": 1.0 } diff --git a/examples/librispeech/s0/conf/augmentation.json b/examples/librispeech/s0/conf/augmentation.json index d0409b14..31c481c8 100644 
--- a/examples/librispeech/s0/conf/augmentation.json +++ b/examples/librispeech/s0/conf/augmentation.json @@ -19,17 +19,17 @@ { "type": "specaug", "params": { + "W": 0, + "warp_mode": "PIL", "F": 10, - "T": 50, "n_freq_masks": 2, + "T": 50, "n_time_masks": 2, "p": 1.0, - "W": 80, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true, - "warp_mode": "PIL" + "replace_with_zero": true }, "prob": 1.0 }