From fd3491ba1b5a0ccfac7667a0bfc9048d1de792cc Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 24 Aug 2021 02:56:41 +0000 Subject: [PATCH 1/4] fix dataloader batchsize and minibatchsize --- deepspeech/exps/u2_kaldi/model.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index 4f6ff4cb..46e5b4d9 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -228,7 +228,7 @@ class U2Trainer(Trainer): maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, - mini_batch_size=1, + mini_batch_size=self.args.nprocs, batch_count='auto', batch_bins=0, batch_frames_in=0, @@ -247,7 +247,7 @@ class U2Trainer(Trainer): maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, - mini_batch_size=1, + mini_batch_size=self.args.nprocs, batch_count='auto', batch_bins=0, batch_frames_in=0, @@ -263,7 +263,7 @@ class U2Trainer(Trainer): json_file=config.data.test_manifest, train_mode=False, sortagrad=False, - batch_size=config.collator.batch_size, + batch_size=config.decoding.batch_size, maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, @@ -282,7 +282,7 @@ class U2Trainer(Trainer): json_file=config.data.test_manifest, train_mode=False, sortagrad=False, - batch_size=config.collator.batch_size, + batch_size=config.decoding.batch_size, maxlen_in=float('inf'), maxlen_out=float('inf'), minibatches=0, From 8215bd0e7915fb8e139281c15056fe3fa39f01f9 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 24 Aug 2021 05:46:39 +0000 Subject: [PATCH 2/4] fix load vocab; zero W for not warptime --- deepspeech/frontend/augmentor/spec_augment.py | 3 +++ deepspeech/frontend/utility.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/deepspeech/frontend/augmentor/spec_augment.py b/deepspeech/frontend/augmentor/spec_augment.py index 7c23b628..a9bb043d 100644 --- a/deepspeech/frontend/augmentor/spec_augment.py +++ b/deepspeech/frontend/augmentor/spec_augment.py @@ -151,6 +151,9 @@ class SpecAugmentor(AugmentorBase): np.ndarray: time warped spectrogram (time, freq) """ window = max_time_warp = self.W + if window == 0: + return x + if mode == "PIL": t = x.shape[0] if t - window <= window: diff --git a/deepspeech/frontend/utility.py b/deepspeech/frontend/utility.py index 3d0683b0..72dfc98d 100644 --- a/deepspeech/frontend/utility.py +++ b/deepspeech/frontend/utility.py @@ -46,7 +46,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]: with open(dict_path, "r") as f: dictionary = f.readlines() - char_list = [entry.split(" ")[0] for entry in dictionary] + char_list = [entry.strip().split(" ")[0] for entry in dictionary] if BLANK not in char_list: char_list.insert(0, BLANK) if EOS not in char_list: From 3d9aebfaa3373d9ee03ccf06f03bfcf07196c42c Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 24 Aug 2021 06:07:47 +0000 Subject: [PATCH 3/4] fix specaug; add data static --- examples/aishell/s0/README.md | 10 +++++++++- examples/aishell/s0/conf/augmentation.json | 8 ++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/examples/aishell/s0/README.md b/examples/aishell/s0/README.md index 6ce39b23..eedf92c9 100644 --- a/examples/aishell/s0/README.md +++ b/examples/aishell/s0/README.md @@ -1,10 +1,18 @@ # Aishell-1 +## Data +| Data Subset | Duration in Seconds | +| data/manifest.train | 1.23 ~ 14.53125 | +| data/manifest.dev | 1.645 ~ 12.533 | +| data/manifest.test | 1.859125 ~ 14.6999375 | + +`jq '.feat_shape[0]' data/manifest.train | sort -un` + ## Deepspeech2 | Model | Params | Release | Config | Test set | Loss | CER | | --- | --- | --- | --- | --- | --- | --- | -| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382,0.073507 | +| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 | | DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 | diff --git a/examples/aishell/s0/conf/augmentation.json b/examples/aishell/s0/conf/augmentation.json index ac8a1c53..6f249242 100644 --- a/examples/aishell/s0/conf/augmentation.json +++ b/examples/aishell/s0/conf/augmentation.json @@ -19,17 +19,17 @@ { "type": "specaug", "params": { - "W": 5, + "W": 0, "warp_mode": "PIL", - "F": 30, + "F": 10, "n_freq_masks": 2, - "T": 40, + "T": 50, "n_time_masks": 2, "p": 1.0, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": false + "replace_with_zero": true }, "prob": 1.0 } From 715e90a9dfb6fa3fab98d6b7c29a99b4570d789f Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Tue, 24 Aug 2021 07:42:52 +0000 Subject: [PATCH 4/4] fix librispeech s0 specaug --- examples/librispeech/s0/conf/augmentation.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/librispeech/s0/conf/augmentation.json b/examples/librispeech/s0/conf/augmentation.json index d0409b14..31c481c8 100644 --- a/examples/librispeech/s0/conf/augmentation.json +++ b/examples/librispeech/s0/conf/augmentation.json @@ -19,17 +19,17 @@ { "type": "specaug", "params": { + "W": 0, + "warp_mode": "PIL", "F": 10, - "T": 50, "n_freq_masks": 2, + "T": 50, "n_time_masks": 2, "p": 1.0, - "W": 80, "adaptive_number_ratio": 0, "adaptive_size_ratio": 0, "max_n_time_masks": 20, - "replace_with_zero": true, - "warp_mode": "PIL" + "replace_with_zero": true }, "prob": 1.0 }