Merge branch 'develop' of https://github.com/PaddlePaddle/DeepSpeech into ds2_online

pull/783/head
huangyuxin 3 years ago
commit 40466ef669

@ -228,7 +228,7 @@ class U2Trainer(Trainer):
maxlen_in=float('inf'),
maxlen_out=float('inf'),
minibatches=0,
mini_batch_size=1,
mini_batch_size=self.args.nprocs,
batch_count='auto',
batch_bins=0,
batch_frames_in=0,
@ -247,7 +247,7 @@ class U2Trainer(Trainer):
maxlen_in=float('inf'),
maxlen_out=float('inf'),
minibatches=0,
mini_batch_size=1,
mini_batch_size=self.args.nprocs,
batch_count='auto',
batch_bins=0,
batch_frames_in=0,
@ -263,7 +263,7 @@ class U2Trainer(Trainer):
json_file=config.data.test_manifest,
train_mode=False,
sortagrad=False,
batch_size=config.collator.batch_size,
batch_size=config.decoding.batch_size,
maxlen_in=float('inf'),
maxlen_out=float('inf'),
minibatches=0,
@ -282,7 +282,7 @@ class U2Trainer(Trainer):
json_file=config.data.test_manifest,
train_mode=False,
sortagrad=False,
batch_size=config.collator.batch_size,
batch_size=config.decoding.batch_size,
maxlen_in=float('inf'),
maxlen_out=float('inf'),
minibatches=0,

@ -151,6 +151,9 @@ class SpecAugmentor(AugmentorBase):
np.ndarray: time warped spectrogram (time, freq)
"""
window = max_time_warp = self.W
if window == 0:
return x
if mode == "PIL":
t = x.shape[0]
if t - window <= window:

@ -46,7 +46,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]:
with open(dict_path, "r") as f:
dictionary = f.readlines()
char_list = [entry.split(" ")[0] for entry in dictionary]
char_list = [entry.strip().split(" ")[0] for entry in dictionary]
if BLANK not in char_list:
char_list.insert(0, BLANK)
if EOS not in char_list:

@ -1,10 +1,18 @@
# Aishell-1
## Data
| Data Subset | Duration in Seconds |
| --- | --- |
| data/manifest.train | 1.23 ~ 14.53125 |
| data/manifest.dev | 1.645 ~ 12.533 |
| data/manifest.test | 1.859125 ~ 14.6999375 |
`jq '.feat_shape[0]' data/manifest.train | sort -un`
## Deepspeech2
| Model | Params | Release | Config | Test set | Loss | CER |
| --- | --- | --- | --- | --- | --- | --- |
| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382,0.073507 |
| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 |
| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |
| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
| DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |

@ -19,17 +19,17 @@
{
"type": "specaug",
"params": {
"W": 5,
"W": 0,
"warp_mode": "PIL",
"F": 30,
"F": 10,
"n_freq_masks": 2,
"T": 40,
"T": 50,
"n_time_masks": 2,
"p": 1.0,
"adaptive_number_ratio": 0,
"adaptive_size_ratio": 0,
"max_n_time_masks": 20,
"replace_with_zero": false
"replace_with_zero": true
},
"prob": 1.0
}

@ -19,17 +19,17 @@
{
"type": "specaug",
"params": {
"W": 0,
"warp_mode": "PIL",
"F": 10,
"T": 50,
"n_freq_masks": 2,
"T": 50,
"n_time_masks": 2,
"p": 1.0,
"W": 80,
"adaptive_number_ratio": 0,
"adaptive_size_ratio": 0,
"max_n_time_masks": 20,
"replace_with_zero": true,
"warp_mode": "PIL"
"replace_with_zero": true
},
"prob": 1.0
}

Loading…
Cancel
Save