Merge branch 'develop' of https://github.com/PaddlePaddle/DeepSpeech into ds2_online

pull/783/head
huangyuxin 3 years ago
commit 40466ef669

@ -228,7 +228,7 @@ class U2Trainer(Trainer):
maxlen_in=float('inf'), maxlen_in=float('inf'),
maxlen_out=float('inf'), maxlen_out=float('inf'),
minibatches=0, minibatches=0,
mini_batch_size=1, mini_batch_size=self.args.nprocs,
batch_count='auto', batch_count='auto',
batch_bins=0, batch_bins=0,
batch_frames_in=0, batch_frames_in=0,
@ -247,7 +247,7 @@ class U2Trainer(Trainer):
maxlen_in=float('inf'), maxlen_in=float('inf'),
maxlen_out=float('inf'), maxlen_out=float('inf'),
minibatches=0, minibatches=0,
mini_batch_size=1, mini_batch_size=self.args.nprocs,
batch_count='auto', batch_count='auto',
batch_bins=0, batch_bins=0,
batch_frames_in=0, batch_frames_in=0,
@ -263,7 +263,7 @@ class U2Trainer(Trainer):
json_file=config.data.test_manifest, json_file=config.data.test_manifest,
train_mode=False, train_mode=False,
sortagrad=False, sortagrad=False,
batch_size=config.collator.batch_size, batch_size=config.decoding.batch_size,
maxlen_in=float('inf'), maxlen_in=float('inf'),
maxlen_out=float('inf'), maxlen_out=float('inf'),
minibatches=0, minibatches=0,
@ -282,7 +282,7 @@ class U2Trainer(Trainer):
json_file=config.data.test_manifest, json_file=config.data.test_manifest,
train_mode=False, train_mode=False,
sortagrad=False, sortagrad=False,
batch_size=config.collator.batch_size, batch_size=config.decoding.batch_size,
maxlen_in=float('inf'), maxlen_in=float('inf'),
maxlen_out=float('inf'), maxlen_out=float('inf'),
minibatches=0, minibatches=0,

@ -151,6 +151,9 @@ class SpecAugmentor(AugmentorBase):
np.ndarray: time warped spectrogram (time, freq) np.ndarray: time warped spectrogram (time, freq)
""" """
window = max_time_warp = self.W window = max_time_warp = self.W
if window == 0:
return x
if mode == "PIL": if mode == "PIL":
t = x.shape[0] t = x.shape[0]
if t - window <= window: if t - window <= window:

@ -46,7 +46,7 @@ def load_dict(dict_path: Optional[Text], maskctc=False) -> Optional[List[Text]]:
with open(dict_path, "r") as f: with open(dict_path, "r") as f:
dictionary = f.readlines() dictionary = f.readlines()
char_list = [entry.split(" ")[0] for entry in dictionary] char_list = [entry.strip().split(" ")[0] for entry in dictionary]
if BLANK not in char_list: if BLANK not in char_list:
char_list.insert(0, BLANK) char_list.insert(0, BLANK)
if EOS not in char_list: if EOS not in char_list:

@ -1,10 +1,18 @@
# Aishell-1 # Aishell-1
## Data
| Data Subset | Duration in Seconds |
| --- | --- |
| data/manifest.train | 1.23 ~ 14.53125 |
| data/manifest.dev | 1.645 ~ 12.533 |
| data/manifest.test | 1.859125 ~ 14.6999375 |
`jq '.feat_shape[0]' data/manifest.train | sort -un`
## Deepspeech2 ## Deepspeech2
| Model | Params | Release | Config | Test set | Loss | CER | | Model | Params | Release | Config | Test set | Loss | CER |
| --- | --- | --- | --- | --- | --- | --- | | --- | --- | --- | --- | --- | --- | --- |
| DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382,0.073507 | | DeepSpeech2 | 58.4M | 2.2.0 | conf/deepspeech2.yaml + spec aug + new datapipe | test | 6.396368026733398 | 0.068382 |
| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml + spec aug | test | 7.483316898345947 | 0.077860 |
| DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 | | DeepSpeech2 | 58.4M | 2.1.0 | conf/deepspeech2.yaml | test | 7.299022197723389 | 0.078671 |
| DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 | | DeepSpeech2 | 58.4M | 2.0.0 | conf/deepspeech2.yaml | test | - | 0.078977 |

@ -19,17 +19,17 @@
{ {
"type": "specaug", "type": "specaug",
"params": { "params": {
"W": 5, "W": 0,
"warp_mode": "PIL", "warp_mode": "PIL",
"F": 30, "F": 10,
"n_freq_masks": 2, "n_freq_masks": 2,
"T": 40, "T": 50,
"n_time_masks": 2, "n_time_masks": 2,
"p": 1.0, "p": 1.0,
"adaptive_number_ratio": 0, "adaptive_number_ratio": 0,
"adaptive_size_ratio": 0, "adaptive_size_ratio": 0,
"max_n_time_masks": 20, "max_n_time_masks": 20,
"replace_with_zero": false "replace_with_zero": true
}, },
"prob": 1.0 "prob": 1.0
} }

@ -19,17 +19,17 @@
{ {
"type": "specaug", "type": "specaug",
"params": { "params": {
"W": 0,
"warp_mode": "PIL",
"F": 10, "F": 10,
"T": 50,
"n_freq_masks": 2, "n_freq_masks": 2,
"T": 50,
"n_time_masks": 2, "n_time_masks": 2,
"p": 1.0, "p": 1.0,
"W": 80,
"adaptive_number_ratio": 0, "adaptive_number_ratio": 0,
"adaptive_size_ratio": 0, "adaptive_size_ratio": 0,
"max_n_time_masks": 20, "max_n_time_masks": 20,
"replace_with_zero": true, "replace_with_zero": true
"warp_mode": "PIL"
}, },
"prob": 1.0 "prob": 1.0
} }

Loading…
Cancel
Save