From 047d8bb37d668f9041991b3f7984014fb9562415 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 19 Jul 2021 08:47:20 +0000 Subject: [PATCH 1/2] libri s0 w/ spec-aug result --- examples/librispeech/s0/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/librispeech/s0/README.md b/examples/librispeech/s0/README.md index 76aa5e78a..5603d3c8a 100644 --- a/examples/librispeech/s0/README.md +++ b/examples/librispeech/s0/README.md @@ -4,6 +4,7 @@ | Model | Params | release | Config | Test set | Loss | WER | | --- | --- | --- | --- | --- | --- | --- | +| DeepSpeech2 | 42.96M | 2.2.0 | conf/deepspeech2.yaml + spec_aug | test-clean | 14.49190807 | 0.067283 | | DeepSpeech2 | 42.96M | 2.1.0 | conf/deepspeech2.yaml | 15.184467315673828 | test-clean | 0.072154 | | DeepSpeech2 | 42.96M | 2.0.0 | conf/deepspeech2.yaml | - | test-clean | 0.073973 | | DeepSpeech2 | 42.96M | 1.8.5 | - | test-clean | - | 0.074939 | From 080df497ab0bbc8620abe0d9365a0b7fb3c98950 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Mon, 19 Jul 2021 09:03:44 +0000 Subject: [PATCH 2/2] download aishell resource --- examples/dataset/aishell/aishell.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/examples/dataset/aishell/aishell.py b/examples/dataset/aishell/aishell.py index 5811a401a..e95327cc8 100644 --- a/examples/dataset/aishell/aishell.py +++ b/examples/dataset/aishell/aishell.py @@ -34,6 +34,8 @@ URL_ROOT = 'http://www.openslr.org/resources/33' # URL_ROOT = 'https://openslr.magicdatatech.com/resources/33' DATA_URL = URL_ROOT + '/data_aishell.tgz' MD5_DATA = '2f494334227864a8a8fec932999db9d8' +RESOURCE_URL = URL_ROOT + '/resource_aishell.tgz' +MD5_RESOURCE = '957d480a0fcac85fc18e550756f624e5' parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( @@ -110,7 +112,7 @@ def create_manifest(data_dir, manifest_path_prefix): print(f"{total_sec / total_num} sec/utt", file=f) -def prepare_dataset(url, md5sum, target_dir, manifest_path): +def
prepare_dataset(url, md5sum, target_dir, manifest_path=None): """Download, unpack and create manifest file.""" data_dir = os.path.join(target_dir, 'data_aishell') if not os.path.exists(data_dir): @@ -124,7 +126,9 @@ def prepare_dataset(url, md5sum, target_dir, manifest_path): else: print("Skip downloading and unpacking. Data already exists in %s." % target_dir) - create_manifest(data_dir, manifest_path) + + if manifest_path: + create_manifest(data_dir, manifest_path) def main(): @@ -137,6 +141,12 @@ def main(): target_dir=args.target_dir, manifest_path=args.manifest_prefix) + prepare_dataset( + url=RESOURCE_URL, + md5sum=MD5_RESOURCE, + target_dir=args.target_dir, + manifest_path=None) + print("Data download and manifest prepare done!")