fix url in librispeech.py

pull/2086/head
huangyuxin 2 years ago
parent 9aa868d14d
commit 69c36a56a8

@ -1,4 +1,4 @@
# [Aidatatang_200zh](http://www.openslr.org/62/)
# [Aidatatang_200zh](http://openslr.elda.org/62/)
Aidatatang_200zh is a free Chinese Mandarin speech corpus provided by Beijing DataTang Technology Co., Ltd under the Creative Commons Attribution-NonCommercial-NoDerivatives 4.0 International Public License.
The contents and the corresponding descriptions of the corpus include:

@ -1,3 +1,3 @@
# [Aishell1](http://www.openslr.org/33/)
# [Aishell1](http://openslr.elda.org/33/)
This Open Source Mandarin Speech Corpus, AISHELL-ASR0009-OS1, is 178 hours long. It is a part of AISHELL-ASR0009, whose utterances cover 11 domains, including smart home, autonomous driving, and industrial production. The whole recording was made in a quiet indoor environment, using 3 different devices at the same time: a high fidelity microphone (44.1kHz, 16-bit), an Android-system mobile phone (16kHz, 16-bit), and an iOS-system mobile phone (16kHz, 16-bit). The high fidelity audio was re-sampled to 16kHz to build AISHELL-ASR0009-OS1. 400 speakers from different accent areas in China were invited to participate in the recording. The manual transcription accuracy rate is above 95%, achieved through professional speech annotation and strict quality inspection. The corpus is divided into training, development and testing sets. (This database is free for academic research; commercial use is not allowed without permission.)

@ -31,7 +31,7 @@ from utils.utility import unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = 'http://www.openslr.org/resources/33'
URL_ROOT = 'http://openslr.elda.org/resources/33'
# URL_ROOT = 'https://openslr.magicdatatech.com/resources/33'
DATA_URL = URL_ROOT + '/data_aishell.tgz'
MD5_DATA = '2f494334227864a8a8fec932999db9d8'

@ -31,7 +31,7 @@ import soundfile
from utils.utility import download
from utils.utility import unpack
URL_ROOT = "http://www.openslr.org/resources/12"
URL_ROOT = "http://openslr.elda.org/resources/12"
#URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"

@ -1,4 +1,4 @@
# [MagicData](http://www.openslr.org/68/)
# [MagicData](http://openslr.elda.org/68/)
MAGICDATA Mandarin Chinese Read Speech Corpus was developed by MAGIC DATA Technology Co., Ltd. and freely published for non-commercial use.
The contents and the corresponding descriptions of the corpus include:

@ -30,7 +30,7 @@ import soundfile
from utils.utility import download
from utils.utility import unpack
URL_ROOT = "http://www.openslr.org/resources/31"
URL_ROOT = "http://openslr.elda.org/resources/31"
URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz"

@ -34,7 +34,7 @@ from utils.utility import unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = 'https://www.openslr.org/resources/17'
URL_ROOT = 'https://openslr.elda.org/resources/17'
DATA_URL = URL_ROOT + '/musan.tar.gz'
MD5_DATA = '0c472d4fc0c5141eca47ad1ffeb2a7df'

@ -1,4 +1,4 @@
# [Primewords](http://www.openslr.org/47/)
# [Primewords](http://openslr.elda.org/47/)
This free Chinese Mandarin speech corpus set is released by Shanghai Primewords Information Technology Co., Ltd.
The corpus was recorded on smart mobile phones by 296 native Chinese speakers. The transcription accuracy is higher than 98%, at a confidence level of 95%. It is free for academic use.

@ -34,7 +34,7 @@ from utils.utility import unzip
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = '--no-check-certificate http://www.openslr.org/resources/28'
URL_ROOT = '--no-check-certificate http://openslr.elda.org/resources/28'
DATA_URL = URL_ROOT + '/rirs_noises.zip'
MD5_DATA = 'e6f48e257286e05de56413b4779d8ffb'

@ -1 +1 @@
# [FreeST](http://www.openslr.org/38/)
# [FreeST](http://openslr.elda.org/38/)

@ -1,4 +1,4 @@
# [THCHS30](http://www.openslr.org/18/)
# [THCHS30](http://openslr.elda.org/18/)
This is the *data part* of the `THCHS30 2015` acoustic data
& scripts dataset.

@ -32,7 +32,7 @@ from utils.utility import unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = 'http://www.openslr.org/resources/18'
URL_ROOT = 'http://openslr.elda.org/resources/18'
# URL_ROOT = 'https://openslr.magicdatatech.com/resources/18'
DATA_URL = URL_ROOT + '/data_thchs30.tgz'
TEST_NOISE_URL = URL_ROOT + '/test-noise.tgz'

Loading…
Cancel
Save