diff --git a/data/aishell/aishell.py b/data/aishell/aishell.py index d3340fc6f..e81d20853 100644 --- a/data/aishell/aishell.py +++ b/data/aishell/aishell.py @@ -19,6 +19,7 @@ from data_utils.utility import download, unpack DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') URL_ROOT = 'http://www.openslr.org/resources/33' +URL_ROOT = 'https://openslr.magicdatatech.com/resources/33' DATA_URL = URL_ROOT + '/data_aishell.tgz' MD5_DATA = '2f494334227864a8a8fec932999db9d8' diff --git a/data/librispeech/librispeech.py b/data/librispeech/librispeech.py index 07cc09339..770b71108 100644 --- a/data/librispeech/librispeech.py +++ b/data/librispeech/librispeech.py @@ -20,6 +20,7 @@ import io from data_utils.utility import download, unpack URL_ROOT = "http://www.openslr.org/resources/12" +URL_ROOT = "https://openslr.magicdatatech.com/resources/12" URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz" URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz" URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz" diff --git a/data/voxforge/voxforge.py b/data/voxforge/voxforge.py index b86b0f004..b7cc38bdb 100644 --- a/data/voxforge/voxforge.py +++ b/data/voxforge/voxforge.py @@ -11,6 +11,7 @@ from __future__ import print_function import os import codecs +import datetime import soundfile import json import argparse diff --git a/data_utils/data.py b/data_utils/data.py index 3628e0676..4a5224efa 100644 --- a/data_utils/data.py +++ b/data_utils/data.py @@ -276,8 +276,8 @@ class DataGenerator(object): def reader(): for instance in manifest: inst = self.process_utterance(instance["audio_filepath"], - instance["text"]), - yield inst[0] + instance["text"]) + yield inst return reader diff --git a/data_utils/speech.py b/data_utils/speech.py index 0cea88730..e64109dc8 100644 --- a/data_utils/speech.py +++ b/data_utils/speech.py @@ -3,6 +3,7 @@ from __future__ import absolute_import from __future__ import division from __future__ import print_function +import numpy as np from data_utils.audio import AudioSegment