Merge pull request #517 from PaddlePaddle/1.8

update data source and remove useless code
pull/518/head
Hui Zhang 4 years ago committed by GitHub
commit afe49c5f07
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@@ -19,6 +19,7 @@ from data_utils.utility import download, unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
URL_ROOT = 'http://www.openslr.org/resources/33'
+URL_ROOT = 'https://openslr.magicdatatech.com/resources/33'
DATA_URL = URL_ROOT + '/data_aishell.tgz'
MD5_DATA = '2f494334227864a8a8fec932999db9d8'

@@ -20,6 +20,7 @@ import io
from data_utils.utility import download, unpack
URL_ROOT = "http://www.openslr.org/resources/12"
+URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"

@@ -11,6 +11,7 @@ from __future__ import print_function
import os
import codecs
import datetime
import soundfile
import json
import argparse

@@ -276,8 +276,8 @@ class DataGenerator(object):
def reader():
for instance in manifest:
inst = self.process_utterance(instance["audio_filepath"],
-instance["text"]),
-yield inst[0]
+instance["text"])
+yield inst
return reader

@@ -3,6 +3,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from data_utils.audio import AudioSegment

Loading…
Cancel
Save