Extract common utility functions.

pull/2/head
yangyaming 7 years ago
parent 69e0d86ddb
commit d1420d121e

@ -12,12 +12,12 @@ from __future__ import print_function
import distutils.util import distutils.util
import os import os
import sys import sys
import tarfile
import argparse import argparse
import soundfile import soundfile
import json import json
import codecs import codecs
from paddle.v2.dataset.common import md5file from paddle.v2.dataset.common import md5file
from data_utils.utility import download, unpack
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech') DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
@ -59,33 +59,6 @@ parser.add_argument(
args = parser.parse_args() args = parser.parse_args()
def download(url, md5sum, target_dir):
"""
Download file from url to target_dir, and check md5sum.
"""
if not os.path.exists(target_dir): os.makedirs(target_dir)
filepath = os.path.join(target_dir, url.split("/")[-1])
if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
print("Downloading %s ..." % url)
os.system("wget -c " + url + " -P " + target_dir)
print("\nMD5 Chesksum %s ..." % filepath)
if not md5file(filepath) == md5sum:
raise RuntimeError("MD5 checksum failed.")
else:
print("File exists, skip downloading. (%s)" % filepath)
return filepath
def unpack(filepath, target_dir):
"""
Unpack the file to the target_dir.
"""
print("Unpacking %s ..." % filepath)
tar = tarfile.open(filepath)
tar.extractall(target_dir)
tar.close()
def create_manifest(data_dir, manifest_path): def create_manifest(data_dir, manifest_path):
""" """
Create a manifest json file summarizing the data set, with each line Create a manifest json file summarizing the data set, with each line

@ -5,6 +5,8 @@ from __future__ import print_function
import json import json
import codecs import codecs
import os
import tarfile
def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0): def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
@ -33,3 +35,28 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
json_data["duration"] >= min_duration): json_data["duration"] >= min_duration):
manifest.append(json_data) manifest.append(json_data)
return manifest return manifest
def download(url, md5sum, target_dir):
"""Download file from url to target_dir, and check md5sum."""
if not os.path.exists(target_dir): os.makedirs(target_dir)
filepath = os.path.join(target_dir, url.split("/")[-1])
if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
print("Downloading %s ..." % url)
os.system("wget -c " + url + " -P " + target_dir)
print("\nMD5 Chesksum %s ..." % filepath)
if not md5file(filepath) == md5sum:
raise RuntimeError("MD5 checksum failed.")
else:
print("File exists, skip downloading. (%s)" % filepath)
return filepath
def unpack(filepath, target_dir, rm_tar=False):
"""Unpack the file to the target_dir."""
print("Unpacking %s ..." % filepath)
tar = tarfile.open(filepath)
tar.extractall(target_dir)
tar.close()
if rm_tar == True:
os.remove(filepath)

Loading…
Cancel
Save