diff --git a/examples/dataset/musan/.gitignore b/examples/dataset/musan/.gitignore
new file mode 100644
index 000000000..3f0d0616a
--- /dev/null
+++ b/examples/dataset/musan/.gitignore
@@ -0,0 +1,5 @@
+manifest.music
+manifest.noise
+manifest.speech
+musan/
+musan.tar.gz
diff --git a/examples/dataset/musan/musan.py b/examples/dataset/musan/musan.py
index 0d01057e4..87d8e5e10 100644
--- a/examples/dataset/musan/musan.py
+++ b/examples/dataset/musan/musan.py
@@ -33,7 +33,7 @@
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 URL_ROOT = 'https://www.openslr.org/resources/17'
 DATA_URL = URL_ROOT + '/musan.tar.gz'
-MD5_DATA = ''
+MD5_DATA = '0c472d4fc0c5141eca47ad1ffeb2a7df'
 
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
@@ -52,37 +52,24 @@ args = parser.parse_args()
 def create_manifest(data_dir, manifest_path_prefix):
     print("Creating manifest %s ..." % manifest_path_prefix)
     json_lines = []
-    transcript_path = os.path.join(data_dir, 'transcript',
-                                   'aishell_transcript_v0.8.txt')
-    transcript_dict = {}
-    for line in codecs.open(transcript_path, 'r', 'utf-8'):
-        line = line.strip()
-        if line == '': continue
-        audio_id, text = line.split(' ', 1)
-        # remove withespace
-        text = ''.join(text.split())
-        transcript_dict[audio_id] = text
-
-    data_types = ['train', 'dev', 'test']
+    data_types = ['music', 'noise', 'speech']
     for type in data_types:
         del json_lines[:]
-        audio_dir = os.path.join(data_dir, 'wav', type)
+        audio_dir = os.path.join(data_dir, type)
         for subfolder, _, filelist in sorted(os.walk(audio_dir)):
+            print("Scanning %s ..." % subfolder)
             for fname in filelist:
                 audio_path = os.path.join(subfolder, fname)
-                audio_id = fname[:-4]
-                # if no transcription for audio then skipped
-                if audio_id not in transcript_dict:
+                if not audio_path.endswith('.wav'):
                     continue
                 audio_data, samplerate = soundfile.read(audio_path)
                 duration = float(len(audio_data) / samplerate)
-                text = transcript_dict[audio_id]
                 json_lines.append(
                     json.dumps(
                         {
                             'audio_filepath': audio_path,
                             'duration': duration,
-                            'text': text
+                            'type': type,
                         },
                         ensure_ascii=False))
         manifest_path = manifest_path_prefix + '.' + type
@@ -93,15 +80,10 @@
 
 def prepare_dataset(url, md5sum, target_dir, manifest_path):
     """Download, unpack and create manifest file."""
-    data_dir = os.path.join(target_dir, 'data_aishell')
+    data_dir = os.path.join(target_dir, 'musan')
     if not os.path.exists(data_dir):
         filepath = download(url, md5sum, target_dir)
         unpack(filepath, target_dir)
-        # unpack all audio tar files
-        audio_dir = os.path.join(data_dir, 'wav')
-        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
-            for ftar in filelist:
-                unpack(os.path.join(subfolder, ftar), subfolder, True)
     else:
         print("Skip downloading and unpacking. Data already exists in %s."
               % target_dir)
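With this change, musan.py writes one newline-delimited JSON manifest per source type (manifest.music, manifest.noise, manifest.speech), each entry carrying 'audio_filepath', 'duration' and 'type'. A minimal sketch of how such a manifest could be consumed downstream; the helper below is illustrative and not part of the patch:

# Illustrative only: read a manifest written by musan.py and report
# the total audio duration per type.
import json
from collections import defaultdict

def summarize_manifest(manifest_path):
    totals = defaultdict(float)
    with open(manifest_path, 'r', encoding='utf-8') as fin:
        for line in fin:
            entry = json.loads(line)  # {'audio_filepath': ..., 'duration': ..., 'type': ...}
            totals[entry['type']] += entry['duration']
    return dict(totals)

print(summarize_manifest('manifest.music'))  # e.g. {'music': <total seconds>}
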
diff --git a/examples/dataset/rir_noise/.gitignore b/examples/dataset/rir_noise/.gitignore
new file mode 100644
index 000000000..eb7588824
--- /dev/null
+++ b/examples/dataset/rir_noise/.gitignore
@@ -0,0 +1,5 @@
+RIRS_NOISES/
+manifest.pointsource_noises
+manifest.real_rirs_isotropic_noises
+manifest.simulated_rirs
+rirs_noises.zip
diff --git a/examples/dataset/rir_noise/rir_noise.py b/examples/dataset/rir_noise/rir_noise.py
index dd2b5c64f..643540c9b 100644
--- a/examples/dataset/rir_noise/rir_noise.py
+++ b/examples/dataset/rir_noise/rir_noise.py
@@ -27,13 +27,13 @@
 import codecs
 import soundfile
 import json
 import argparse
-from data_utils.utility import download, unpack
+from utils.utility import download, unpack, unzip
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 URL_ROOT = 'http://www.openslr.org/resources/28'
 DATA_URL = URL_ROOT + '/rirs_noises.zip'
-MD5_DATA = ''
+MD5_DATA = 'e6f48e257286e05de56413b4779d8ffb'
 
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
@@ -52,37 +52,25 @@ args = parser.parse_args()
 def create_manifest(data_dir, manifest_path_prefix):
     print("Creating manifest %s ..." % manifest_path_prefix)
     json_lines = []
-    transcript_path = os.path.join(data_dir, 'transcript',
-                                   'aishell_transcript_v0.8.txt')
-    transcript_dict = {}
-    for line in codecs.open(transcript_path, 'r', 'utf-8'):
-        line = line.strip()
-        if line == '': continue
-        audio_id, text = line.split(' ', 1)
-        # remove withespace
-        text = ''.join(text.split())
-        transcript_dict[audio_id] = text
-
-    data_types = ['train', 'dev', 'test']
+    data_types = [
+        'pointsource_noises', 'real_rirs_isotropic_noises', 'simulated_rirs'
+    ]
     for type in data_types:
         del json_lines[:]
-        audio_dir = os.path.join(data_dir, 'wav', type)
+        audio_dir = os.path.join(data_dir, type)
         for subfolder, _, filelist in sorted(os.walk(audio_dir)):
             for fname in filelist:
                 audio_path = os.path.join(subfolder, fname)
-                audio_id = fname[:-4]
-                # if no transcription for audio then skipped
-                if audio_id not in transcript_dict:
+                if not audio_path.endswith('.wav'):
                     continue
                 audio_data, samplerate = soundfile.read(audio_path)
                 duration = float(len(audio_data) / samplerate)
-                text = transcript_dict[audio_id]
                 json_lines.append(
                     json.dumps(
                         {
                             'audio_filepath': audio_path,
                             'duration': duration,
-                            'text': text
+                            'type': type,
                         },
                         ensure_ascii=False))
         manifest_path = manifest_path_prefix + '.' + type
@@ -92,16 +80,11 @@
 
 
 def prepare_dataset(url, md5sum, target_dir, manifest_path):
-    """Download, unpack and create manifest file."""
-    data_dir = os.path.join(target_dir, 'data_aishell')
+    """Download, unzip and create manifest file."""
+    data_dir = os.path.join(target_dir, 'RIRS_NOISES')
     if not os.path.exists(data_dir):
         filepath = download(url, md5sum, target_dir)
-        unpack(filepath, target_dir)
-        # unpack all audio tar files
-        audio_dir = os.path.join(data_dir, 'wav')
-        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
-            for ftar in filelist:
-                unpack(os.path.join(subfolder, ftar), subfolder, True)
+        unzip(filepath, target_dir)
    else:
         print("Skip downloading and unpacking. Data already exists in %s."
               % target_dir)
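The rir_noise.py manifests have the same shape, with 'type' set to one of pointsource_noises, real_rirs_isotropic_noises or simulated_rirs. As a hedged example of downstream use (the function and the default manifest filename are assumptions, not part of the patch), an augmentation step could sample a clip from one of these manifests with soundfile, the same reader the dataset scripts use:

# Illustrative only: pick a random clip from a manifest written by rir_noise.py.
import json
import random

import soundfile

def load_random_clip(manifest_path='manifest.pointsource_noises'):
    with open(manifest_path, 'r', encoding='utf-8') as fin:
        entries = [json.loads(line) for line in fin]
    entry = random.choice(entries)
    samples, samplerate = soundfile.read(entry['audio_filepath'])
    return samples, samplerate, entry['type']
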
diff --git a/utils/utility.py b/utils/utility.py
index 1d3be04d4..b13bc1129 100644
--- a/utils/utility.py
+++ b/utils/utility.py
@@ -14,6 +14,7 @@
 
 import os
 import tarfile
+import zipfile
 
 from paddle.dataset.common import md5file
 
@@ -59,3 +60,13 @@ def unpack(filepath, target_dir, rm_tar=False):
     tar.close()
     if rm_tar == True:
         os.remove(filepath)
+
+
+def unzip(filepath, target_dir, rm_tar=False):
+    """Unzip the file to the target_dir."""
+    print("Unzipping %s ..." % filepath)
+    tar = zipfile.ZipFile(filepath, 'r')
+    tar.extractall(target_dir)
+    tar.close()
+    if rm_tar == True:
+        os.remove(filepath)
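For context, a rough sketch of how prepare_dataset in rir_noise.py exercises the new unzip helper, assuming the defaults shown in the patch (download is expected to return the path of the fetched archive, as the existing scripts rely on):

# Sketch only: mirrors the download/unzip flow in rir_noise.py.
import os

from utils.utility import download, unzip

DATA_URL = 'http://www.openslr.org/resources/28/rirs_noises.zip'
MD5_DATA = 'e6f48e257286e05de56413b4779d8ffb'
target_dir = os.path.expanduser('~/.cache/paddle/dataset/speech')

filepath = download(DATA_URL, MD5_DATA, target_dir)
unzip(filepath, target_dir)  # extracts into target_dir/RIRS_NOISES
# Passing rm_tar=True would also delete the downloaded zip after extraction.

The rm_tar parameter name is kept for symmetry with unpack, even though the argument removes a zip archive here rather than a tarball.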