From 45f73c507cbb6b4e7e16d9b4988011bf97f8e446 Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Mon, 1 Mar 2021 07:13:56 +0000
Subject: [PATCH] refactor repo fix decoding

---
 .gitignore                                    |   3 +-
 {data_utils => deepspeech}/__init__.py        |   0
 .../decoders}/__init__.py                     |   0
 .../decoders}/decoders_deprecated.py          |   0
 .../decoders}/scorer_deprecated.py            |   0
 .../decoders/swig}/__init__.py                |   0
 .../decoders}/swig/_init_paths.py             |   0
 .../swig/ctc_beam_search_decoder.cpp          |   0
 .../decoders}/swig/ctc_beam_search_decoder.h  |   0
 .../decoders}/swig/ctc_greedy_decoder.cpp     |   0
 .../decoders}/swig/ctc_greedy_decoder.h       |   0
 .../decoders}/swig/decoder_utils.cpp          |   0
 .../decoders}/swig/decoder_utils.h            |   0
 .../decoders}/swig/decoders.i                 |   0
 .../decoders}/swig/path_trie.cpp              |   0
 .../decoders}/swig/path_trie.h                |   0
 .../decoders}/swig/scorer.cpp                 |   0
 .../decoders}/swig/scorer.h                   |   0
 .../decoders}/swig/setup.py                   |   0
 .../decoders}/swig/setup.sh                   |   0
 .../decoders}/swig_wrapper.py                 |   0
 .../decoders}/tests/test_decoders.py          |   0
 {decoders => deepspeech/exps}/__init__.py     |   0
 .../exps/deepspeech2}/__init__.py             |   0
 .../exps/deepspeech2/bin/infer.py             |  11 +-
 .../exps/deepspeech2/bin/test.py              |  10 +-
 .../exps/deepspeech2/bin/train.py             |   8 +-
 .../exps/deepspeech2/bin/tune.py              |  36 ++---
 .../exps/deepspeech2}/config.py               |   4 -
 .../exps/deepspeech2}/dataset.py              |  10 +-
 .../exps/deepspeech2}/model.py                |  97 ++++++--------
 .../frontend}/__init__.py                     |   0
 {data_utils => deepspeech/frontend}/audio.py  |   0
 .../frontend/augmentor}/__init__.py           |   0
 .../frontend}/augmentor/augmentation.py       |  14 +-
 .../frontend}/augmentor/base.py               |   0
 .../frontend}/augmentor/impulse_response.py   |   6 +-
 .../frontend}/augmentor/noise_perturb.py      |   6 +-
 .../online_bayesian_normalization.py          |   2 +-
 .../frontend}/augmentor/resample.py           |   2 +-
 .../frontend}/augmentor/shift_perturb.py      |   2 +-
 .../frontend}/augmentor/speed_perturb.py      |   2 +-
 .../frontend}/augmentor/volume_perturb.py     |   2 +-
 .../frontend/featurizer}/__init__.py          |   2 -
 .../frontend}/featurizer/audio_featurizer.py  |   4 +-
 .../frontend}/featurizer/speech_featurizer.py |   4 +-
 .../frontend}/featurizer/text_featurizer.py   |   0
 .../frontend}/normalizer.py                   |   4 +-
 {data_utils => deepspeech/frontend}/speech.py |  27 ++--
 .../frontend}/utility.py                      |  49 +------
 deepspeech/models/__init__.py                 |  13 ++
 {model_utils => deepspeech/models}/network.py |  35 ++---
 deepspeech/modules/__init__.py                |  13 ++
 deepspeech/training/__init__.py               |  15 +++
 {training => deepspeech/training}/cli.py      |   3 +-
 {training => deepspeech/training}/trainer.py  |  43 +++---
 deepspeech/utils/__init__.py                  |  13 ++
 {utils => deepspeech/utils}/checkpoint.py     |   4 +-
 {utils => deepspeech/utils}/error_rate.py     |   0
 {utils => deepspeech/utils}/mp_tools.py       |   0
 deepspeech/utils/utility.py                   |  57 ++++++++
 deploy/demo_server.py                         |  11 +-
 examples/aishell/.gitignore                   |   2 +
 examples/aishell/conf/deepspeech2.yaml        |  10 +-
 examples/aishell/local/data.sh                |  11 +-
 .../aishell/local}/download_lm_ch.sh          |   7 +-
 .../aishell/local}/download_model.sh          |   9 +-
 examples/aishell/local/infer.sh               |   6 +-
 examples/aishell/local/infer_golden.sh        |  18 +--
 examples/aishell/local/test.sh                |   9 +-
 examples/aishell/local/test_golden.sh         |  41 ++----
 examples/aishell/local/train.sh               |   9 +-
 examples/aishell/local/tune.sh                |   2 +-
 examples/aishell/models                       |   1 -
 examples/aishell/path.sh                      |   3 +
 examples/aishell/run.sh                       |  13 +-
 .../{conf => aug_conf}/augmentation.config    |   0
 .../augmentation.config.example               |   0
 .../baidu_en8k}/download_lm_en.sh             |   8 +-
 .../baidu_en8k/download_model.sh              |   7 +-
 examples/baidu_en8k/path.sh                   |   5 +
 examples/baidu_en8k/run_infer_golden.sh       |  13 +-
 examples/baidu_en8k/run_test_golden.sh        |  14 +-
 examples/dataset/aishell/.gitignore           |   1 +
 .../local => dataset/aishell}/aishell.py      |   2 +-
 .../chime3_background}/chime3_background.py   |   3 +-
 examples/dataset/librispeech/.gitignore       |   7 +
 .../librispeech}/librispeech.py               |   4 +-
 examples/dataset/mini_librispeech/.gitignore  |   4 +
 .../mini_librispeech/mini_librispeech.py      | 115 ++++++++++++++++
 examples/dataset/musan/musan.py               | 123 ++++++++++++++++++
 examples/dataset/rir_noise/rir_noise.py       | 123 ++++++++++++++++++
 .../dataset}/voxforge/run_data.sh             |   9 +-
 .../dataset}/voxforge/voxforge.py             |   4 +-
 examples/librispeech/.gitignore               |   2 +
 examples/librispeech/conf/deepspeech2.yaml    |  24 ++--
 examples/librispeech/local/data.sh            |  13 +-
 examples/librispeech/local/download_lm_en.sh  |  20 +++
 .../librispeech/local}/download_model.sh      |  10 +-
 examples/librispeech/local/infer.sh           |  34 +----
 examples/librispeech/local/infer_golden.sh    |  10 +-
 examples/librispeech/local/test.sh            |  33 +----
 examples/librispeech/local/test_golden.sh     |  19 +--
 examples/librispeech/local/train.sh           |  37 ++----
 examples/librispeech/local/tune.sh            |  25 ++--
 examples/librispeech/models                   |   1 -
 examples/librispeech/path.sh                  |   4 +
 examples/librispeech/run.sh                   |  10 +-
 examples/tiny/.gitignore                      |   2 +
 examples/tiny/conf/deepspeech2.yaml           |   7 +-
 examples/tiny/local/data.sh                   |  16 +--
 examples/tiny/local/download_lm_en.sh         |  20 +++
 examples/tiny/local/download_model.sh         |  21 +++
 examples/tiny/local/infer.sh                  |   8 +-
 examples/tiny/local/infer_golden.sh           |  10 +-
 examples/tiny/local/test.sh                   |   6 +-
 examples/tiny/local/test_golden.sh            |   9 +-
 examples/tiny/local/train.sh                  |   3 +-
 examples/tiny/local/tune.sh                   |  25 ++--
 examples/tiny/models                          |   1 -
 examples/tiny/path.sh                         |   4 +
 examples/tiny/run.sh                          |   3 -
 dataloader.ipynb => notebook/dataloader.ipynb |   0
 train_test.ipynb => notebook/train_test.ipynb |   0
 setup.sh                                      |   2 +-
 tests/network_test.py                         |   3 +-
 {utils/tests => tests}/test_error_rate.py     |   2 +-
 {tools => utils}/build_vocab.py               |   8 +-
 {tools => utils}/compute_mean_std.py          |   8 +-
 {tools => utils}/profile.sh                   |   0
 utils/utility.py                              |  90 +++++++------
 131 files changed, 968 insertions(+), 617 deletions(-)
 rename {data_utils => deepspeech}/__init__.py (100%)
 rename {data_utils/augmentor => deepspeech/decoders}/__init__.py (100%)
 rename {decoders => deepspeech/decoders}/decoders_deprecated.py (100%)
 rename {decoders => deepspeech/decoders}/scorer_deprecated.py (100%)
 rename {data_utils/featurizer => deepspeech/decoders/swig}/__init__.py (100%)
 rename {decoders => deepspeech/decoders}/swig/_init_paths.py (100%)
 rename {decoders => deepspeech/decoders}/swig/ctc_beam_search_decoder.cpp (100%)
 rename {decoders => deepspeech/decoders}/swig/ctc_beam_search_decoder.h (100%)
 rename {decoders => deepspeech/decoders}/swig/ctc_greedy_decoder.cpp (100%)
 rename {decoders => deepspeech/decoders}/swig/ctc_greedy_decoder.h (100%)
 rename {decoders => deepspeech/decoders}/swig/decoder_utils.cpp (100%)
 rename {decoders => deepspeech/decoders}/swig/decoder_utils.h (100%)
 rename {decoders => deepspeech/decoders}/swig/decoders.i (100%)
 rename {decoders => deepspeech/decoders}/swig/path_trie.cpp (100%)
 rename {decoders => deepspeech/decoders}/swig/path_trie.h (100%)
 rename {decoders => deepspeech/decoders}/swig/scorer.cpp (100%)
 rename {decoders => deepspeech/decoders}/swig/scorer.h (100%)
 rename {decoders => deepspeech/decoders}/swig/setup.py (100%)
 rename {decoders => deepspeech/decoders}/swig/setup.sh (100%)
 rename {decoders => deepspeech/decoders}/swig_wrapper.py (100%)
 rename {decoders => deepspeech/decoders}/tests/test_decoders.py (100%)
 rename {decoders => deepspeech/exps}/__init__.py (100%)
 rename {decoders/swig => deepspeech/exps/deepspeech2}/__init__.py (100%)
 rename infer.py => deepspeech/exps/deepspeech2/bin/infer.py (79%)
 rename test.py => deepspeech/exps/deepspeech2/bin/test.py (81%)
 rename train.py => deepspeech/exps/deepspeech2/bin/train.py (85%)
 rename tune.py => deepspeech/exps/deepspeech2/bin/tune.py (89%)
 rename {model_utils => deepspeech/exps/deepspeech2}/config.py (93%)
 rename {data_utils => deepspeech/exps/deepspeech2}/dataset.py (98%)
 rename {model_utils => deepspeech/exps/deepspeech2}/model.py (89%)
 rename {model_utils => deepspeech/frontend}/__init__.py (100%)
 rename {data_utils => deepspeech/frontend}/audio.py (100%)
 rename {utils => deepspeech/frontend/augmentor}/__init__.py (100%)
 rename {data_utils => deepspeech/frontend}/augmentor/augmentation.py (90%)
 rename {data_utils => deepspeech/frontend}/augmentor/base.py (100%)
 rename {data_utils => deepspeech/frontend}/augmentor/impulse_response.py (90%)
 rename {data_utils => deepspeech/frontend}/augmentor/noise_perturb.py (93%)
 rename {data_utils => deepspeech/frontend}/augmentor/online_bayesian_normalization.py (97%)
 rename {data_utils => deepspeech/frontend}/augmentor/resample.py (95%)
 rename {data_utils => deepspeech/frontend}/augmentor/shift_perturb.py (96%)
 rename {data_utils => deepspeech/frontend}/augmentor/speed_perturb.py (97%)
 rename {data_utils => deepspeech/frontend}/augmentor/volume_perturb.py (96%)
 rename {training => deepspeech/frontend/featurizer}/__init__.py (95%)
 rename {data_utils => deepspeech/frontend}/featurizer/audio_featurizer.py (98%)
 rename {data_utils => deepspeech/frontend}/featurizer/speech_featurizer.py (95%)
 rename {data_utils => deepspeech/frontend}/featurizer/text_featurizer.py (100%)
 rename {data_utils => deepspeech/frontend}/normalizer.py (97%)
 rename {data_utils => deepspeech/frontend}/speech.py (91%)
 rename {data_utils => deepspeech/frontend}/utility.py (53%)
 create mode 100644 deepspeech/models/__init__.py
 rename {model_utils => deepspeech/models}/network.py (96%)
 create mode 100644 deepspeech/modules/__init__.py
 create mode 100644 deepspeech/training/__init__.py
 rename {training => deepspeech/training}/cli.py (88%)
 rename {training => deepspeech/training}/trainer.py (93%)
 create mode 100644 deepspeech/utils/__init__.py
 rename {utils => deepspeech/utils}/checkpoint.py (98%)
 rename {utils => deepspeech/utils}/error_rate.py (100%)
 rename {utils => deepspeech/utils}/mp_tools.py (100%)
 create mode 100644 deepspeech/utils/utility.py
 create mode 100644 examples/aishell/.gitignore
 rename {models/lm => examples/aishell/local}/download_lm_ch.sh (73%)
 rename {models/aishell => examples/aishell/local}/download_model.sh (68%)
 delete mode 120000 examples/aishell/models
 rename examples/{conf => aug_conf}/augmentation.config (100%)
 rename examples/{conf => aug_conf}/augmentation.config.example (100%)
 rename {models/lm => examples/baidu_en8k}/download_lm_en.sh (73%)
 rename {models => examples}/baidu_en8k/download_model.sh (73%)
 create mode 100644 examples/dataset/aishell/.gitignore
 rename examples/{aishell/local => dataset/aishell}/aishell.py (98%)
 rename {data/noise => examples/dataset/chime3_background}/chime3_background.py (97%)
 create mode 100644 examples/dataset/librispeech/.gitignore
 rename examples/{librispeech/local => dataset/librispeech}/librispeech.py (98%)
 create mode 100644 examples/dataset/mini_librispeech/.gitignore
 create mode 100644 examples/dataset/mini_librispeech/mini_librispeech.py
 create mode 100644 examples/dataset/musan/musan.py
 create mode 100644 examples/dataset/rir_noise/rir_noise.py
 rename {data => examples/dataset}/voxforge/run_data.sh (58%)
 rename {data => examples/dataset}/voxforge/voxforge.py (98%)
 create mode 100644 examples/librispeech/.gitignore
 create mode 100644 examples/librispeech/local/download_lm_en.sh
 rename {models/librispeech => examples/librispeech/local}/download_model.sh (68%)
 delete mode 120000 examples/librispeech/models
 create mode 100644 examples/tiny/.gitignore
 create mode 100644 examples/tiny/local/download_lm_en.sh
 create mode 100644 examples/tiny/local/download_model.sh
 delete mode 120000 examples/tiny/models
 rename dataloader.ipynb => notebook/dataloader.ipynb (100%)
 rename train_test.ipynb => notebook/train_test.ipynb (100%)
 rename {utils/tests => tests}/test_error_rate.py (99%)
 rename {tools => utils}/build_vocab.py (92%)
 rename {tools => utils}/compute_mean_std.py (87%)
 rename {tools => utils}/profile.sh (100%)

diff --git a/.gitignore b/.gitignore
index 2ec11b5ee..dee7e4b33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,4 @@
 .DS_Store
 *.pyc
 tools/venv
-dataset
-models/*
+.vscode
diff --git a/data_utils/__init__.py b/deepspeech/__init__.py
similarity index 100%
rename from data_utils/__init__.py
rename to deepspeech/__init__.py
diff --git a/data_utils/augmentor/__init__.py b/deepspeech/decoders/__init__.py
similarity index 100%
rename from data_utils/augmentor/__init__.py
rename to deepspeech/decoders/__init__.py
diff --git a/decoders/decoders_deprecated.py b/deepspeech/decoders/decoders_deprecated.py
similarity index 100%
rename from decoders/decoders_deprecated.py
rename to deepspeech/decoders/decoders_deprecated.py
diff --git a/decoders/scorer_deprecated.py b/deepspeech/decoders/scorer_deprecated.py
similarity index 100%
rename from decoders/scorer_deprecated.py
rename to deepspeech/decoders/scorer_deprecated.py
diff --git a/data_utils/featurizer/__init__.py b/deepspeech/decoders/swig/__init__.py
similarity index 100%
rename from data_utils/featurizer/__init__.py
rename to deepspeech/decoders/swig/__init__.py
diff --git a/decoders/swig/_init_paths.py b/deepspeech/decoders/swig/_init_paths.py
similarity index 100%
rename from decoders/swig/_init_paths.py
rename to deepspeech/decoders/swig/_init_paths.py
diff --git a/decoders/swig/ctc_beam_search_decoder.cpp b/deepspeech/decoders/swig/ctc_beam_search_decoder.cpp
similarity index 100%
rename from decoders/swig/ctc_beam_search_decoder.cpp
rename to deepspeech/decoders/swig/ctc_beam_search_decoder.cpp
diff --git a/decoders/swig/ctc_beam_search_decoder.h b/deepspeech/decoders/swig/ctc_beam_search_decoder.h
similarity index 100%
rename from decoders/swig/ctc_beam_search_decoder.h
rename to deepspeech/decoders/swig/ctc_beam_search_decoder.h
diff --git a/decoders/swig/ctc_greedy_decoder.cpp b/deepspeech/decoders/swig/ctc_greedy_decoder.cpp
similarity index 100%
rename from decoders/swig/ctc_greedy_decoder.cpp
rename to deepspeech/decoders/swig/ctc_greedy_decoder.cpp
diff --git a/decoders/swig/ctc_greedy_decoder.h b/deepspeech/decoders/swig/ctc_greedy_decoder.h
similarity index 100%
rename from decoders/swig/ctc_greedy_decoder.h
rename to deepspeech/decoders/swig/ctc_greedy_decoder.h
diff --git a/decoders/swig/decoder_utils.cpp b/deepspeech/decoders/swig/decoder_utils.cpp
similarity index 100%
rename from decoders/swig/decoder_utils.cpp
rename to deepspeech/decoders/swig/decoder_utils.cpp
diff --git a/decoders/swig/decoder_utils.h b/deepspeech/decoders/swig/decoder_utils.h
similarity index 100%
rename from decoders/swig/decoder_utils.h
rename to deepspeech/decoders/swig/decoder_utils.h
diff --git a/decoders/swig/decoders.i b/deepspeech/decoders/swig/decoders.i
similarity index 100%
rename from decoders/swig/decoders.i
rename to deepspeech/decoders/swig/decoders.i
diff --git a/decoders/swig/path_trie.cpp b/deepspeech/decoders/swig/path_trie.cpp
similarity index 100%
rename from decoders/swig/path_trie.cpp
rename to deepspeech/decoders/swig/path_trie.cpp
diff --git a/decoders/swig/path_trie.h b/deepspeech/decoders/swig/path_trie.h
similarity index 100%
rename from decoders/swig/path_trie.h
rename to deepspeech/decoders/swig/path_trie.h
diff --git a/decoders/swig/scorer.cpp b/deepspeech/decoders/swig/scorer.cpp
similarity index 100%
rename from decoders/swig/scorer.cpp
rename to deepspeech/decoders/swig/scorer.cpp
diff --git a/decoders/swig/scorer.h b/deepspeech/decoders/swig/scorer.h
similarity index 100%
rename from decoders/swig/scorer.h
rename to deepspeech/decoders/swig/scorer.h
diff --git a/decoders/swig/setup.py b/deepspeech/decoders/swig/setup.py
similarity index 100%
rename from decoders/swig/setup.py
rename to deepspeech/decoders/swig/setup.py
diff --git a/decoders/swig/setup.sh b/deepspeech/decoders/swig/setup.sh
similarity index 100%
rename from decoders/swig/setup.sh
rename to deepspeech/decoders/swig/setup.sh
diff --git a/decoders/swig_wrapper.py b/deepspeech/decoders/swig_wrapper.py
similarity index 100%
rename from decoders/swig_wrapper.py
rename to deepspeech/decoders/swig_wrapper.py
diff --git a/decoders/tests/test_decoders.py b/deepspeech/decoders/tests/test_decoders.py
similarity index 100%
rename from decoders/tests/test_decoders.py
rename to deepspeech/decoders/tests/test_decoders.py
diff --git a/decoders/__init__.py b/deepspeech/exps/__init__.py
similarity index 100%
rename from decoders/__init__.py
rename to deepspeech/exps/__init__.py
diff --git a/decoders/swig/__init__.py b/deepspeech/exps/deepspeech2/__init__.py
similarity index 100%
rename from decoders/swig/__init__.py
rename to deepspeech/exps/deepspeech2/__init__.py
diff --git a/infer.py b/deepspeech/exps/deepspeech2/bin/infer.py
similarity index 79%
rename from infer.py
rename to deepspeech/exps/deepspeech2/bin/infer.py
index 52d3a6744..6f52c812f 100644
--- a/infer.py
+++ b/deepspeech/exps/deepspeech2/bin/infer.py
@@ -20,12 +20,13 @@ import functools
 
 from paddle import distributed as dist
 
-from utils.utility import print_arguments
-from training.cli import default_argument_parser
+from deepspeech.training.cli import default_argument_parser
+from deepspeech.utils.utility import print_arguments
+from deepspeech.utils.error_rate import char_errors, word_errors
 
-from model_utils.config import get_cfg_defaults
-from model_utils.model import DeepSpeech2Tester as Tester
-from utils.error_rate import char_errors, word_errors
+# TODO(hui zhang): dynamic load 
+from deepspeech.exps.deepspeech2.config import get_cfg_defaults
+from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester
 
 
 def main_sp(config, args):
diff --git a/test.py b/deepspeech/exps/deepspeech2/bin/test.py
similarity index 81%
rename from test.py
rename to deepspeech/exps/deepspeech2/bin/test.py
index 7758ddfd2..72b38f485 100644
--- a/test.py
+++ b/deepspeech/exps/deepspeech2/bin/test.py
@@ -20,12 +20,12 @@ import functools
 
 from paddle import distributed as dist
 
-from utils.utility import print_arguments
-from training.cli import default_argument_parser
+from deepspeech.training.cli import default_argument_parser
+from deepspeech.utils.utility import print_arguments
+from deepspeech.utils.error_rate import char_errors, word_errors
 
-from model_utils.config import get_cfg_defaults
-from model_utils.model import DeepSpeech2Tester as Tester
-from utils.error_rate import char_errors, word_errors
+from deepspeech.exps.deepspeech2.config import get_cfg_defaults
+from deepspeech.exps.deepspeech2.model import DeepSpeech2Tester as Tester
 
 
 def main_sp(config, args):
diff --git a/train.py b/deepspeech/exps/deepspeech2/bin/train.py
similarity index 85%
rename from train.py
rename to deepspeech/exps/deepspeech2/bin/train.py
index 87bd33d07..0c1d08914 100644
--- a/train.py
+++ b/deepspeech/exps/deepspeech2/bin/train.py
@@ -20,11 +20,11 @@ import functools
 
 from paddle import distributed as dist
 
-from utils.utility import print_arguments
-from training.cli import default_argument_parser
+from deepspeech.utils.utility import print_arguments
+from deepspeech.training.cli import default_argument_parser
 
-from model_utils.config import get_cfg_defaults
-from model_utils.model import DeepSpeech2Trainer as Trainer
+from deepspeech.exps.deepspeech2.config import get_cfg_defaults
+from deepspeech.exps.deepspeech2.model import DeepSpeech2Trainer as Trainer
 
 
 def main_sp(config, args):
diff --git a/tune.py b/deepspeech/exps/deepspeech2/bin/tune.py
similarity index 89%
rename from tune.py
rename to deepspeech/exps/deepspeech2/bin/tune.py
index b269265ae..33ecfe926 100644
--- a/tune.py
+++ b/deepspeech/exps/deepspeech2/bin/tune.py
@@ -20,22 +20,21 @@ import argparse
 import functools
 import gzip
 import logging
-import paddle.fluid as fluid
 
-from training.cli import default_argument_parser
-from model_utils.config import get_cfg_defaults
-
-from data_utils.dataset import SpeechCollator
-from data_utils.dataset import DeepSpeech2Dataset
-from data_utils.dataset import DeepSpeech2DistributedBatchSampler
-from data_utils.dataset import DeepSpeech2BatchSampler
 from paddle.io import DataLoader
 
-from model_utils.network import DeepSpeech2
-from model_utils.network import DeepSpeech2Loss
+from deepspeech.training.cli import default_argument_parser
+from deepspeech.utils.error_rate import char_errors, word_errors
+from deepspeech.utils.utility import add_arguments, print_arguments
+
+from deepspeech.models.network import DeepSpeech2
+from deepspeech.models.network import DeepSpeech2Loss
 
-from utils.error_rate import char_errors, word_errors
-from utils.utility import add_arguments, print_arguments
+from deepspeech.exps.deepspeech2.dataset import SpeechCollator
+from deepspeech.exps.deepspeech2.dataset import DeepSpeech2Dataset
+from deepspeech.exps.deepspeech2.dataset import DeepSpeech2DistributedBatchSampler
+from deepspeech.exps.deepspeech2.dataset import DeepSpeech2BatchSampler
+from deepspeech.exps.deepspeech2.config import get_cfg_defaults
 
 
 def tune(config, args):
@@ -114,7 +113,7 @@ def tune(config, args):
             return trans
 
         audio, text, audio_len, text_len = infer_data
-        _, probs, _ = model.predict(audio, audio_len)
+        _, probs, logits_lens = model.predict(audio, audio_len)
         target_transcripts = ordid2token(text, text_len)
         num_ins += audio.shape[0]
 
@@ -122,17 +121,17 @@ def tune(config, args):
         for index, (alpha, beta) in enumerate(params_grid):
             print(f"tuneing: alpha={alpha} beta={beta}")
             result_transcripts = model.decode_probs(
-                probs.numpy(), vocab_list, config.decoding.decoding_method,
+                probs.numpy(), logits_lens, vocab_list,
+                config.decoding.decoding_method,
                 config.decoding.lang_model_path, alpha, beta,
                 config.decoding.beam_size, config.decoding.cutoff_prob,
                 config.decoding.cutoff_top_n, config.decoding.num_proc_bsearch)
 
             for target, result in zip(target_transcripts, result_transcripts):
-                #print(f"tuneing: {target} {result}")
                 errors, len_ref = errors_func(target, result)
                 err_sum[index] += errors
 
-                # accumulate the length of references of every batch
+                # accumulate the length of references of every batch
                 # in the first iteration
                 if args.alpha_from == alpha and args.beta_from == beta:
                     len_refs += len_ref
@@ -148,8 +147,9 @@ def tune(config, args):
         min_index = err_ave.index(err_ave_min)
         print("\nBatch %d [%d/?], current opt (alpha, beta) = (%s, %s), "
               " min [%s] = %f" %
-              (cur_batch, num_ins, "%.3f" % params_grid[min_index][0], "%.3f" %
-               params_grid[min_index][1], args.error_rate_type, err_ave_min))
+              (cur_batch, num_ins, "%.3f" % params_grid[min_index][0],
+               "%.3f" % params_grid[min_index][1],
+               config.decoding.error_rate_type, err_ave_min))
         cur_batch += 1
 
     # output WER/CER at every (alpha, beta)
diff --git a/model_utils/config.py b/deepspeech/exps/deepspeech2/config.py
similarity index 93%
rename from model_utils/config.py
rename to deepspeech/exps/deepspeech2/config.py
index a6b99a61d..455f5b6c1 100644
--- a/model_utils/config.py
+++ b/deepspeech/exps/deepspeech2/config.py
@@ -56,10 +56,6 @@ _C.training = CN(
         lr_decay=1.0,  # learning rate decay
         weight_decay=1e-6,  # the coeff of weight decay
         global_grad_clip=5.0,  # the global norm clip
-        plot_interval=1000,  # plot attention and spectrogram by step
-        valid_interval=1000,  # validation by step
-        save_interval=1000,  # checkpoint by step
-        max_iteration=500000,  # max iteration to train by step
         n_epoch=50,  # train epochs
     ))
 
diff --git a/data_utils/dataset.py b/deepspeech/exps/deepspeech2/dataset.py
similarity index 98%
rename from data_utils/dataset.py
rename to deepspeech/exps/deepspeech2/dataset.py
index 6be0c0455..72e3d840d 100644
--- a/data_utils/dataset.py
+++ b/deepspeech/exps/deepspeech2/dataset.py
@@ -27,11 +27,11 @@ from paddle.io import BatchSampler
 from paddle.io import DistributedBatchSampler
 from paddle import distributed as dist
 
-from data_utils.utility import read_manifest
-from data_utils.augmentor.augmentation import AugmentationPipeline
-from data_utils.featurizer.speech_featurizer import SpeechFeaturizer
-from data_utils.speech import SpeechSegment
-from data_utils.normalizer import FeatureNormalizer
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.frontend.augmentor.augmentation import AugmentationPipeline
+from deepspeech.frontend.featurizer.speech_featurizer import SpeechFeaturizer
+from deepspeech.frontend.speech import SpeechSegment
+from deepspeech.frontend.normalizer import FeatureNormalizer
 
 logger = logging.getLogger(__name__)
 
diff --git a/model_utils/model.py b/deepspeech/exps/deepspeech2/model.py
similarity index 89%
rename from model_utils/model.py
rename to deepspeech/exps/deepspeech2/model.py
index 6520d94a3..633569fcf 100644
--- a/model_utils/model.py
+++ b/deepspeech/exps/deepspeech2/model.py
@@ -29,26 +29,23 @@ from paddle.io import DataLoader
 
 from paddle.fluid.dygraph import base as imperative_base
 from paddle.fluid import layers
-from paddle.fluid import framework
 from paddle.fluid import core
-from paddle.fluid import name_scope
 
-from utils import mp_tools
-from training import Trainer
+from deepspeech.training import Trainer
+from deepspeech.utils import mp_tools
+from deepspeech.utils.error_rate import char_errors, word_errors, cer, wer
 
-from model_utils.network import DeepSpeech2
-from model_utils.network import DeepSpeech2Loss
+from deepspeech.models.network import DeepSpeech2
+from deepspeech.models.network import DeepSpeech2Loss
 
-from data_utils.dataset import SpeechCollator
-from data_utils.dataset import DeepSpeech2Dataset
-from data_utils.dataset import DeepSpeech2DistributedBatchSampler
-from data_utils.dataset import DeepSpeech2BatchSampler
+from deepspeech.decoders.swig_wrapper import Scorer
+from deepspeech.decoders.swig_wrapper import ctc_greedy_decoder
+from deepspeech.decoders.swig_wrapper import ctc_beam_search_decoder_batch
 
-from decoders.swig_wrapper import Scorer
-from decoders.swig_wrapper import ctc_greedy_decoder
-from decoders.swig_wrapper import ctc_beam_search_decoder_batch
-
-from utils.error_rate import char_errors, word_errors, cer, wer
+from deepspeech.exps.deepspeech2.dataset import SpeechCollator
+from deepspeech.exps.deepspeech2.dataset import DeepSpeech2Dataset
+from deepspeech.exps.deepspeech2.dataset import DeepSpeech2DistributedBatchSampler
+from deepspeech.exps.deepspeech2.dataset import DeepSpeech2BatchSampler
 
 logger = logging.getLogger(__name__)
 
@@ -161,46 +158,6 @@ class DeepSpeech2Trainer(Trainer):
                 self.visualizer.add_scalar("train/{}".format(k), v,
                                            self.iteration)
 
-    def new_epoch(self):
-        """Reset the train loader and increment ``epoch``.
-        """
-        if self.parallel:
-            # batch sampler epoch start from 0
-            self.train_loader.batch_sampler.set_epoch(self.epoch)
-        self.epoch += 1
-
-    def train(self):
-        """The training process.
-        
-        It includes forward/backward/update and periodical validation and 
-        saving.
-        """
-        self.logger.info(
-            f"Train Total Examples: {len(self.train_loader.dataset)}")
-        self.new_epoch()
-        while self.epoch <= self.config.training.n_epoch:
-            try:
-                for batch in self.train_loader:
-                    self.iteration += 1
-                    self.train_batch(batch)
-
-                    # if self.iteration % self.config.training.valid_interval == 0:
-                    #     self.valid()
-
-                    # if self.iteration % self.config.training.save_interval == 0:
-                    #     self.save()
-            except Exception as e:
-                self.logger.error(e)
-                pass
-
-            self.valid()
-            self.save()
-            self.lr_scheduler.step()
-            self.new_epoch()
-
-    def compute_metrics(self, inputs, outputs):
-        pass
-
     @mp_tools.rank_zero_only
     @paddle.no_grad()
     def valid(self):
@@ -212,7 +169,7 @@ class DeepSpeech2Trainer(Trainer):
             audio, text, audio_len, text_len = batch
             outputs = self.model(*batch)
             loss = self.compute_losses(batch, outputs)
-            metrics = self.compute_metrics(batch, outputs)
+            #metrics = self.compute_metrics(batch, outputs)
 
             valid_losses['val_loss'].append(float(loss))
             valid_losses['val_loss_div_batchsize'].append(
@@ -373,6 +330,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
         target_transcripts = self.ordid2token(texts, texts_len)
         result_transcripts = self.model.decode_probs(
             probs.numpy(),
+            logits_len,
             vocab_list,
             decoding_method=cfg.decoding_method,
             lang_model_path=cfg.lang_model_path,
@@ -446,15 +404,37 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
         """
         # output dir
         if self.args.output:
-            output_dir = Path(self.args.output).expanduser() / "infer"
+            output_dir = Path(self.args.output).expanduser()
             output_dir.mkdir(parents=True, exist_ok=True)
         else:
             output_dir = Path(
-                self.args.checkpoint_path).expanduser().parent.parent / "infer"
+                self.args.checkpoint_path).expanduser().parent.parent
             output_dir.mkdir(parents=True, exist_ok=True)
 
         self.output_dir = output_dir
 
+    def setup_logger(self):
+        """Initialize a text logger to log the experiment.
+        
+        Each process has its own text logger. The logging message is written to
+        the standard output and a text file named ``worker_n.log`` in the 
+        output directory, where ``n`` means the rank of the process. 
+        """
+        format = '[%(levelname)s %(asctime)s %(filename)s:%(lineno)d] %(message)s'
+        formatter = logging.Formatter(fmt=format, datefmt='%Y/%m/%d %H:%M:%S')
+
+        logger.setLevel("INFO")
+
+        # global logger
+        stdout = True
+        save_path = ""
+        logging.basicConfig(
+            level=logging.DEBUG if stdout else logging.INFO,
+            format=format,
+            datefmt='%Y/%m/%d %H:%M:%S',
+            filename=save_path if not stdout else None)
+        self.logger = logger
+
     def setup(self):
         """Setup the experiment.
         """
@@ -463,6 +443,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             self.init_parallel()
 
         self.setup_output_dir()
+        self.setup_checkpointer()
         self.setup_logger()
 
         self.setup_dataloader()
diff --git a/model_utils/__init__.py b/deepspeech/frontend/__init__.py
similarity index 100%
rename from model_utils/__init__.py
rename to deepspeech/frontend/__init__.py
diff --git a/data_utils/audio.py b/deepspeech/frontend/audio.py
similarity index 100%
rename from data_utils/audio.py
rename to deepspeech/frontend/audio.py
diff --git a/utils/__init__.py b/deepspeech/frontend/augmentor/__init__.py
similarity index 100%
rename from utils/__init__.py
rename to deepspeech/frontend/augmentor/__init__.py
diff --git a/data_utils/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
similarity index 90%
rename from data_utils/augmentor/augmentation.py
rename to deepspeech/frontend/augmentor/augmentation.py
index f36d993e1..e50084a00 100644
--- a/data_utils/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -15,13 +15,13 @@
 
 import json
 import random
-from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
-from data_utils.augmentor.shift_perturb import ShiftPerturbAugmentor
-from data_utils.augmentor.speed_perturb import SpeedPerturbAugmentor
-from data_utils.augmentor.noise_perturb import NoisePerturbAugmentor
-from data_utils.augmentor.impulse_response import ImpulseResponseAugmentor
-from data_utils.augmentor.resample import ResampleAugmentor
-from data_utils.augmentor.online_bayesian_normalization import \
+from deepspeech.frontend.augmentor.volume_perturb import VolumePerturbAugmentor
+from deepspeech.frontend.augmentor.shift_perturb import ShiftPerturbAugmentor
+from deepspeech.frontend.augmentor.speed_perturb import SpeedPerturbAugmentor
+from deepspeech.frontend.augmentor.noise_perturb import NoisePerturbAugmentor
+from deepspeech.frontend.augmentor.impulse_response import ImpulseResponseAugmentor
+from deepspeech.frontend.augmentor.resample import ResampleAugmentor
+from deepspeech.frontend.augmentor.online_bayesian_normalization import \
      OnlineBayesianNormalizationAugmentor
 
 
diff --git a/data_utils/augmentor/base.py b/deepspeech/frontend/augmentor/base.py
similarity index 100%
rename from data_utils/augmentor/base.py
rename to deepspeech/frontend/augmentor/base.py
diff --git a/data_utils/augmentor/impulse_response.py b/deepspeech/frontend/augmentor/impulse_response.py
similarity index 90%
rename from data_utils/augmentor/impulse_response.py
rename to deepspeech/frontend/augmentor/impulse_response.py
index 839c6a809..40aa3d47e 100644
--- a/data_utils/augmentor/impulse_response.py
+++ b/deepspeech/frontend/augmentor/impulse_response.py
@@ -13,9 +13,9 @@
 # limitations under the License.
 """Contains the impulse response augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
-from data_utils.utility import read_manifest
-from data_utils.audio import AudioSegment
+from deepspeech.frontend.augmentor.base import AugmentorBase
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.frontend.audio import AudioSegment
 
 
 class ImpulseResponseAugmentor(AugmentorBase):
diff --git a/data_utils/augmentor/noise_perturb.py b/deepspeech/frontend/augmentor/noise_perturb.py
similarity index 93%
rename from data_utils/augmentor/noise_perturb.py
rename to deepspeech/frontend/augmentor/noise_perturb.py
index 954d1b419..350370b8f 100644
--- a/data_utils/augmentor/noise_perturb.py
+++ b/deepspeech/frontend/augmentor/noise_perturb.py
@@ -13,9 +13,9 @@
 # limitations under the License.
 """Contains the noise perturb augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
-from data_utils.utility import read_manifest
-from data_utils.audio import AudioSegment
+from deepspeech.frontend.augmentor.base import AugmentorBase
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.frontend.audio import AudioSegment
 
 
 class NoisePerturbAugmentor(AugmentorBase):
diff --git a/data_utils/augmentor/online_bayesian_normalization.py b/deepspeech/frontend/augmentor/online_bayesian_normalization.py
similarity index 97%
rename from data_utils/augmentor/online_bayesian_normalization.py
rename to deepspeech/frontend/augmentor/online_bayesian_normalization.py
index f5c7d99fd..14c260dfd 100644
--- a/data_utils/augmentor/online_bayesian_normalization.py
+++ b/deepspeech/frontend/augmentor/online_bayesian_normalization.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Contain the online bayesian normalization augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
+from deepspeech.frontend.augmentor.base import AugmentorBase
 
 
 class OnlineBayesianNormalizationAugmentor(AugmentorBase):
diff --git a/data_utils/augmentor/resample.py b/deepspeech/frontend/augmentor/resample.py
similarity index 95%
rename from data_utils/augmentor/resample.py
rename to deepspeech/frontend/augmentor/resample.py
index 3732e09cd..8ef574cbb 100644
--- a/data_utils/augmentor/resample.py
+++ b/deepspeech/frontend/augmentor/resample.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Contain the resample augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
+from deepspeech.frontend.augmentor.base import AugmentorBase
 
 
 class ResampleAugmentor(AugmentorBase):
diff --git a/data_utils/augmentor/shift_perturb.py b/deepspeech/frontend/augmentor/shift_perturb.py
similarity index 96%
rename from data_utils/augmentor/shift_perturb.py
rename to deepspeech/frontend/augmentor/shift_perturb.py
index 8b8e60362..2edbf594d 100644
--- a/data_utils/augmentor/shift_perturb.py
+++ b/deepspeech/frontend/augmentor/shift_perturb.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Contains the volume perturb augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
+from deepspeech.frontend.augmentor.base import AugmentorBase
 
 
 class ShiftPerturbAugmentor(AugmentorBase):
diff --git a/data_utils/augmentor/speed_perturb.py b/deepspeech/frontend/augmentor/speed_perturb.py
similarity index 97%
rename from data_utils/augmentor/speed_perturb.py
rename to deepspeech/frontend/augmentor/speed_perturb.py
index 7b28f7ec6..6518382db 100644
--- a/data_utils/augmentor/speed_perturb.py
+++ b/deepspeech/frontend/augmentor/speed_perturb.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Contain the speech perturbation augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
+from deepspeech.frontend.augmentor.base import AugmentorBase
 
 
 class SpeedPerturbAugmentor(AugmentorBase):
diff --git a/data_utils/augmentor/volume_perturb.py b/deepspeech/frontend/augmentor/volume_perturb.py
similarity index 96%
rename from data_utils/augmentor/volume_perturb.py
rename to deepspeech/frontend/augmentor/volume_perturb.py
index b98c7a3b4..dc64d0e9e 100644
--- a/data_utils/augmentor/volume_perturb.py
+++ b/deepspeech/frontend/augmentor/volume_perturb.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 """Contains the volume perturb augmentation model."""
 
-from data_utils.augmentor.base import AugmentorBase
+from deepspeech.frontend.augmentor.base import AugmentorBase
 
 
 class VolumePerturbAugmentor(AugmentorBase):
diff --git a/training/__init__.py b/deepspeech/frontend/featurizer/__init__.py
similarity index 95%
rename from training/__init__.py
rename to deepspeech/frontend/featurizer/__init__.py
index 932432db1..185a92b8d 100644
--- a/training/__init__.py
+++ b/deepspeech/frontend/featurizer/__init__.py
@@ -11,5 +11,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-from training.trainer import *
\ No newline at end of file
diff --git a/data_utils/featurizer/audio_featurizer.py b/deepspeech/frontend/featurizer/audio_featurizer.py
similarity index 98%
rename from data_utils/featurizer/audio_featurizer.py
rename to deepspeech/frontend/featurizer/audio_featurizer.py
index b410b0217..b5edb32d5 100644
--- a/data_utils/featurizer/audio_featurizer.py
+++ b/deepspeech/frontend/featurizer/audio_featurizer.py
@@ -14,8 +14,8 @@
 """Contains the audio featurizer class."""
 
 import numpy as np
-from data_utils.utility import read_manifest
-from data_utils.audio import AudioSegment
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.frontend.audio import AudioSegment
 from python_speech_features import mfcc
 from python_speech_features import delta
 
diff --git a/data_utils/featurizer/speech_featurizer.py b/deepspeech/frontend/featurizer/speech_featurizer.py
similarity index 95%
rename from data_utils/featurizer/speech_featurizer.py
rename to deepspeech/frontend/featurizer/speech_featurizer.py
index 1bbf2bf58..d4de96adc 100644
--- a/data_utils/featurizer/speech_featurizer.py
+++ b/deepspeech/frontend/featurizer/speech_featurizer.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 """Contains the speech featurizer class."""
 
-from data_utils.featurizer.audio_featurizer import AudioFeaturizer
-from data_utils.featurizer.text_featurizer import TextFeaturizer
+from deepspeech.frontend.featurizer.audio_featurizer import AudioFeaturizer
+from deepspeech.frontend.featurizer.text_featurizer import TextFeaturizer
 
 
 class SpeechFeaturizer(object):
diff --git a/data_utils/featurizer/text_featurizer.py b/deepspeech/frontend/featurizer/text_featurizer.py
similarity index 100%
rename from data_utils/featurizer/text_featurizer.py
rename to deepspeech/frontend/featurizer/text_featurizer.py
diff --git a/data_utils/normalizer.py b/deepspeech/frontend/normalizer.py
similarity index 97%
rename from data_utils/normalizer.py
rename to deepspeech/frontend/normalizer.py
index 83a008f10..8e50566c6 100644
--- a/data_utils/normalizer.py
+++ b/deepspeech/frontend/normalizer.py
@@ -15,8 +15,8 @@
 
 import numpy as np
 import random
-from data_utils.utility import read_manifest
-from data_utils.audio import AudioSegment
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.frontend.audio import AudioSegment
 
 
 class FeatureNormalizer(object):
diff --git a/data_utils/speech.py b/deepspeech/frontend/speech.py
similarity index 91%
rename from data_utils/speech.py
rename to deepspeech/frontend/speech.py
index 01c1787a4..2883405bb 100644
--- a/data_utils/speech.py
+++ b/deepspeech/frontend/speech.py
@@ -14,28 +14,33 @@
 """Contains the speech segment class."""
 
 import numpy as np
-from data_utils.audio import AudioSegment
+from deepspeech.frontend.audio import AudioSegment
 
 
 class SpeechSegment(AudioSegment):
-    """Speech segment abstraction, a subclass of AudioSegment,
-    with an additional transcript.
-
-    :param samples: Audio samples [num_samples x num_channels].
-    :type samples: ndarray.float32
-    :param sample_rate: Audio sample rate.
-    :type sample_rate: int
-    :param transcript: Transcript text for the speech.
-    :type transript: str
-    :raises TypeError: If the sample data type is not float or int.
+    """Speech Segment with Text
+
+    Args:
+        AudioSegment (AudioSegment): Audio Segment
     """
 
     def __init__(self, samples, sample_rate, transcript):
+        """Speech segment abstraction, a subclass of AudioSegment,
+            with an additional transcript.
+
+        Args:
+            samples (ndarray.float32): Audio samples [num_samples x num_channels].
+            sample_rate (int): Audio sample rate.
+            transcript (str): Transcript text for the speech.
+        """
         AudioSegment.__init__(self, samples, sample_rate)
         self._transcript = transcript
 
     def __eq__(self, other):
         """Return whether two objects are equal.
+
+        Returns:
+            bool: True, when equal to other
         """
         if not AudioSegment.__eq__(self, other):
             return False
diff --git a/data_utils/utility.py b/deepspeech/frontend/utility.py
similarity index 53%
rename from data_utils/utility.py
rename to deepspeech/frontend/utility.py
index 6cc1b2713..3694e106a 100644
--- a/data_utils/utility.py
+++ b/deepspeech/frontend/utility.py
@@ -20,6 +20,7 @@ import tarfile
 import time
 from threading import Thread
 from multiprocessing import Process, Manager, Value
+
 from paddle.dataset.common import md5file
 
 
@@ -49,51 +50,3 @@ def read_manifest(manifest_path, max_duration=float('inf'), min_duration=0.0):
                 json_data["duration"] >= min_duration):
             manifest.append(json_data)
     return manifest
-
-
-def getfile_insensitive(path):
-    """Get the actual file path when given insensitive filename."""
-    directory, filename = os.path.split(path)
-    directory, filename = (directory or '.'), filename.lower()
-    for f in os.listdir(directory):
-        newpath = os.path.join(directory, f)
-        if os.path.isfile(newpath) and f.lower() == filename:
-            return newpath
-
-
-def download_multi(url, target_dir, extra_args):
-    """Download multiple files from url to target_dir."""
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
-    print("Downloading %s ..." % url)
-    ret_code = os.system("wget -c " + url + ' ' + extra_args + " -P " +
-                         target_dir)
-    return ret_code
-
-
-def download(url, md5sum, target_dir):
-    """Download file from url to target_dir, and check md5sum."""
-    if not os.path.exists(target_dir): os.makedirs(target_dir)
-    filepath = os.path.join(target_dir, url.split("/")[-1])
-    if not (os.path.exists(filepath) and md5file(filepath) == md5sum):
-        print("Downloading %s ..." % url)
-        os.system("wget -c " + url + " -P " + target_dir)
-        print("\nMD5 Chesksum %s ..." % filepath)
-        if not md5file(filepath) == md5sum:
-            raise RuntimeError("MD5 checksum failed.")
-    else:
-        print("File exists, skip downloading. (%s)" % filepath)
-    return filepath
-
-
-def unpack(filepath, target_dir, rm_tar=False):
-    """Unpack the file to the target_dir."""
-    print("Unpacking %s ..." % filepath)
-    tar = tarfile.open(filepath)
-    tar.extractall(target_dir)
-    tar.close()
-    if rm_tar == True:
-        os.remove(filepath)
-
-
-class XmapEndSignal():
-    pass
diff --git a/deepspeech/models/__init__.py b/deepspeech/models/__init__.py
new file mode 100644
index 000000000..185a92b8d
--- /dev/null
+++ b/deepspeech/models/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/model_utils/network.py b/deepspeech/models/network.py
similarity index 96%
rename from model_utils/network.py
rename to deepspeech/models/network.py
index 1e7545ee6..a3ea771dc 100644
--- a/model_utils/network.py
+++ b/deepspeech/models/network.py
@@ -22,11 +22,10 @@ from paddle import nn
 from paddle.nn import functional as F
 from paddle.nn import initializer as I
 
-from utils import checkpoint
-
-from decoders.swig_wrapper import Scorer
-from decoders.swig_wrapper import ctc_greedy_decoder
-from decoders.swig_wrapper import ctc_beam_search_decoder_batch
+from deepspeech.utils import checkpoint
+from deepspeech.decoders.swig_wrapper import Scorer
+from deepspeech.decoders.swig_wrapper import ctc_greedy_decoder
+from deepspeech.decoders.swig_wrapper import ctc_beam_search_decoder_batch
 
 logger = logging.getLogger(__name__)
 
@@ -661,16 +660,19 @@ class DeepSpeech2(nn.Layer):
             self._init_ext_scorer(beam_alpha, beam_beta, lang_model_path,
                                   vocab_list)
 
-    def decode_probs(self, probs, vocab_list, decoding_method, lang_model_path,
-                     beam_alpha, beam_beta, beam_size, cutoff_prob,
-                     cutoff_top_n, num_processes):
-        """ probs: activation after softmax """
+    def decode_probs(self, probs, logits_lens, vocab_list, decoding_method,
+                     lang_model_path, beam_alpha, beam_beta, beam_size,
+                     cutoff_prob, cutoff_top_n, num_processes):
+        """ probs: activation after softmax 
+        logits_lens: audio output lens
+        """
+        probs_split = [probs[i, :l, :] for i, l in enumerate(logits_lens)]
         if decoding_method == "ctc_greedy":
             result_transcripts = self._decode_batch_greedy(
-                probs_split=probs, vocab_list=vocab_list)
+                probs_split=probs_split, vocab_list=vocab_list)
         elif decoding_method == "ctc_beam_search":
             result_transcripts = self._decode_batch_beam_search(
-                probs_split=probs,
+                probs_split=probs_split,
                 beam_alpha=beam_alpha,
                 beam_beta=beam_beta,
                 beam_size=beam_size,
@@ -686,12 +688,11 @@ class DeepSpeech2(nn.Layer):
     def decode(self, audio, audio_len, vocab_list, decoding_method,
                lang_model_path, beam_alpha, beam_beta, beam_size, cutoff_prob,
                cutoff_top_n, num_processes):
-        _, probs, audio_lens = self.predict(audio, audio_len)
-        probs_split = [probs[i, :l, :] for i, l in enumerate(audio_lens)]
-        return self.decode_probs(probs_split, vocab_list, decoding_method,
-                                 lang_model_path, beam_alpha, beam_beta,
-                                 beam_size, cutoff_prob, cutoff_top_n,
-                                 num_processes)
+        _, probs, logits_lens = self.predict(audio, audio_len)
+        return self.decode_probs(probs.numpy(), logits_lens, vocab_list,
+                                 decoding_method, lang_model_path, beam_alpha,
+                                 beam_beta, beam_size, cutoff_prob,
+                                 cutoff_top_n, num_processes)
 
     def from_pretrained(self, checkpoint_path):
         """Build a model from a pretrained model.
diff --git a/deepspeech/modules/__init__.py b/deepspeech/modules/__init__.py
new file mode 100644
index 000000000..185a92b8d
--- /dev/null
+++ b/deepspeech/modules/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/deepspeech/training/__init__.py b/deepspeech/training/__init__.py
new file mode 100644
index 000000000..1071a3dd7
--- /dev/null
+++ b/deepspeech/training/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from deepspeech.training.trainer import *
diff --git a/training/cli.py b/deepspeech/training/cli.py
similarity index 88%
rename from training/cli.py
rename to deepspeech/training/cli.py
index e0ebfc7de..1076fe0c7 100644
--- a/training/cli.py
+++ b/deepspeech/training/cli.py
@@ -59,7 +59,8 @@ def default_argument_parser():
     parser.add_argument("--nprocs", type=int, default=1, help="number of parallel processes to use.")
 
     # overwrite extra config and default config
-    parser.add_argument("--opts", nargs=argparse.REMAINDER, help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
+    #parser.add_argument("--opts", nargs=argparse.REMAINDER, help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
+    parser.add_argument("--opts", type=str, default=[], nargs='+', help="options to overwrite --config file and the default config, passing in KEY VALUE pairs")
     # yapd: enable
 
     return parser
diff --git a/training/trainer.py b/deepspeech/training/trainer.py
similarity index 93%
rename from training/trainer.py
rename to deepspeech/training/trainer.py
index 3fac31d70..f93bc5754 100644
--- a/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -24,8 +24,8 @@ from paddle import distributed as dist
 from paddle.distributed.utils import get_gpus
 from tensorboardX import SummaryWriter
 
-from utils import checkpoint
-from utils import mp_tools
+from deepspeech.utils import checkpoint
+from deepspeech.utils import mp_tools
 
 __all__ = ["Trainer"]
 
@@ -148,20 +148,6 @@ class Trainer():
             checkpoint_path=self.args.checkpoint_path)
         self.iteration = iteration
 
-    def read_batch(self):
-        """Read a batch from the train_loader.
-        Returns
-        -------
-        List[Tensor]
-            A batch.
-        """
-        try:
-            batch = next(self.iterator)
-        except StopIteration:
-            self.new_epoch()
-            batch = next(self.iterator)
-        return batch
-
     def new_epoch(self):
         """Reset the train loader and increment ``epoch``.
         """
@@ -169,7 +155,6 @@ class Trainer():
             # batch sampler epoch start from 0
             self.train_loader.batch_sampler.set_epoch(self.epoch)
         self.epoch += 1
-        self.iterator = iter(self.train_loader)
 
     def train(self):
         """The training process.
@@ -177,16 +162,22 @@ class Trainer():
         It includes forward/backward/update and periodical validation and 
         saving.
         """
+        self.logger.info(
+            f"Train Total Examples: {len(self.train_loader.dataset)}")
         self.new_epoch()
-        while self.iteration < self.config.training.max_iteration:
-            self.iteration += 1
-            self.train_batch()
-
-            if self.iteration % self.config.training.valid_interval == 0:
-                self.valid()
-
-            if self.iteration % self.config.training.save_interval == 0:
-                self.save()
+        while self.epoch <= self.config.training.n_epoch:
+            try:
+                for batch in self.train_loader:
+                    self.iteration += 1
+                    self.train_batch(batch)
+            except Exception as e:
+                # Keep going, but record the traceback rather than only str(e).
+                self.logger.exception(e)
+
+            self.valid()
+            self.save()
+            self.lr_scheduler.step()
+            self.new_epoch()
 
     def run(self):
         """The routine of the experiment after setup. This method is intended
diff --git a/deepspeech/utils/__init__.py b/deepspeech/utils/__init__.py
new file mode 100644
index 000000000..185a92b8d
--- /dev/null
+++ b/deepspeech/utils/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/utils/checkpoint.py b/deepspeech/utils/checkpoint.py
similarity index 98%
rename from utils/checkpoint.py
rename to deepspeech/utils/checkpoint.py
index e0c6938c9..f2066fdec 100644
--- a/utils/checkpoint.py
+++ b/deepspeech/utils/checkpoint.py
@@ -16,15 +16,15 @@ import os
 import time
 import logging
 import numpy as np
+
 import paddle
 from paddle import distributed as dist
 from paddle.nn import Layer
 from paddle.optimizer import Optimizer
 
-from utils import mp_tools
+from deepspeech.utils import mp_tools
 
 logger = logging.getLogger(__name__)
-logger.setLevel("INFO")
 
 __all__ = ["load_parameters", "save_parameters"]
 
diff --git a/utils/error_rate.py b/deepspeech/utils/error_rate.py
similarity index 100%
rename from utils/error_rate.py
rename to deepspeech/utils/error_rate.py
diff --git a/utils/mp_tools.py b/deepspeech/utils/mp_tools.py
similarity index 100%
rename from utils/mp_tools.py
rename to deepspeech/utils/mp_tools.py
diff --git a/deepspeech/utils/utility.py b/deepspeech/utils/utility.py
new file mode 100644
index 000000000..cd7166593
--- /dev/null
+++ b/deepspeech/utils/utility.py
@@ -0,0 +1,57 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains common utility functions."""
+
+import distutils.util
+
+
+def print_arguments(args):
+    """Print argparse's parsed arguments, one ``name: value`` pair per line.
+
+    Usage:
+
+    .. code-block:: python
+
+        parser = argparse.ArgumentParser()
+        parser.add_argument("name", default="John", type=str, help="User name.")
+        args = parser.parse_args()
+        print_arguments(args)
+
+    :param args: Namespace whose attributes are printed in sorted name order.
+    :type args: argparse.Namespace
+    """
+    print("-----------  Configuration Arguments -----------")
+    for name, value in sorted(vars(args).items()):
+        print(f"{name}: {value}")
+    print("------------------------------------------------")
+
+
+def add_arguments(argname, type, default, help, argparser, **kwargs):
+    """Register ``--<argname>`` on ``argparser``, appending the default to help.
+
+    Usage:
+
+    .. code-block:: python
+
+        parser = argparse.ArgumentParser()
+        add_arguments("name", str, "John", "User name.", parser)
+        args = parser.parse_args()
+    """
+    # ``bool("False")`` is True, so boolean options are parsed with strtobool.
+    if type == bool:
+        type = distutils.util.strtobool
+    option_name = "--" + argname
+    help_text = help + ' Default: %(default)s.'
+    argparser.add_argument(
+        option_name, default=default, type=type, help=help_text, **kwargs)
diff --git a/deploy/demo_server.py b/deploy/demo_server.py
index bfc48c9f1..299b58091 100644
--- a/deploy/demo_server.py
+++ b/deploy/demo_server.py
@@ -23,11 +23,12 @@ import struct
 import wave
 import paddle.fluid as fluid
 import numpy as np
-import _init_paths
-from data_utils.data import DataGenerator
-from model_utils.model import DeepSpeech2Model
-from data_utils.utility import read_manifest
-from utils.utility import add_arguments, print_arguments
+
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.utils.utility import add_arguments, print_arguments
+
+from deepspeech.exps.deepspeech2.model import DeepSpeech2Model
+from deepspeech.exps.deepspeech2.dataset import DataGenerator
 
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
diff --git a/examples/aishell/.gitignore b/examples/aishell/.gitignore
new file mode 100644
index 000000000..44038ca5b
--- /dev/null
+++ b/examples/aishell/.gitignore
@@ -0,0 +1,2 @@
+data
+ckpt*
diff --git a/examples/aishell/conf/deepspeech2.yaml b/examples/aishell/conf/deepspeech2.yaml
index e2e08e1a9..821c183e5 100644
--- a/examples/aishell/conf/deepspeech2.yaml
+++ b/examples/aishell/conf/deepspeech2.yaml
@@ -34,18 +34,14 @@ training:
   lr_decay: 0.83
   weight_decay: 1e-06
   global_grad_clip: 5.0
-  max_iteration: 500000
-  plot_interval: 1000
-  save_interval: 1000
-  valid_interval: 1000
 decoding:
-  batch_size: 10
+  batch_size: 128
   error_rate_type: cer 
   decoding_method: ctc_beam_search
-  lang_model_path: models/lm/zh_giga.no_cna_cmn.prune01244.klm
+  lang_model_path: data/lm/zh_giga.no_cna_cmn.prune01244.klm
   alpha: 2.6
   beta: 5.0
   beam_size: 300
-  cutoff_prob: 1.0 
+  cutoff_prob: 0.99
   cutoff_top_n: 40
   num_proc_bsearch: 10
diff --git a/examples/aishell/local/data.sh b/examples/aishell/local/data.sh
index b874b2df8..6eeb3d8fc 100644
--- a/examples/aishell/local/data.sh
+++ b/examples/aishell/local/data.sh
@@ -2,10 +2,13 @@
 
 mkdir -p data
 
+TARGET_DIR=${MAIN_ROOT}/examples/dataset
+mkdir -p ${TARGET_DIR}
+
 # download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 local/aishell.py \
+PYTHONPATH=.:$PYTHONPATH python3 ${TARGET_DIR}/aishell/aishell.py \
 --manifest_prefix="data/manifest" \
---target_dir="${MAIN_ROOT}/dataset/aishell"
+--target_dir="${TARGET_DIR}/aishell"
 
 if [ $? -ne 0 ]; then
     echo "Prepare Aishell failed. Terminated."
@@ -14,7 +17,7 @@ fi
 
 
 # build vocabulary
-python3 ${MAIN_ROOT}/tools/build_vocab.py \
+python3 ${MAIN_ROOT}/utils/build_vocab.py \
 --count_threshold=0 \
 --vocab_path="data/vocab.txt" \
 --manifest_paths "data/manifest.train" "data/manifest.dev"
@@ -26,7 +29,7 @@ fi
 
 
 # compute mean and stddev for normalizer
-python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
 --manifest_path="data/manifest.train" \
 --num_samples=2000 \
 --specgram_type="linear" \
diff --git a/models/lm/download_lm_ch.sh b/examples/aishell/local/download_lm_ch.sh
similarity index 73%
rename from models/lm/download_lm_ch.sh
rename to examples/aishell/local/download_lm_ch.sh
index 0e4915262..f9e2261fd 100644
--- a/models/lm/download_lm_ch.sh
+++ b/examples/aishell/local/download_lm_ch.sh
@@ -1,10 +1,13 @@
 #! /usr/bin/env bash
 
-. ../../utils/utility.sh
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/lm
+mkdir -p ${DIR}
 
 URL='https://deepspeech.bj.bcebos.com/zh_lm/zh_giga.no_cna_cmn.prune01244.klm'
 MD5="29e02312deb2e59b3c8686c7966d4fe3"
-TARGET=./zh_giga.no_cna_cmn.prune01244.klm
+TARGET=${DIR}/zh_giga.no_cna_cmn.prune01244.klm
 
 
 echo "Download language model ..."
diff --git a/models/aishell/download_model.sh b/examples/aishell/local/download_model.sh
similarity index 68%
rename from models/aishell/download_model.sh
rename to examples/aishell/local/download_model.sh
index 76ac4d005..2f9f40fb3 100644
--- a/models/aishell/download_model.sh
+++ b/examples/aishell/local/download_model.sh
@@ -1,10 +1,13 @@
 #! /usr/bin/env bash
 
-. ../../utils/utility.sh
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/pretrain
+mkdir -p ${DIR} 
 
 URL='https://deepspeech.bj.bcebos.com/mandarin_models/aishell_model_fluid.tar.gz'
 MD5=2bf0cc8b6d5da2a2a787b5cc36a496b5
-TARGET=./aishell_model_fluid.tar.gz
+TARGET=${DIR}/aishell_model_fluid.tar.gz
 
 
 echo "Download Aishell model ..."
@@ -13,7 +16,7 @@ if [ $? -ne 0 ]; then
     echo "Fail to download Aishell model!"
     exit 1
 fi
-tar -zxvf $TARGET
+tar -zxvf $TARGET -C ${DIR}
 
 
 exit 0
diff --git a/examples/aishell/local/infer.sh b/examples/aishell/local/infer.sh
index bc413be11..4b4c9381b 100644
--- a/examples/aishell/local/infer.sh
+++ b/examples/aishell/local/infer.sh
@@ -2,14 +2,12 @@
 
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_ch.sh
+bash local/download_lm_ch.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
-python3 -u ${MAIN_ROOT}/infer.py \
+python3 -u ${BIN_DIR}/infer.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
diff --git a/examples/aishell/local/infer_golden.sh b/examples/aishell/local/infer_golden.sh
index 296c0d5b4..1727bcbad 100644
--- a/examples/aishell/local/infer_golden.sh
+++ b/examples/aishell/local/infer_golden.sh
@@ -1,22 +1,16 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_ch.sh
+bash local/download_lm_ch.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/aishell > /dev/null
-bash download_model.sh
+bash local/download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # infer
 CUDA_VISIBLE_DEVICES=0 \
@@ -35,10 +29,10 @@ python3 -u ${MAIN_ROOT}/infer.py \
 --use_gpu=False \
 --share_rnn_weights=False \
 --infer_manifest="data/manifest.test" \
---mean_std_path="${MAIN_ROOT}/models/aishell/mean_std.npz" \
---vocab_path="${MAIN_ROOT}/models/aishell/vocab.txt" \
---model_path="${MAIN_ROOT}/models/aishell" \
---lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--mean_std_path="data/pretrain/mean_std.npz" \
+--vocab_path="data/pretrain/vocab.txt" \
+--model_path="data/pretrain" \
+--lang_model_path="data/lm/zh_giga.no_cna_cmn.prune01244.klm" \
 --decoding_method="ctc_beam_search" \
 --error_rate_type="cer" \
 --specgram_type="linear"
diff --git a/examples/aishell/local/test.sh b/examples/aishell/local/test.sh
index 6e6544bdb..74015f5d5 100644
--- a/examples/aishell/local/test.sh
+++ b/examples/aishell/local/test.sh
@@ -1,19 +1,16 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_ch.sh
+bash local/download_lm_ch.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
-
-python3 -u ${MAIN_ROOT}/test.py \
+python3 -u ${BIN_DIR}/test.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
---output ckpt
+--checkpoint_path ${1} 
 
 if [ $? -ne 0 ]; then
     echo "Failed in evaluation!"
diff --git a/examples/aishell/local/test_golden.sh b/examples/aishell/local/test_golden.sh
index 062a1b99b..86abd38cb 100644
--- a/examples/aishell/local/test_golden.sh
+++ b/examples/aishell/local/test_golden.sh
@@ -1,47 +1,26 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_ch.sh
+bash local/download_lm_ch.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/aishell > /dev/null
-bash download_model.sh
+bash local/download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # evaluate model
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u ${MAIN_ROOT}/test.py \
---batch_size=128 \
---beam_size=300 \
---num_proc_bsearch=8 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=1024 \
---alpha=2.6 \
---beta=5.0 \
---cutoff_prob=0.99 \
---cutoff_top_n=40 \
---use_gru=True \
---use_gpu=True \
---share_rnn_weights=False \
---test_manifest="data/manifest.test" \
---mean_std_path="${MAIN_ROOT}/models/aishell/mean_std.npz" \
---vocab_path="${MAIN_ROOT}/models/aishell/vocab.txt" \
---model_path="${MAIN_ROOT}/models/aishell" \
---lang_model_path="${MAIN_ROOT}/models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
---decoding_method="ctc_beam_search" \
---error_rate_type="cer" \
---specgram_type="linear"
+CUDA_VISIBLE_DEVICES=0 \
+python3 -u ${BIN_DIR}/test.py \
+--device 'gpu' \
+--nproc 1 \
+--config conf/deepspeech2.yaml \
+--checkpoint_path data/pretrain/params.pdparams  \
+--opts data.mean_std_filepath data/pretrain/mean_std.npz  \
+--opts data.vocab_filepath data/pretrain/vocab.txt
 
 if [ $? -ne 0 ]; then
     echo "Failed in evaluation!"
diff --git a/examples/aishell/local/train.sh b/examples/aishell/local/train.sh
index ce30c4a11..3e13a79e3 100644
--- a/examples/aishell/local/train.sh
+++ b/examples/aishell/local/train.sh
@@ -4,11 +4,14 @@
 # if you wish to resume from an exists model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0
 
-python3 -u ${MAIN_ROOT}/train.py \
+ngpu=$(echo ${CUDA_VISIBLE_DEVICES} | python3 -c 'import sys; a = sys.stdin.read(); print(len(a.split(",")));')
+echo "using $ngpu gpus..."  # ngpu = count of comma-separated ids in CUDA_VISIBLE_DEVICES
+
+python3 -u ${BIN_DIR}/train.py \
 --device 'gpu' \
---nproc 4 \
+--nproc ${ngpu} \
 --config conf/deepspeech2.yaml \
---output ckpt-${1}
+--output ckpt
 
 
 if [ $? -ne 0 ]; then
diff --git a/examples/aishell/local/tune.sh b/examples/aishell/local/tune.sh
index a11137706..9ff5e8b99 100644
--- a/examples/aishell/local/tune.sh
+++ b/examples/aishell/local/tune.sh
@@ -1,7 +1,7 @@
 #! /usr/bin/env bash
 
 # grid-search for hyper-parameters in language model
-python3 -u ${MAIN_ROOT}/tune.py \
+python3 -u ${BIN_DIR}/tune.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
diff --git a/examples/aishell/models b/examples/aishell/models
deleted file mode 120000
index 9e68e9945..000000000
--- a/examples/aishell/models
+++ /dev/null
@@ -1 +0,0 @@
-../../models
\ No newline at end of file
diff --git a/examples/aishell/path.sh b/examples/aishell/path.sh
index a55139e11..debdbba46 100644
--- a/examples/aishell/path.sh
+++ b/examples/aishell/path.sh
@@ -8,3 +8,6 @@ export PYTHONIOENCODING=UTF-8
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
 
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
+
+MODEL=deepspeech2
+export BIN_DIR=${MAIN_ROOT}/deepspeech/exps/${MODEL}/bin
diff --git a/examples/aishell/run.sh b/examples/aishell/run.sh
index 6cf8af2ba..dc762df99 100644
--- a/examples/aishell/run.sh
+++ b/examples/aishell/run.sh
@@ -1,21 +1,16 @@
 #!/bin/bash
 
 source path.sh
+# only demos
 
 # prepare data
 bash ./local/data.sh
 
-# test pretrain model
-bash ./local/test_golden.sh
-
-# test pretain model
-bash ./local/infer_golden.sh
-
 # train model
-bash ./local/train.sh
+CUDA_VISIBLE_DEVICES=0,1,2,3 bash ./local/train.sh
 
 # test model
-bash ./local/test.sh
+CUDA_VISIBLE_DEVICES=0 bash ./local/test.sh ckpt/checkpoints/step-3284
 
 # infer model
-bash ./local/infer.sh
+CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh ckpt/checkpoints/step-3284
diff --git a/examples/conf/augmentation.config b/examples/aug_conf/augmentation.config
similarity index 100%
rename from examples/conf/augmentation.config
rename to examples/aug_conf/augmentation.config
diff --git a/examples/conf/augmentation.config.example b/examples/aug_conf/augmentation.config.example
similarity index 100%
rename from examples/conf/augmentation.config.example
rename to examples/aug_conf/augmentation.config.example
diff --git a/models/lm/download_lm_en.sh b/examples/baidu_en8k/download_lm_en.sh
similarity index 73%
rename from models/lm/download_lm_en.sh
rename to examples/baidu_en8k/download_lm_en.sh
index cc8d32035..05ea793fb 100644
--- a/models/lm/download_lm_en.sh
+++ b/examples/baidu_en8k/download_lm_en.sh
@@ -1,11 +1,13 @@
 #! /usr/bin/env bash
 
-. ../../utils/utility.sh
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/lm
+mkdir -p ${DIR}
 
 URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
 MD5="099a601759d467cd0a8523ff939819c5"
-TARGET=./common_crawl_00.prune01111.trie.klm
-
+TARGET=${DIR}/common_crawl_00.prune01111.trie.klm
 
 echo "Download language model ..."
 download $URL $MD5 $TARGET
diff --git a/models/baidu_en8k/download_model.sh b/examples/baidu_en8k/download_model.sh
similarity index 73%
rename from models/baidu_en8k/download_model.sh
rename to examples/baidu_en8k/download_model.sh
index bbdb32b61..3fc36b514 100644
--- a/models/baidu_en8k/download_model.sh
+++ b/examples/baidu_en8k/download_model.sh
@@ -1,10 +1,13 @@
 #! /usr/bin/env bash
 
-. ../../utils/utility.sh
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/pretrain
+mkdir -p ${DIR}
 
 URL='https://deepspeech.bj.bcebos.com/demo_models/baidu_en8k_model_fluid.tar.gz'
 MD5=7e58fbf64aa4ecf639b049792ddcf788
-TARGET=./baidu_en8k_model_fluid.tar.gz
+TARGET=${DIR}/baidu_en8k_model_fluid.tar.gz
 
 
 echo "Download BaiduEn8k model ..."
diff --git a/examples/baidu_en8k/path.sh b/examples/baidu_en8k/path.sh
index fd1cebba8..1b150ca40 100644
--- a/examples/baidu_en8k/path.sh
+++ b/examples/baidu_en8k/path.sh
@@ -6,3 +6,8 @@ export LC_ALL=C
 # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8 
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
+
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
+
+MODEL=deepspeech2
+export BIN_DIR=${MAIN_ROOT}/deepspeech/exps/${MODEL}/bin
\ No newline at end of file
diff --git a/examples/baidu_en8k/run_infer_golden.sh b/examples/baidu_en8k/run_infer_golden.sh
index 11d7541ee..32916f21f 100644
--- a/examples/baidu_en8k/run_infer_golden.sh
+++ b/examples/baidu_en8k/run_infer_golden.sh
@@ -3,22 +3,17 @@
 source path.sh
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # infer
 CUDA_VISIBLE_DEVICES=0 \
@@ -37,10 +32,10 @@ python3 -u ${MAIN_ROOT}/infer.py \
 --use_gpu=False \
 --share_rnn_weights=False \
 --infer_manifest="${MAIN_ROOT}/examples/librispeech/data/manifest.test-clean" \
---mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
---vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
---model_path="${MAIN_ROOT}/models/baidu_en8k" \
---lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--mean_std_path="data/pretrain/baidu_en8k/mean_std.npz" \
+--vocab_path="data/pretrain/baidu_en8k/vocab.txt" \
+--model_path="data/pretrain/baidu_en8k" \
+--lang_model_path="data/lm/common_crawl_00.prune01111.trie.klm" \
 --decoding_method="ctc_beam_search" \
 --error_rate_type="wer" \
 --specgram_type="linear"
diff --git a/examples/baidu_en8k/run_test_golden.sh b/examples/baidu_en8k/run_test_golden.sh
index 10c61a096..eb51d8e33 100644
--- a/examples/baidu_en8k/run_test_golden.sh
+++ b/examples/baidu_en8k/run_test_golden.sh
@@ -3,21 +3,17 @@
 source path.sh
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
 
 # evaluate model
@@ -37,11 +33,11 @@ python3 -u ${MAIN_ROOT}/test.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
---test_manifest="data/manifest.test-clean" \
---mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
---vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
---model_path="${MAIN_ROOT}/models/baidu_en8k" \
---lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--test_manifest="${MAIN_ROOT}/examples/librispeech/data/manifest.test-clean" \
+--mean_std_path="data/pretrain/baidu_en8k/mean_std.npz" \
+--vocab_path="data/pretrain/baidu_en8k/vocab.txt" \
+--model_path="data/pretrain/baidu_en8k" \
+--lang_model_path="data/lm/common_crawl_00.prune01111.trie.klm" \
 --decoding_method="ctc_beam_search" \
 --error_rate_type="wer" \
 --specgram_type="linear"
diff --git a/examples/dataset/aishell/.gitignore b/examples/dataset/aishell/.gitignore
new file mode 100644
index 000000000..9c6e517e5
--- /dev/null
+++ b/examples/dataset/aishell/.gitignore
@@ -0,0 +1 @@
+data_aishell*
diff --git a/examples/aishell/local/aishell.py b/examples/dataset/aishell/aishell.py
similarity index 98%
rename from examples/aishell/local/aishell.py
rename to examples/dataset/aishell/aishell.py
index ba59b744d..38d0c28a3 100644
--- a/examples/aishell/local/aishell.py
+++ b/examples/dataset/aishell/aishell.py
@@ -24,7 +24,7 @@ import codecs
 import soundfile
 import json
 import argparse
-from data_utils.utility import download, unpack
+from utils.utility import download, unpack
 
 DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 
diff --git a/data/noise/chime3_background.py b/examples/dataset/chime3_background/chime3_background.py
similarity index 97%
rename from data/noise/chime3_background.py
rename to examples/dataset/chime3_background/chime3_background.py
index 8db09204e..31208d147 100644
--- a/data/noise/chime3_background.py
+++ b/examples/dataset/chime3_background/chime3_background.py
@@ -29,7 +29,8 @@ import json
 import io
 from paddle.v2.dataset.common import md5file
 
-DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+#DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+DATA_HOME = os.path.expanduser('.')
 
 URL = "https://d4s.myairbridge.com/packagev2/AG0Y3DNBE5IWRRTV/?dlid=W19XG7T0NNHB027139H0EQ"
 MD5 = "c3ff512618d7a67d4f85566ea1bc39ec"
diff --git a/examples/dataset/librispeech/.gitignore b/examples/dataset/librispeech/.gitignore
new file mode 100644
index 000000000..a8d8eb76d
--- /dev/null
+++ b/examples/dataset/librispeech/.gitignore
@@ -0,0 +1,7 @@
+dev-clean/
+dev-other/
+test-clean/
+test-other/
+train-clean-100/
+train-clean-360/
+train-other-500/
diff --git a/examples/librispeech/local/librispeech.py b/examples/dataset/librispeech/librispeech.py
similarity index 98%
rename from examples/librispeech/local/librispeech.py
rename to examples/dataset/librispeech/librispeech.py
index ae1bae2de..4cf0f5541 100644
--- a/examples/librispeech/local/librispeech.py
+++ b/examples/dataset/librispeech/librispeech.py
@@ -27,10 +27,10 @@ import soundfile
 import json
 import codecs
 import io
-from data_utils.utility import download, unpack
+from utils.utility import download, unpack
 
 URL_ROOT = "http://www.openslr.org/resources/12"
-URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
+#URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
 URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
 URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
 URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
diff --git a/examples/dataset/mini_librispeech/.gitignore b/examples/dataset/mini_librispeech/.gitignore
new file mode 100644
index 000000000..61f54c966
--- /dev/null
+++ b/examples/dataset/mini_librispeech/.gitignore
@@ -0,0 +1,4 @@
+dev-clean/
+manifest.dev-clean
+manifest.train-clean
+train-clean/
diff --git a/examples/dataset/mini_librispeech/mini_librispeech.py b/examples/dataset/mini_librispeech/mini_librispeech.py
new file mode 100644
index 000000000..883a322dc
--- /dev/null
+++ b/examples/dataset/mini_librispeech/mini_librispeech.py
@@ -0,0 +1,115 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Prepare Librispeech ASR datasets.
+
+Download, unpack and create manifest files.
+Manifest file is a json-format file with each line containing the
+meta data (i.e. audio filepath, transcript and audio duration)
+of each audio file in the data set.
+"""
+
+import distutils.util
+import os
+import sys
+import argparse
+import soundfile
+import json
+import codecs
+import io
+from utils.utility import download, unpack
+
+URL_ROOT = "http://www.openslr.org/resources/31"
+URL_TRAIN_CLEAN = URL_ROOT + "/train-clean-5.tar.gz"
+URL_DEV_CLEAN = URL_ROOT + "/dev-clean-2.tar.gz"
+
+MD5_TRAIN_CLEAN = "5df7d4e78065366204ca6845bb08f490"
+MD5_DEV_CLEAN = "6d7ab67ac6a1d2c993d050e16d61080d"
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--target_dir",
+    default='~/.cache/paddle/dataset/speech/libri',
+    type=str,
+    help="Directory to save the dataset. (default: %(default)s)")
+parser.add_argument(
+    "--manifest_prefix",
+    default="manifest",
+    type=str,
+    help="Filepath prefix for output manifests. (default: %(default)s)")
+args = parser.parse_args()
+
+
+def create_manifest(data_dir, manifest_path):
+    """Create a manifest json file summarizing the data set, with each line
+    containing the meta data (i.e. audio filepath, transcription text, audio
+    duration) of each audio file within the data set.
+    """
+    print("Creating manifest %s ..." % manifest_path)
+    json_lines = []
+    for subfolder, _, filelist in sorted(os.walk(data_dir)):
+        text_filelist = [
+            filename for filename in filelist if filename.endswith('trans.txt')
+        ]
+        if len(text_filelist) > 0:
+            text_filepath = os.path.join(subfolder, text_filelist[0])
+            for line in io.open(text_filepath, encoding="utf8"):
+                segments = line.strip().split()
+                text = ' '.join(segments[1:]).lower()
+                audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
+                audio_data, samplerate = soundfile.read(audio_filepath)
+                duration = float(len(audio_data)) / samplerate
+                json_lines.append(
+                    json.dumps({
+                        'audio_filepath': audio_filepath,
+                        'duration': duration,
+                        'text': text
+                    }))
+    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
+        for line in json_lines:
+            out_file.write(line + '\n')
+
+
+def prepare_dataset(url, md5sum, target_dir, manifest_path):
+    """Download and unpack the `url` archive (skipped when `target_dir` already
+    has a "LibriSpeech" directory), then create the summary manifest file."""
+    if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
+        # download
+        filepath = download(url, md5sum, target_dir)
+        # unpack
+        unpack(filepath, target_dir)
+    else:
+        print("Skip downloading and unpacking. Data already exists in %s." %
+              target_dir)
+    # create manifest json file
+    create_manifest(target_dir, manifest_path)
+
+
+def main():
+    if args.target_dir.startswith('~'):
+        args.target_dir = os.path.expanduser(args.target_dir)
+
+    prepare_dataset(
+        url=URL_TRAIN_CLEAN,
+        md5sum=MD5_TRAIN_CLEAN,
+        target_dir=os.path.join(args.target_dir, "train-clean"),
+        manifest_path=args.manifest_prefix + ".train-clean")
+    prepare_dataset(
+        url=URL_DEV_CLEAN,
+        md5sum=MD5_DEV_CLEAN,
+        target_dir=os.path.join(args.target_dir, "dev-clean"),
+        manifest_path=args.manifest_prefix + ".dev-clean")
+
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/dataset/musan/musan.py b/examples/dataset/musan/musan.py
new file mode 100644
index 000000000..0d01057e4
--- /dev/null
+++ b/examples/dataset/musan/musan.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Prepare Aishell mandarin dataset
+
+Download, unpack and create manifest files.
+Manifest file is a json-format file with each line containing the
+meta data (i.e. audio filepath, transcript and audio duration)
+of each audio file in the data set.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import codecs
+import soundfile
+import json
+import argparse
+from utils.utility import download, unpack
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+
+URL_ROOT = 'https://www.openslr.org/resources/17'
+DATA_URL = URL_ROOT + '/musan.tar.gz'
+MD5_DATA = ''
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--target_dir",
+    default=DATA_HOME + "/musan",
+    type=str,
+    help="Directory to save the dataset. (default: %(default)s)")
+parser.add_argument(
+    "--manifest_prefix",
+    default="manifest",
+    type=str,
+    help="Filepath prefix for output manifests. (default: %(default)s)")
+args = parser.parse_args()
+
+
+def create_manifest(data_dir, manifest_path_prefix):
+    print("Creating manifest %s ..." % manifest_path_prefix)
+    json_lines = []
+    transcript_path = os.path.join(data_dir, 'transcript',
+                                   'aishell_transcript_v0.8.txt')
+    transcript_dict = {}
+    for line in codecs.open(transcript_path, 'r', 'utf-8'):
+        line = line.strip()
+        if line == '': continue
+        audio_id, text = line.split(' ', 1)
+        # remove whitespace
+        text = ''.join(text.split())
+        transcript_dict[audio_id] = text
+
+    data_types = ['train', 'dev', 'test']
+    for type in data_types:
+        del json_lines[:]
+        audio_dir = os.path.join(data_dir, 'wav', type)
+        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
+            for fname in filelist:
+                audio_path = os.path.join(subfolder, fname)
+                audio_id = fname[:-4]
+                # if no transcription for audio then skipped
+                if audio_id not in transcript_dict:
+                    continue
+                audio_data, samplerate = soundfile.read(audio_path)
+                duration = float(len(audio_data) / samplerate)
+                text = transcript_dict[audio_id]
+                json_lines.append(
+                    json.dumps(
+                        {
+                            'audio_filepath': audio_path,
+                            'duration': duration,
+                            'text': text
+                        },
+                        ensure_ascii=False))
+        manifest_path = manifest_path_prefix + '.' + type
+        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
+            for line in json_lines:
+                fout.write(line + '\n')
+
+
+def prepare_dataset(url, md5sum, target_dir, manifest_path):
+    """Download, unpack and create manifest file."""
+    data_dir = os.path.join(target_dir, 'data_aishell')
+    if not os.path.exists(data_dir):
+        filepath = download(url, md5sum, target_dir)
+        unpack(filepath, target_dir)
+        # unpack all audio tar files
+        audio_dir = os.path.join(data_dir, 'wav')
+        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
+            for ftar in filelist:
+                unpack(os.path.join(subfolder, ftar), subfolder, True)
+    else:
+        print("Skip downloading and unpacking. Data already exists in %s." %
+              target_dir)
+    create_manifest(data_dir, manifest_path)
+
+
+def main():
+    if args.target_dir.startswith('~'):
+        args.target_dir = os.path.expanduser(args.target_dir)
+
+    prepare_dataset(
+        url=DATA_URL,
+        md5sum=MD5_DATA,
+        target_dir=args.target_dir,
+        manifest_path=args.manifest_prefix)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/examples/dataset/rir_noise/rir_noise.py b/examples/dataset/rir_noise/rir_noise.py
new file mode 100644
index 000000000..dd2b5c64f
--- /dev/null
+++ b/examples/dataset/rir_noise/rir_noise.py
@@ -0,0 +1,123 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Prepare Aishell mandarin dataset
+
+Download, unpack and create manifest files.
+Manifest file is a json-format file with each line containing the
+meta data (i.e. audio filepath, transcript and audio duration)
+of each audio file in the data set.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import codecs
+import soundfile
+import json
+import argparse
+from utils.utility import download, unpack
+
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
+
+URL_ROOT = 'http://www.openslr.org/resources/28'
+DATA_URL = URL_ROOT + '/rirs_noises.zip'
+MD5_DATA = ''
+
+parser = argparse.ArgumentParser(description=__doc__)
+parser.add_argument(
+    "--target_dir",
+    default=DATA_HOME + "/Aishell",
+    type=str,
+    help="Directory to save the dataset. (default: %(default)s)")
+parser.add_argument(
+    "--manifest_prefix",
+    default="manifest",
+    type=str,
+    help="Filepath prefix for output manifests. (default: %(default)s)")
+args = parser.parse_args()
+
+
+def create_manifest(data_dir, manifest_path_prefix):
+    print("Creating manifest %s ..." % manifest_path_prefix)
+    json_lines = []
+    transcript_path = os.path.join(data_dir, 'transcript',
+                                   'aishell_transcript_v0.8.txt')
+    transcript_dict = {}
+    for line in codecs.open(transcript_path, 'r', 'utf-8'):
+        line = line.strip()
+        if line == '': continue
+        audio_id, text = line.split(' ', 1)
+        # remove whitespace
+        text = ''.join(text.split())
+        transcript_dict[audio_id] = text
+
+    data_types = ['train', 'dev', 'test']
+    for type in data_types:
+        del json_lines[:]
+        audio_dir = os.path.join(data_dir, 'wav', type)
+        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
+            for fname in filelist:
+                audio_path = os.path.join(subfolder, fname)
+                audio_id = fname[:-4]
+                # if no transcription for audio then skipped
+                if audio_id not in transcript_dict:
+                    continue
+                audio_data, samplerate = soundfile.read(audio_path)
+                duration = float(len(audio_data) / samplerate)
+                text = transcript_dict[audio_id]
+                json_lines.append(
+                    json.dumps(
+                        {
+                            'audio_filepath': audio_path,
+                            'duration': duration,
+                            'text': text
+                        },
+                        ensure_ascii=False))
+        manifest_path = manifest_path_prefix + '.' + type
+        with codecs.open(manifest_path, 'w', 'utf-8') as fout:
+            for line in json_lines:
+                fout.write(line + '\n')
+
+
+def prepare_dataset(url, md5sum, target_dir, manifest_path):
+    """Download, unpack and create manifest file."""
+    data_dir = os.path.join(target_dir, 'data_aishell')
+    if not os.path.exists(data_dir):
+        filepath = download(url, md5sum, target_dir)
+        unpack(filepath, target_dir)
+        # unpack all audio tar files
+        audio_dir = os.path.join(data_dir, 'wav')
+        for subfolder, _, filelist in sorted(os.walk(audio_dir)):
+            for ftar in filelist:
+                unpack(os.path.join(subfolder, ftar), subfolder, True)
+    else:
+        print("Skip downloading and unpacking. Data already exists in %s." %
+              target_dir)
+    create_manifest(data_dir, manifest_path)
+
+
+def main():
+    if args.target_dir.startswith('~'):
+        args.target_dir = os.path.expanduser(args.target_dir)
+
+    prepare_dataset(
+        url=DATA_URL,
+        md5sum=MD5_DATA,
+        target_dir=args.target_dir,
+        manifest_path=args.manifest_prefix)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/data/voxforge/run_data.sh b/examples/dataset/voxforge/run_data.sh
similarity index 58%
rename from data/voxforge/run_data.sh
rename to examples/dataset/voxforge/run_data.sh
index 0276744ae..5af9d0cc6 100644
--- a/data/voxforge/run_data.sh
+++ b/examples/dataset/voxforge/run_data.sh
@@ -1,9 +1,12 @@
 #! /usr/bin/env bash
 
+TARGET_DIR=${MAIN_ROOT}/examples/dataset/voxforge
+mkdir -p ${TARGET_DIR}
+
 # download data, generate manifests
-PYTHONPATH=../../:$PYTHONPATH python voxforge.py \
---manifest_prefix='./manifest' \
---target_dir='./dataset/VoxForge' \
+python ${MAIN_ROOT}/examples/dataset/voxforge/voxforge.py \
+--manifest_prefix="${TARGET_DIR}/manifest" \
+--target_dir="${TARGET_DIR}" \
 --is_merge_dialect=True \
 --dialects 'american' 'british' 'australian' 'european' 'irish' 'canadian' 'indian'
 
diff --git a/data/voxforge/voxforge.py b/examples/dataset/voxforge/voxforge.py
similarity index 98%
rename from data/voxforge/voxforge.py
rename to examples/dataset/voxforge/voxforge.py
index 3fb0ded88..abf1ccff6 100644
--- a/data/voxforge/voxforge.py
+++ b/examples/dataset/voxforge/voxforge.py
@@ -27,9 +27,9 @@ import json
 import argparse
 import shutil
 import subprocess
-from data_utils.utility import download_multi, unpack, getfile_insensitive
+from utils.utility import download_multi, unpack, getfile_insensitive
 
-DATA_HOME = './dataset'
+DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset/speech')
 
 DATA_URL = 'http://www.repository.voxforge1.org/downloads/SpeechCorpus/Trunk/' \
            'Audio/Main/16kHz_16bit'
diff --git a/examples/librispeech/.gitignore b/examples/librispeech/.gitignore
new file mode 100644
index 000000000..44038ca5b
--- /dev/null
+++ b/examples/librispeech/.gitignore
@@ -0,0 +1,2 @@
+data
+ckpt*
diff --git a/examples/librispeech/conf/deepspeech2.yaml b/examples/librispeech/conf/deepspeech2.yaml
index 457a56b2e..9e2e29396 100644
--- a/examples/librispeech/conf/deepspeech2.yaml
+++ b/examples/librispeech/conf/deepspeech2.yaml
@@ -1,12 +1,12 @@
 # https://yaml.org/type/float.html
 data:
-  train_manifest: data/manifest.tiny
-  dev_manifest: data/manifest.tiny
-  test_manifest: data/manifest.tiny
+  train_manifest: data/manifest.train
+  dev_manifest: data/manifest.dev-clean
+  test_manifest: data/manifest.test-clean
   mean_std_filepath: data/mean_std.npz
   vocab_filepath: data/vocab.txt 
   augmentation_config: conf/augmentation.config
-  batch_size: 4
+  batch_size: 20
   max_duration: 27.0
   min_duration: 0.0
   specgram_type: linear
@@ -26,26 +26,22 @@ model:
   num_conv_layers: 2
   num_rnn_layers: 3
   rnn_layer_size: 2048
-  use_gru: True 
+  use_gru: False 
   share_rnn_weights: True 
 training:
-  n_epoch: 20
-  lr: 1e-5 
+  n_epoch: 50
+  lr: 5e-4
+  lr_decay: 0.83
   weight_decay: 1e-06
-  global_grad_clip: 400.0
-  max_iteration: 500000
-  plot_interval: 1000
-  save_interval: 1000
-  valid_interval: 1000
+  global_grad_clip: 5.0
 decoding:
   batch_size: 128
   error_rate_type: wer
   decoding_method: ctc_beam_search
-  lang_model_path: models/lm/common_crawl_00.prune01111.trie.klm
+  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
   alpha: 2.5
   beta: 0.3
   beam_size: 500
   cutoff_prob: 1.0
   cutoff_top_n: 40
   num_proc_bsearch: 8
-
diff --git a/examples/librispeech/local/data.sh b/examples/librispeech/local/data.sh
index cbcad7b8d..ca65d640c 100644
--- a/examples/librispeech/local/data.sh
+++ b/examples/librispeech/local/data.sh
@@ -1,11 +1,13 @@
 #! /usr/bin/env bash
 
 mkdir -p data
+TARGET_DIR=${MAIN_ROOT}/examples/dataset
+mkdir -p ${TARGET_DIR}
 
 # download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 local/librispeech.py \
+PYTHONPATH=.:$PYTHONPATH python3 ${TARGET_DIR}/librispeech/librispeech.py \
 --manifest_prefix="data/manifest" \
---target_dir="${MAIN_ROOT}/dataset/librispeech" \
+--target_dir="${TARGET_DIR}/librispeech" \
 --full_download="True"
 
 if [ $? -ne 0 ]; then
@@ -15,9 +17,8 @@ fi
 
 cat data/manifest.train-* | shuf > data/manifest.train
 
-
 # build vocabulary
-python3 ${MAIN_ROOT}/tools/build_vocab.py \
+python3 ${MAIN_ROOT}/utils/build_vocab.py \
 --count_threshold=0 \
 --vocab_path="data/vocab.txt" \
 --manifest_paths="data/manifest.train"
@@ -27,9 +28,8 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-
 # compute mean and stddev for normalizer
-python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
 --manifest_path="data/manifest.train" \
 --num_samples=2000 \
 --specgram_type="linear" \
@@ -40,6 +40,5 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-
 echo "LibriSpeech Data preparation done."
 exit 0
diff --git a/examples/librispeech/local/download_lm_en.sh b/examples/librispeech/local/download_lm_en.sh
new file mode 100644
index 000000000..05ea793fb
--- /dev/null
+++ b/examples/librispeech/local/download_lm_en.sh
@@ -0,0 +1,20 @@
+#! /usr/bin/env bash
+
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/lm
+mkdir -p ${DIR}
+
+URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
+MD5="099a601759d467cd0a8523ff939819c5"
+TARGET=${DIR}/common_crawl_00.prune01111.trie.klm
+
+echo "Download language model ..."
+download $URL $MD5 $TARGET
+if [ $? -ne 0 ]; then
+    echo "Fail to download the language model!"
+    exit 1
+fi
+
+
+exit 0
diff --git a/models/librispeech/download_model.sh b/examples/librispeech/local/download_model.sh
similarity index 68%
rename from models/librispeech/download_model.sh
rename to examples/librispeech/local/download_model.sh
index edf853054..f13bde0f2 100644
--- a/models/librispeech/download_model.sh
+++ b/examples/librispeech/local/download_model.sh
@@ -1,10 +1,13 @@
 #! /usr/bin/env bash
 
-. ../../utils/utility.sh
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/pretrain
+mkdir -p ${DIR}
 
 URL='https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz'
 MD5=fafb11fe57c3ecd107147056453f5348
-TARGET=./librispeech_model_fluid.tar.gz
+TARGET=${DIR}/librispeech_model_fluid.tar.gz
 
 
 echo "Download LibriSpeech model ..."
@@ -13,7 +16,6 @@ if [ $? -ne 0 ]; then
     echo "Fail to download LibriSpeech model!"
     exit 1
 fi
-tar -zxvf $TARGET
-
+tar -zxvf $TARGET -C ${DIR}
 
 exit 0
diff --git a/examples/librispeech/local/infer.sh b/examples/librispeech/local/infer.sh
index 33959b381..4b6a0b01f 100644
--- a/examples/librispeech/local/infer.sh
+++ b/examples/librispeech/local/infer.sh
@@ -1,43 +1,21 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
+python3 -u ${BIN_DIR}/infer.py \
+--device 'gpu' \
+--nproc 1 \
+--config conf/deepspeech2.yaml \
+--output ckpt
 
-# infer
-CUDA_VISIBLE_DEVICES=0 \
-python3 -u ${MAIN_ROOT}/infer.py \
---num_samples=10 \
---beam_size=500 \
---num_proc_bsearch=8 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=2048 \
---alpha=2.5 \
---beta=0.3 \
---cutoff_prob=1.0 \
---cutoff_top_n=40 \
---use_gru=False \
---use_gpu=True \
---share_rnn_weights=True \
---infer_manifest="data/manifest.test-clean" \
---mean_std_path="data/mean_std.npz" \
---vocab_path="data/vocab.txt" \
---model_path="checkpoints/step_final" \
---lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
---decoding_method="ctc_beam_search" \
---error_rate_type="wer" \
---specgram_type="linear"
 
 if [ $? -ne 0 ]; then
     echo "Failed in inference!"
     exit 1
 fi
 
-
 exit 0
diff --git a/examples/librispeech/local/infer_golden.sh b/examples/librispeech/local/infer_golden.sh
index 21663681b..d17b4328d 100644
--- a/examples/librispeech/local/infer_golden.sh
+++ b/examples/librispeech/local/infer_golden.sh
@@ -1,22 +1,16 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/librispeech > /dev/null
-bash download_model.sh
+bash local/download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # infer
 CUDA_VISIBLE_DEVICES=0 \
diff --git a/examples/librispeech/local/test.sh b/examples/librispeech/local/test.sh
index cd8c07542..f39fbaef1 100644
--- a/examples/librispeech/local/test.sh
+++ b/examples/librispeech/local/test.sh
@@ -1,38 +1,17 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
+python3 -u ${BIN_DIR}/test.py \
+--device 'gpu' \
+--nproc 1 \
+--config conf/deepspeech2.yaml \
+--output ckpt
 
-# evaluate model
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u ${MAIN_ROOT}/test.py \
---batch_size=128 \
---beam_size=500 \
---num_proc_bsearch=8 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=2048 \
---alpha=2.5 \
---beta=0.3 \
---cutoff_prob=1.0 \
---cutoff_top_n=40 \
---use_gru=False \
---use_gpu=True \
---share_rnn_weights=True \
---test_manifest="data/manifest.test-clean" \
---mean_std_path="data/mean_std.npz" \
---vocab_path="data/vocab.txt" \
---model_path="checkpoints/step_final" \
---lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
---decoding_method="ctc_beam_search" \
---error_rate_type="wer" \
---specgram_type="linear"
 
 if [ $? -ne 0 ]; then
     echo "Failed in evaluation!"
diff --git a/examples/librispeech/local/test_golden.sh b/examples/librispeech/local/test_golden.sh
index 54ec6ad03..d6b1bc8e9 100644
--- a/examples/librispeech/local/test_golden.sh
+++ b/examples/librispeech/local/test_golden.sh
@@ -1,26 +1,21 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/librispeech > /dev/null
-bash download_model.sh
+bash local/download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
 
 # evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u ${MAIN_ROOT}/test.py \
+python3 -u $MAIN_ROOT/test.py \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@@ -35,10 +30,10 @@ python3 -u ${MAIN_ROOT}/test.py \
 --use_gpu=True \
 --share_rnn_weights=True \
 --test_manifest="data/manifest.test-clean" \
---mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
---vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
---model_path="${MAIN_ROOT}/models/librispeech" \
---lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--mean_std_path="$MAIN_ROOT/models/librispeech/mean_std.npz" \
+--vocab_path="$MAIN_ROOT/models/librispeech/vocab.txt" \
+--model_path="$MAIN_ROOT/models/librispeech" \
+--lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
 --decoding_method="ctc_beam_search" \
 --error_rate_type="wer" \
 --specgram_type="linear"
diff --git a/examples/librispeech/local/train.sh b/examples/librispeech/local/train.sh
index 32aa2657b..59a94181b 100644
--- a/examples/librispeech/local/train.sh
+++ b/examples/librispeech/local/train.sh
@@ -1,36 +1,15 @@
 #! /usr/bin/env bash
 
-# train model
-# if you wish to resume from an exists model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0
 
-CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u ${MAIN_ROOT}/train.py \
---batch_size=20 \
---num_epoch=50 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=2048 \
---num_iter_print=100 \
---save_epoch=1 \
---num_samples=280000 \
---learning_rate=5e-4 \
---max_duration=27.0 \
---min_duration=0.0 \
---test_off=False \
---use_sortagrad=True \
---use_gru=False \
---use_gpu=True \
---is_local=True \
---share_rnn_weights=True \
---train_manifest="data/manifest.train" \
---dev_manifest="data/manifest.dev-clean" \
---mean_std_path="data/mean_std.npz" \
---vocab_path="data/vocab.txt" \
---output_model_dir="./checkpoints/libri" \
---augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
---specgram_type="linear" \
---shuffle_method="batch_shuffle_clipped" \
+# Count the comma-separated ids in CUDA_VISIBLE_DEVICES; used for --nproc below.
+ngpu=$(echo ${CUDA_VISIBLE_DEVICES} | python3 -c 'import sys; a = sys.stdin.read(); print(len(a.split(",")));')
+echo "using $ngpu gpus..."
+python3 -u ${BIN_DIR}/train.py \
+--device 'gpu' \
+--nproc ${ngpu} \
+--config conf/deepspeech2.yaml \
+--output ckpt
 
 if [ $? -ne 0 ]; then
     echo "Failed in training!"
diff --git a/examples/librispeech/local/tune.sh b/examples/librispeech/local/tune.sh
index 848f0b8f9..4bb81d29b 100644
--- a/examples/librispeech/local/tune.sh
+++ b/examples/librispeech/local/tune.sh
@@ -1,15 +1,19 @@
 #! /usr/bin/env bash
 
+if [ $# != 1 ];then
+    echo "usage: tune ckpt_path"
+    exit 1
+fi
+
 # grid-search for hyper-parameters in language model
-CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u ${MAIN_ROOT}tools/tune.py \
+python3 -u ${BIN_DIR}/tune.py \
+--device 'gpu' \
+--nproc 1 \
+--config conf/deepspeech2.yaml \
 --num_batches=-1 \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=12 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=2048 \
 --num_alphas=45 \
 --num_betas=8 \
 --alpha_from=1.0 \
@@ -18,16 +22,7 @@ python3 -u ${MAIN_ROOT}tools/tune.py \
 --beta_to=0.45 \
 --cutoff_prob=1.0 \
 --cutoff_top_n=40 \
---use_gru=False \
---use_gpu=True \
---share_rnn_weights=True \
---tune_manifest="data/manifest.dev-clean" \
---mean_std_path="data/mean_std.npz" \
---vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
---model_path="${MAIN_ROOT}/models/librispeech" \
---lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
---error_rate_type="wer" \
---specgram_type="linear"
+--checkpoint_path ${1}
 
 if [ $? -ne 0 ]; then
     echo "Failed in tuning!"
diff --git a/examples/librispeech/models b/examples/librispeech/models
deleted file mode 120000
index 9e68e9945..000000000
--- a/examples/librispeech/models
+++ /dev/null
@@ -1 +0,0 @@
-../../models
\ No newline at end of file
diff --git a/examples/librispeech/path.sh b/examples/librispeech/path.sh
index a55139e11..a179631b3 100644
--- a/examples/librispeech/path.sh
+++ b/examples/librispeech/path.sh
@@ -8,3 +8,7 @@ export PYTHONIOENCODING=UTF-8
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
 
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
+
+
+MODEL=deepspeech2
+export BIN_DIR=${MAIN_ROOT}/deepspeech/exps/${MODEL}/bin
diff --git a/examples/librispeech/run.sh b/examples/librispeech/run.sh
index c5f66ae1d..ff87d38bf 100644
--- a/examples/librispeech/run.sh
+++ b/examples/librispeech/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -e
 
 source path.sh
 
@@ -6,13 +7,10 @@ source path.sh
 bash ./local/data.sh
 
 # train model
-bash ./local/train.sh
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash ./local/train.sh
 
 # test model
-bash ./local/test.sh
+CUDA_VISIBLE_DEVICES=0  bash ./local/test.sh
 
 # infer model
-bash ./local/infer.sh
-
-# tune model
-#bash ./local/tune.sh
+CUDA_VISIBLE_DEVICES=0 bash ./local/infer.sh
diff --git a/examples/tiny/.gitignore b/examples/tiny/.gitignore
new file mode 100644
index 000000000..073c3b9eb
--- /dev/null
+++ b/examples/tiny/.gitignore
@@ -0,0 +1,2 @@
+ckpt*
+data
diff --git a/examples/tiny/conf/deepspeech2.yaml b/examples/tiny/conf/deepspeech2.yaml
index dc7d59d47..c7dd83f3c 100644
--- a/examples/tiny/conf/deepspeech2.yaml
+++ b/examples/tiny/conf/deepspeech2.yaml
@@ -34,19 +34,14 @@ training:
   lr_decay: 1.0 
   weight_decay: 1e-06
   global_grad_clip: 5.0
-  max_iteration: 500000
-  plot_interval: 1000
-  save_interval: 1000
-  valid_interval: 1000
 decoding:
   batch_size: 128
   error_rate_type: wer
   decoding_method: ctc_beam_search
-  lang_model_path: models/lm/common_crawl_00.prune01111.trie.klm
+  lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm
   alpha: 2.5
   beta: 0.3
   beam_size: 500
   cutoff_prob: 1.0
   cutoff_top_n: 40
   num_proc_bsearch: 8
-
diff --git a/examples/tiny/local/data.sh b/examples/tiny/local/data.sh
index 3ad387dbc..d834ec677 100644
--- a/examples/tiny/local/data.sh
+++ b/examples/tiny/local/data.sh
@@ -1,14 +1,13 @@
 #! /usr/bin/env bash
 
-# prepare folder
-if [ ! -e data ]; then
-    mkdir data
-fi
+mkdir -p data
+TARGET_DIR=${MAIN_ROOT}/examples/dataset
+mkdir -p ${TARGET_DIR}
 
 # download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 ../librispeech/local/librispeech.py \
+PYTHONPATH=.:$PYTHONPATH python3 ${TARGET_DIR}/librispeech/librispeech.py \
 --manifest_prefix="data/manifest" \
---target_dir="${MAIN_ROOT}/dataset/librispeech" \
+--target_dir="${TARGET_DIR}/librispeech" \
 --full_download="False"
 
 if [ $? -ne 0 ]; then
@@ -19,7 +18,7 @@ fi
 head -n 64 data/manifest.dev-clean  > data/manifest.tiny
 
 # build vocabulary
-python3 ${MAIN_ROOT}/tools/build_vocab.py \
+python3 ${MAIN_ROOT}/utils/build_vocab.py \
 --count_threshold=0 \
 --vocab_path="data/vocab.txt" \
 --manifest_paths="data/manifest.tiny"
@@ -31,7 +30,7 @@ fi
 
 
 # compute mean and stddev for normalizer
-python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+python3 ${MAIN_ROOT}/utils/compute_mean_std.py \
 --manifest_path="data/manifest.tiny" \
 --num_samples=64 \
 --specgram_type="linear" \
@@ -42,6 +41,5 @@ if [ $? -ne 0 ]; then
     exit 1
 fi
 
-
 echo "LibriSpeech Data preparation done."
 exit 0
diff --git a/examples/tiny/local/download_lm_en.sh b/examples/tiny/local/download_lm_en.sh
new file mode 100644
index 000000000..05ea793fb
--- /dev/null
+++ b/examples/tiny/local/download_lm_en.sh
@@ -0,0 +1,20 @@
+#! /usr/bin/env bash
+
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/lm
+mkdir -p ${DIR}
+
+URL=https://deepspeech.bj.bcebos.com/en_lm/common_crawl_00.prune01111.trie.klm
+MD5="099a601759d467cd0a8523ff939819c5"
+TARGET=${DIR}/common_crawl_00.prune01111.trie.klm
+
+echo "Download language model ..."
+download $URL $MD5 $TARGET
+if [ $? -ne 0 ]; then
+    echo "Fail to download the language model!"
+    exit 1
+fi
+
+
+exit 0
diff --git a/examples/tiny/local/download_model.sh b/examples/tiny/local/download_model.sh
new file mode 100644
index 000000000..f13bde0f2
--- /dev/null
+++ b/examples/tiny/local/download_model.sh
@@ -0,0 +1,21 @@
+#! /usr/bin/env bash
+
+. ${MAIN_ROOT}/utils/utility.sh
+
+DIR=data/pretrain
+mkdir -p ${DIR}
+
+URL='https://deepspeech.bj.bcebos.com/eng_models/librispeech_model_fluid.tar.gz'
+MD5=fafb11fe57c3ecd107147056453f5348
+TARGET=${DIR}/librispeech_model_fluid.tar.gz
+
+
+echo "Download LibriSpeech model ..."
+download $URL $MD5 $TARGET
+if [ $? -ne 0 ]; then
+    echo "Fail to download LibriSpeech model!"
+    exit 1
+fi
+tar -zxvf $TARGET -C ${DIR}
+
+exit 0
diff --git a/examples/tiny/local/infer.sh b/examples/tiny/local/infer.sh
index 74e8982d2..3aff6b78b 100644
--- a/examples/tiny/local/infer.sh
+++ b/examples/tiny/local/infer.sh
@@ -1,15 +1,13 @@
 #! /usr/bin/env bash
 
 # download language model
-cd $MAIN_ROOT/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
-CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u ${MAIN_ROOT}/infer.py \
+CUDA_VISIBLE_DEVICES=0 \
+python3 -u ${BIN_DIR}/infer.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
diff --git a/examples/tiny/local/infer_golden.sh b/examples/tiny/local/infer_golden.sh
index 21663681b..d17b4328d 100644
--- a/examples/tiny/local/infer_golden.sh
+++ b/examples/tiny/local/infer_golden.sh
@@ -1,22 +1,16 @@
 #! /usr/bin/env bash
 
 # download language model
-cd ${MAIN_ROOT}/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # download well-trained model
-cd ${MAIN_ROOT}/models/librispeech > /dev/null
-bash download_model.sh
+bash local/download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # infer
 CUDA_VISIBLE_DEVICES=0 \
diff --git a/examples/tiny/local/test.sh b/examples/tiny/local/test.sh
index cfedd1ca8..fedebf96d 100644
--- a/examples/tiny/local/test.sh
+++ b/examples/tiny/local/test.sh
@@ -1,15 +1,13 @@
 #! /usr/bin/env bash
 
 # download language model
-cd $MAIN_ROOT/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u ${MAIN_ROOT}/test.py \
+python3 -u ${BIN_DIR}/test.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
diff --git a/examples/tiny/local/test_golden.sh b/examples/tiny/local/test_golden.sh
index 9983fade8..d6b1bc8e9 100644
--- a/examples/tiny/local/test_golden.sh
+++ b/examples/tiny/local/test_golden.sh
@@ -1,21 +1,16 @@
 #! /usr/bin/env bash
 
 # download language model
-cd $MAIN_ROOT/models/lm > /dev/null
-bash download_lm_en.sh
+bash local/download_lm_en.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
-
 
 # download well-trained model
-cd $MAIN_ROOT/models/librispeech > /dev/null
-bash download_model.sh
+bash local/download_model.sh
 if [ $? -ne 0 ]; then
     exit 1
 fi
-cd - > /dev/null
 
 
 # evaluate model
diff --git a/examples/tiny/local/train.sh b/examples/tiny/local/train.sh
index dfd229172..369ccc924 100644
--- a/examples/tiny/local/train.sh
+++ b/examples/tiny/local/train.sh
@@ -2,9 +2,8 @@
 
 export FLAGS_sync_nccl_allreduce=0
 
-#CUDA_VISIBLE_DEVICES=0,1,2,3 \
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u ${MAIN_ROOT}/train.py \
+python3 -u ${BIN_DIR}/train.py \
 --device 'gpu' \
 --nproc 1 \
 --config conf/deepspeech2.yaml \
diff --git a/examples/tiny/local/tune.sh b/examples/tiny/local/tune.sh
index b5cc4d6a1..4bb81d29b 100644
--- a/examples/tiny/local/tune.sh
+++ b/examples/tiny/local/tune.sh
@@ -1,15 +1,19 @@
 #! /usr/bin/env bash
 
+if [ $# != 1 ];then
+    echo "usage: tune ckpt_path"
+    exit 1
+fi
+
 # grid-search for hyper-parameters in language model
-CUDA_VISIBLE_DEVICES=0,1,2,3 \
-python3 -u $MAIN_ROOT/tools/tune.py \
+python3 -u ${BIN_DIR}/tune.py \
+--device 'gpu' \
+--nproc 1 \
+--config conf/deepspeech2.yaml \
 --num_batches=-1 \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=12 \
---num_conv_layers=2 \
---num_rnn_layers=3 \
---rnn_layer_size=2048 \
 --num_alphas=45 \
 --num_betas=8 \
 --alpha_from=1.0 \
@@ -18,16 +22,7 @@ python3 -u $MAIN_ROOT/tools/tune.py \
 --beta_to=0.45 \
 --cutoff_prob=1.0 \
 --cutoff_top_n=40 \
---use_gru=False \
---use_gpu=True \
---share_rnn_weights=True \
---tune_manifest="data/manifest.dev-clean" \
---mean_std_path="data/mean_std.npz" \
---vocab_path="data/vocab.txt" \
---model_path="$MAIN_ROOT/models/librispeech" \
---lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
---error_rate_type="wer" \
---specgram_type="linear"
+--checkpoint_path ${1}
 
 if [ $? -ne 0 ]; then
     echo "Failed in tuning!"
diff --git a/examples/tiny/models b/examples/tiny/models
deleted file mode 120000
index 150d99d4d..000000000
--- a/examples/tiny/models
+++ /dev/null
@@ -1 +0,0 @@
-../../models/
\ No newline at end of file
diff --git a/examples/tiny/path.sh b/examples/tiny/path.sh
index a55139e11..a179631b3 100644
--- a/examples/tiny/path.sh
+++ b/examples/tiny/path.sh
@@ -8,3 +8,7 @@ export PYTHONIOENCODING=UTF-8
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
 
 export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib/
+
+
+MODEL=deepspeech2
+export BIN_DIR=${MAIN_ROOT}/deepspeech/exps/${MODEL}/bin
diff --git a/examples/tiny/run.sh b/examples/tiny/run.sh
index 01ad06516..2b5ed5308 100644
--- a/examples/tiny/run.sh
+++ b/examples/tiny/run.sh
@@ -14,6 +14,3 @@ bash ./local/test.sh
 
 # infer model
 bash ./local/infer.sh
-
-## tune model
-#bash ./local/tune.sh
diff --git a/dataloader.ipynb b/notebook/dataloader.ipynb
similarity index 100%
rename from dataloader.ipynb
rename to notebook/dataloader.ipynb
diff --git a/train_test.ipynb b/notebook/train_test.ipynb
similarity index 100%
rename from train_test.ipynb
rename to notebook/train_test.ipynb
diff --git a/setup.sh b/setup.sh
index 115ddcdc1..8d3a0994e 100644
--- a/setup.sh
+++ b/setup.sh
@@ -38,7 +38,7 @@ fi
 # install decoders
 python3 -c "import pkg_resources; pkg_resources.require(\"swig_decoders==1.1\")"
 if [ $? != 0 ]; then
-    cd decoders/swig > /dev/null
+    cd deepspeech/decoders/swig > /dev/null
     sh setup.sh
     cd - > /dev/null
 fi
diff --git a/tests/network_test.py b/tests/network_test.py
index 7e35c05cc..7e8d62c2b 100644
--- a/tests/network_test.py
+++ b/tests/network_test.py
@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from model_utils.network import DeepSpeech2
 import paddle
 import numpy as np
 
+from deepspeech.models.network import DeepSpeech2
+
 if __name__ == '__main__':
 
     batch_size = 2
diff --git a/utils/tests/test_error_rate.py b/tests/test_error_rate.py
similarity index 99%
rename from utils/tests/test_error_rate.py
rename to tests/test_error_rate.py
index 80c5b192a..646d5739f 100644
--- a/utils/tests/test_error_rate.py
+++ b/tests/test_error_rate.py
@@ -14,7 +14,7 @@
 """Test error rate."""
 
 import unittest
-from utils import error_rate
+from deepspeech.utils import error_rate
 
 
 class TestParse(unittest.TestCase):
diff --git a/tools/build_vocab.py b/utils/build_vocab.py
similarity index 92%
rename from tools/build_vocab.py
rename to utils/build_vocab.py
index 5dc6f35bb..cb17de57c 100644
--- a/tools/build_vocab.py
+++ b/utils/build_vocab.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Build vocabulary from manifest files.
-
 Each item in vocabulary file is a character.
 """
 
@@ -22,15 +21,16 @@ import codecs
 import json
 from collections import Counter
 import os.path
-from data_utils.utility import read_manifest
-from utils.utility import add_arguments, print_arguments
+
+from deepspeech.frontend.utility import read_manifest
+from deepspeech.utils.utility import add_arguments, print_arguments
 
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
 # yapf: disable
 add_arg('count_threshold',  int,    0,  "Truncation threshold for char counts.")
 add_arg('vocab_path',       str,
-        'data/librispeech/vocab.txt',
+        'examples/librispeech/data/vocab.txt',
         "Filepath to write the vocabulary.")
 add_arg('manifest_paths',   str,
         None,
diff --git a/tools/compute_mean_std.py b/utils/compute_mean_std.py
similarity index 87%
rename from tools/compute_mean_std.py
rename to utils/compute_mean_std.py
index e0245fc5b..80fe88813 100644
--- a/tools/compute_mean_std.py
+++ b/utils/compute_mean_std.py
@@ -15,10 +15,10 @@
 
 import argparse
 import functools
-from data_utils.normalizer import FeatureNormalizer
-from data_utils.augmentor.augmentation import AugmentationPipeline
-from data_utils.featurizer.audio_featurizer import AudioFeaturizer
-from utils.utility import add_arguments, print_arguments
+from deepspeech.frontend.normalizer import FeatureNormalizer
+from deepspeech.frontend.augmentor.augmentation import AugmentationPipeline
+from deepspeech.frontend.featurizer.audio_featurizer import AudioFeaturizer
+from deepspeech.utils.utility import add_arguments, print_arguments
 
 parser = argparse.ArgumentParser(description=__doc__)
 add_arg = functools.partial(add_arguments, argparser=parser)
diff --git a/tools/profile.sh b/utils/profile.sh
similarity index 100%
rename from tools/profile.sh
rename to utils/profile.sh
diff --git a/utils/utility.py b/utils/utility.py
index cd7166593..1d3be04d4 100644
--- a/utils/utility.py
+++ b/utils/utility.py
@@ -11,47 +11,51 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Contains common utility functions."""
 
-import distutils.util
-
-
-def print_arguments(args):
-    """Print argparse's arguments.
-
-    Usage:
-
-    .. code-block:: python
-
-        parser = argparse.ArgumentParser()
-        parser.add_argument("name", default="Jonh", type=str, help="User name.")
-        args = parser.parse_args()
-        print_arguments(args)
-
-    :param args: Input argparse.Namespace for printing.
-    :type args: argparse.Namespace
-    """
-    print("-----------  Configuration Arguments -----------")
-    for arg, value in sorted(vars(args).items()):
-        print("%s: %s" % (arg, value))
-    print("------------------------------------------------")
-
-
-def add_arguments(argname, type, default, help, argparser, **kwargs):
-    """Add argparse's argument.
-
-    Usage:
-
-    .. code-block:: python
-
-        parser = argparse.ArgumentParser()
-        add_argument("name", str, "Jonh", "User name.", parser)
-        args = parser.parse_args()
-    """
-    type = distutils.util.strtobool if type == bool else type
-    argparser.add_argument(
-        "--" + argname,
-        default=default,
-        type=type,
-        help=help + ' Default: %(default)s.',
-        **kwargs)
+import os
+import tarfile
+from paddle.dataset.common import md5file
+
+
+def getfile_insensitive(path):
+    """Resolve path to an existing file, ignoring the filename's case.
+    Returns the first match in path's directory, or None if there is none."""
+    parent, wanted = os.path.split(path)
+    parent, wanted = parent or '.', wanted.lower()
+    names = (n for n in os.listdir(parent) if n.lower() == wanted)
+    hits = (os.path.join(parent, n) for n in names)
+    return next((hit for hit in hits if os.path.isfile(hit)), None)
+
+
+def download_multi(url, target_dir, extra_args):
+    """Fetch url into target_dir with wget, forwarding extra_args to it.
+    Returns the status value produced by os.system for the wget command."""
+    if not os.path.exists(target_dir): os.makedirs(target_dir)
+    print("Downloading %s ..." % url)
+    wget_cmd = " ".join(["wget -c", url, extra_args, "-P", target_dir])
+    return os.system(wget_cmd)
+
+
+def download(url, md5sum, target_dir):
+    """Download url into target_dir unless a copy matching md5sum exists.
+    Returns the file path; raises RuntimeError if the MD5 check fails."""
+    os.makedirs(target_dir, exist_ok=True)
+    filepath = os.path.join(target_dir, url.split("/")[-1])
+    if os.path.exists(filepath) and md5file(filepath) == md5sum:
+        print("File exists, skip downloading. (%s)" % filepath)
+        return filepath
+    print("Downloading %s ..." % url)
+    os.system("wget -c " + url + " -P " + target_dir)
+    print("\nMD5 Checksum %s ..." % filepath)
+    if md5file(filepath) != md5sum: raise RuntimeError("MD5 checksum failed.")
+    return filepath
+
+
+def unpack(filepath, target_dir, rm_tar=False):
+    """Extract tar archive filepath into target_dir; remove it if rm_tar."""
+    print("Unpacking %s ..." % filepath)
+    # NOTE: extractall trusts member paths; only unpack trusted archives.
+    with tarfile.open(filepath) as tar:
+        tar.extractall(target_dir)
+    if rm_tar:
+        os.remove(filepath)