refactor egs

5 years ago · 123d7a6f3f
parent ba7cf0782e
commit 123d7a6f3f
34 changed files with 308 additions and 404 deletions
--- a/data/librispeech/librispeech.py
+++ b/data/librispeech/librispeech.py
@ -1,159 +0,0 @@
 # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Prepare Librispeech ASR datasets.
 Download, unpack and create manifest files.
 Manifest file is a json-format file with each line containing the
 meta data (i.e. audio filepath, transcript and audio duration)
 of each audio file in the data set.
 """
 import distutils.util
 import os
 import sys
 import argparse
 import soundfile
 import json
 import codecs
 import io
 from data_utils.utility import download, unpack
 # Base URL of the LibriSpeech archives. The second assignment overrides the
 # first, so only the magicdatatech.com URL is actually used; the openslr.org
 # value is never read (presumably kept for reference - confirm before removing).
 URL_ROOT = "http://www.openslr.org/resources/12"
 URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
 # Archive URL of each subset.
 URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
 URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
 URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
 URL_DEV_OTHER = URL_ROOT + "/dev-other.tar.gz"
 URL_TRAIN_CLEAN_100 = URL_ROOT + "/train-clean-100.tar.gz"
 URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz"
 URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz"
 # MD5 checksum of each archive; main() passes it to prepare_dataset() together
 # with the matching URL.
 MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9"
 MD5_TEST_OTHER = "fb5a50374b501bb3bac4815ee91d3135"
 MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"
 MD5_DEV_OTHER = "c8d0bcc9cca99d4f8b62fcc847357931"
 MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522"
 MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa"
 MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
 # Command-line interface; the module docstring doubles as the description.
 parser = argparse.ArgumentParser(description=__doc__)
 parser.add_argument(
    "--target_dir",
    default='~/.cache/paddle/dataset/speech/libri',
    type=str,
    help="Directory to save the dataset. (default: %(default)s)")
 parser.add_argument(
    "--manifest_prefix",
    default="manifest",
    type=str,
    help="Filepath prefix for output manifests. (default: %(default)s)")
 # NOTE(review): the help text lists train-clean-100 as part of the minimal
 # download, but main() only fetches train-clean-100 when --full_download is
 # true; one of the two should be corrected.
 # NOTE(review): distutils is deprecated (PEP 632) and no longer ships with
 # Python 3.12+, so strtobool will need a replacement there.
 parser.add_argument(
    "--full_download",
    default="True",
    type=distutils.util.strtobool,
    help="Download all datasets for Librispeech."
    " If False, only download a minimal requirement (test-clean, dev-clean"
    " train-clean-100). (default: %(default)s)")
 # Arguments are parsed at import time, so merely importing this module reads
 # the process command line.
 args = parser.parse_args()
 def create_manifest(data_dir, manifest_path):
    """Create a manifest json file summarizing the data set.

    Walks ``data_dir`` recursively (directories visited in sorted order). In
    every directory that contains a file ending in ``trans.txt``, the first
    such file is read; each non-blank line is expected to be
    ``<utterance-id> <transcript words...>``. The matching audio file is
    ``<utterance-id>.flac`` in the same directory.

    One JSON object per utterance is written to ``manifest_path`` (UTF-8, one
    per line) with the keys ``audio_filepath``, ``duration`` (number of audio
    frames divided by the sample rate) and ``text`` (lower-cased transcript).

    :param data_dir: Root directory to scan for transcript files.
    :param manifest_path: Output path of the manifest file (overwritten).
    """
    print("Creating manifest %s ..." % manifest_path)
    json_lines = []
    for subfolder, _, filelist in sorted(os.walk(data_dir)):
        text_filelist = [
            filename for filename in filelist if filename.endswith('trans.txt')
        ]
        if not text_filelist:
            continue
        text_filepath = os.path.join(subfolder, text_filelist[0])
        # Context manager: the transcript handle is closed as soon as the file
        # has been consumed instead of lingering until garbage collection.
        with io.open(text_filepath, encoding="utf8") as text_file:
            for line in text_file:
                segments = line.strip().split()
                if not segments:
                    # Blank / whitespace-only line: there is no utterance id,
                    # so skip it rather than fail on segments[0].
                    continue
                text = ' '.join(segments[1:]).lower()
                audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
                audio_data, samplerate = soundfile.read(audio_filepath)
                duration = float(len(audio_data)) / samplerate
                json_lines.append(
                    json.dumps({
                        'audio_filepath': audio_filepath,
                        'duration': duration,
                        'text': text
                    }))
    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
        for line in json_lines:
            out_file.write(line + '\n')
 def prepare_dataset(url, md5sum, target_dir, manifest_path):
    """Make one subset available locally and write its manifest.

    If ``target_dir`` does not yet contain a ``LibriSpeech`` entry, the archive
    at ``url`` is fetched with ``download`` (given ``md5sum``) and extracted
    with ``unpack``; otherwise both steps are skipped. The manifest is
    (re)generated from ``target_dir`` in either case.
    """
    already_unpacked = os.path.exists(os.path.join(target_dir, "LibriSpeech"))
    if already_unpacked:
        print("Skip downloading and unpacking. Data already exists in %s." %
              target_dir)
    else:
        archive_path = download(url, md5sum, target_dir)
        unpack(archive_path, target_dir)
    # The manifest is always rebuilt, even when the data was already present.
    create_manifest(target_dir, manifest_path)
 def main():
    """Prepare every requested LibriSpeech subset.

    test-clean and dev-clean are always prepared; the remaining subsets are
    prepared only when ``args.full_download`` is truthy. Each subset is stored
    under ``<target_dir>/<subset>`` and its manifest is written to
    ``<manifest_prefix>.<subset>``.
    """
    if args.target_dir.startswith('~'):
        args.target_dir = os.path.expanduser(args.target_dir)
    # (subset name, archive URL, archive MD5), in processing order.
    subsets = [
        ("test-clean", URL_TEST_CLEAN, MD5_TEST_CLEAN),
        ("dev-clean", URL_DEV_CLEAN, MD5_DEV_CLEAN),
    ]
    if args.full_download:
        subsets.extend([
            ("train-clean-100", URL_TRAIN_CLEAN_100, MD5_TRAIN_CLEAN_100),
            ("test-other", URL_TEST_OTHER, MD5_TEST_OTHER),
            ("dev-other", URL_DEV_OTHER, MD5_DEV_OTHER),
            ("train-clean-360", URL_TRAIN_CLEAN_360, MD5_TRAIN_CLEAN_360),
            ("train-other-500", URL_TRAIN_OTHER_500, MD5_TRAIN_OTHER_500),
        ])
    for subset_name, subset_url, subset_md5 in subsets:
        prepare_dataset(
            url=subset_url,
            md5sum=subset_md5,
            target_dir=os.path.join(args.target_dir, subset_name),
            manifest_path=args.manifest_prefix + "." + subset_name)
 # Run the preparation only when executed as a script, not when imported.
 if __name__ == '__main__':
    main()
--- a/examples/aishell/local/aishell.py
+++ b/examples/aishell/local/aishell.py
--- a/examples/aishell/local/run_data.sh
+++ b/examples/aishell/local/run_data.sh
@ -1,11 +1,9 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 data/aishell/aishell.py \
+PYTHONPATH=.:$PYTHONPATH python3 local/aishell.py \
--manifest_prefix='data/aishell/manifest' \
+--manifest_prefix="data/manifest" \
--target_dir='./dataset/aishell'
+--target_dir="./dataset/aishell"
 if [ $? -ne 0 ]; then
    echo "Prepare Aishell failed. Terminated."
@ -14,10 +12,10 @@ fi
 # build vocabulary
-python3 tools/build_vocab.py \
+python3 ${MAIN_ROOT}/tools/build_vocab.py \
 --count_threshold=0 \
--vocab_path='data/aishell/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--manifest_paths 'data/aishell/manifest.train' 'data/aishell/manifest.dev'
+--manifest_paths "data/manifest.train" "data/manifest.dev"
 if [ $? -ne 0 ]; then
    echo "Build vocabulary failed. Terminated."
@ -26,11 +24,11 @@ fi
 # compute mean and stddev for normalizer
-python3 tools/compute_mean_std.py \
+python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
--manifest_path='data/aishell/manifest.train' \
+--manifest_path="data/manifest.train" \
 --num_samples=2000 \
--specgram_type='linear' \
+--specgram_type="linear" \
--output_path='data/aishell/mean_std.npz'
+--output_path="data/mean_std.npz"
 if [ $? -ne 0 ]; then
    echo "Compute mean and stddev failed. Terminated."
--- a/examples/aishell/local/run_infer.sh
+++ b/examples/aishell/local/run_infer.sh
@ -27,14 +27,14 @@ python3 -u infer.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--infer_manifest='data/aishell/manifest.test' \
+--infer_manifest="data/aishell/manifest.test" \
--mean_std_path='data/aishell/mean_std.npz' \
+--mean_std_path="data/aishell/mean_std.npz" \
--vocab_path='data/aishell/vocab.txt' \
+--vocab_path="data/aishell/vocab.txt" \
--model_path='checkpoints/aishell/step_final' \
+--model_path="checkpoints/aishell/step_final" \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='cer' \
+--error_rate_type="cer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/aishell/local/run_infer_golden.sh
+++ b/examples/aishell/local/run_infer_golden.sh
@ -36,14 +36,14 @@ python3 -u infer.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
--infer_manifest='data/aishell/manifest.test' \
+--infer_manifest="data/aishell/manifest.test" \
--mean_std_path='models/aishell/mean_std.npz' \
+--mean_std_path="models/aishell/mean_std.npz" \
--vocab_path='models/aishell/vocab.txt' \
+--vocab_path="models/aishell/vocab.txt" \
--model_path='models/aishell' \
+--model_path="models/aishell" \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='cer' \
+--error_rate_type="cer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/aishell/local/run_test.sh
+++ b/examples/aishell/local/run_test.sh
@ -27,14 +27,14 @@ python3 -u test.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--test_manifest='data/aishell/manifest.test' \
+--test_manifest="data/aishell/manifest.test" \
--mean_std_path='data/aishell/mean_std.npz' \
+--mean_std_path="data/aishell/mean_std.npz" \
--vocab_path='data/aishell/vocab.txt' \
+--vocab_path="data/aishell/vocab.txt" \
--model_path='checkpoints/aishell/step_final' \
+--model_path="checkpoints/aishell/step_final" \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='cer' \
+--error_rate_type="cer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/aishell/local/run_test_golden.sh
+++ b/examples/aishell/local/run_test_golden.sh
@ -36,14 +36,14 @@ python3 -u test.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--test_manifest='data/aishell/manifest.test' \
+--test_manifest="data/aishell/manifest.test" \
--mean_std_path='models/aishell/mean_std.npz' \
+--mean_std_path="models/aishell/mean_std.npz" \
--vocab_path='models/aishell/vocab.txt' \
+--vocab_path="models/aishell/vocab.txt" \
--model_path='models/aishell' \
+--model_path="models/aishell" \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='cer' \
+--error_rate_type="cer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/aishell/local/run_train.sh
+++ b/examples/aishell/local/run_train.sh
@ -24,14 +24,14 @@ python3 -u train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=False \
--train_manifest='data/aishell/manifest.train' \
+--train_manifest="data/aishell/manifest.train" \
--dev_manifest='data/aishell/manifest.dev' \
+--dev_manifest="data/aishell/manifest.dev" \
--mean_std_path='data/aishell/mean_std.npz' \
+--mean_std_path="data/aishell/mean_std.npz" \
--vocab_path='data/aishell/vocab.txt' \
+--vocab_path="data/aishell/vocab.txt" \
--output_model_dir='./checkpoints/aishell' \
+--output_model_dir="./checkpoints/aishell" \
--augment_conf_path='conf/augmentation.config' \
+--augment_conf_path="conf/augmentation.config" \
--specgram_type='linear' \
+--specgram_type="linear" \
--shuffle_method='batch_shuffle_clipped' \
+--shuffle_method="batch_shuffle_clipped" \
 if [ $? -ne 0 ]; then
    echo "Failed in training!"
--- a/examples/aishell/path.sh
+++ b/examples/aishell/path.sh
@ -0,0 +1,8 @@
 # Environment setup for this example. Every path derives from $PWD, so the
 # file has to be sourced from the example directory itself.
 # Directory two levels above the current one (presumably the repository root).
 export MAIN_ROOT=${PWD}/../../
 # Put MAIN_ROOT and the local ./tools directory in front of the existing PATH.
 export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
 export LC_ALL=C
 # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8 
 # Let Python resolve imports from MAIN_ROOT.
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/aishell/run.sh
+++ b/examples/aishell/run.sh
@ -0,0 +1,24 @@
 #!/bin/bash
 # Runs every stage of this example in sequence; each stage is a script in
 # ./local. NOTE(review): exit statuses are not checked here, so a failing
 # stage does not prevent the following ones from running.
 # set up MAIN_ROOT, PATH and PYTHONPATH
 source path.sh
 # prepare data
 bash ./local/run_data.sh
 # test pretrained model
 bash ./local/run_test_golden.sh
 # infer with pretrained model
 bash ./local/run_infer_golden.sh
 # train model
 bash ./local/run_train.sh
 # test model
 bash ./local/run_test.sh
 # infer model
 bash ./local/run_infer.sh
 # tune model
 bash ./local/run_tune.sh
--- a/examples/baidu_en8k/path.sh
+++ b/examples/baidu_en8k/path.sh
@ -0,0 +1,8 @@
 # Environment setup for this example. Every path derives from $PWD, so the
 # file has to be sourced from the example directory itself.
 # Directory two levels above the current one (presumably the repository root).
 export MAIN_ROOT=${PWD}/../../
 # Put MAIN_ROOT and the local ./tools directory in front of the existing PATH.
 export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
 export LC_ALL=C
 # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8 
 # Let Python resolve imports from MAIN_ROOT.
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/baidu_en8k/run_infer_golden.sh
+++ b/examples/baidu_en8k/run_infer_golden.sh
@ -1,9 +1,9 @@
 #! /usr/bin/env bash
-cd ../.. > /dev/null
+source path.sh
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -12,7 +12,7 @@ cd - > /dev/null
 # download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -22,7 +22,7 @@ cd - > /dev/null
 # infer
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
 --num_samples=10 \
 --beam_size=500 \
 --num_proc_bsearch=5 \
@ -36,14 +36,14 @@ python3 -u infer.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
--infer_manifest='data/librispeech/manifest.test-clean' \
+--infer_manifest="${MAIN_ROOT}/examples/librispeech/data/manifest.test-clean" \
--mean_std_path='models/baidu_en8k/mean_std.npz' \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
--vocab_path='models/baidu_en8k/vocab.txt' \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
--model_path='models/baidu_en8k' \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/baidu_en8k/run_test_golden.sh
+++ b/examples/baidu_en8k/run_test_golden.sh
@ -37,14 +37,14 @@ python3 -u test.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
--test_manifest='data/librispeech/manifest.test-clean' \
+--test_manifest="data/librispeech/manifest.test-clean" \
--mean_std_path='models/baidu_en8k/mean_std.npz' \
+--mean_std_path="models/baidu_en8k/mean_std.npz" \
--vocab_path='models/baidu_en8k/vocab.txt' \
+--vocab_path="models/baidu_en8k/vocab.txt" \
--model_path='models/baidu_en8k' \
+--model_path="models/baidu_en8k" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/deploy_demo/path.sh
+++ b/examples/deploy_demo/path.sh
@ -0,0 +1,8 @@
 # Environment setup for this example. Every path derives from $PWD, so the
 # file has to be sourced from the example directory itself.
 # Directory two levels above the current one (presumably the repository root).
 export MAIN_ROOT=${PWD}/../../
 # Put MAIN_ROOT and the local ./tools directory in front of the existing PATH.
 export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
 export LC_ALL=C
 # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8 
 # Let Python resolve imports from MAIN_ROOT.
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/deploy_demo/run_demo_client.sh
+++ b/examples/deploy_demo/run_demo_client.sh
@ -1,11 +1,11 @@
 #! /usr/bin/env bash
-cd ../.. > /dev/null
+source path.sh
 # start demo client
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u deploy/demo_client.py \
+python3 -u ${MAIN_ROOT}/deploy/demo_client.py \
--host_ip='localhost' \
+--host_ip="localhost" \
 --host_port=8086 \
 if [ $? -ne 0 ]; then
--- a/examples/deploy_demo/run_english_demo_server.sh
+++ b/examples/deploy_demo/run_english_demo_server.sh
@ -1,10 +1,10 @@
 #! /usr/bin/env bash
 # TODO: replace the model with a mandarin model
-cd ../.. > /dev/null
+source path.sh
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -13,7 +13,7 @@ cd - > /dev/null
 # download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -23,8 +23,8 @@ cd - > /dev/null
 # start demo server
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u deploy/demo_server.py \
+python3 -u ${MAIN_ROOT}/deploy/demo_server.py \
--host_ip='localhost' \
+--host_ip="localhost" \
 --host_port=8086 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
@ -36,14 +36,14 @@ python3 -u deploy/demo_server.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--speech_save_dir='demo_cache' \
+--speech_save_dir="demo_cache" \
--warmup_manifest='data/tiny/manifest.test-clean' \
+--warmup_manifest="${MAIN_ROOT}/examples/tiny/data/manifest.test-clean" \
--mean_std_path='models/baidu_en8k/mean_std.npz' \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
--vocab_path='models/baidu_en8k/vocab.txt' \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
--model_path='models/baidu_en8k' \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in starting demo server!"
--- a/examples/librispeech/local/run_data.sh
+++ b/examples/librispeech/local/run_data.sh
@ -0,0 +1,43 @@
 #! /usr/bin/env bash
 # Prepare LibriSpeech for this example: download and unpack the corpus, write
 # per-subset manifests to data/, then build the vocabulary and the mean/stddev
 # statistics from the merged training manifest.
 # Expects MAIN_ROOT in the environment and uses paths relative to the current
 # directory (presumably examples/librispeech - confirm against the caller).
 # download data, generate manifests
 PYTHONPATH=.:$PYTHONPATH python3 local/librispeech.py \
 --manifest_prefix="data/manifest" \
 --target_dir="./dataset/librispeech" \
 --full_download="True"
 if [ $? -ne 0 ]; then
    echo "Prepare LibriSpeech failed. Terminated."
    exit 1
 fi
 # merge every train-* manifest into a single shuffled training manifest
 cat data/manifest.train-* | shuf > data/manifest.train
 # build vocabulary
 python3 ${MAIN_ROOT}/tools/build_vocab.py \
 --count_threshold=0 \
 --vocab_path="data/vocab.txt" \
 --manifest_paths="data/manifest.train"
 if [ $? -ne 0 ]; then
    echo "Build vocabulary failed. Terminated."
    exit 1
 fi
 # compute mean and stddev for normalizer
 python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
 --manifest_path="data/manifest.train" \
 --num_samples=2000 \
 --specgram_type="linear" \
 --output_path="data/mean_std.npz"
 if [ $? -ne 0 ]; then
    echo "Compute mean and stddev failed. Terminated."
    exit 1
 fi
 echo "LibriSpeech Data preparation done."
 exit 0
--- a/examples/librispeech/local/run_infer.sh
+++ b/examples/librispeech/local/run_infer.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -13,7 +11,7 @@ cd - > /dev/null
 # infer
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
 --num_samples=10 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -27,14 +25,14 @@ python3 -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
+--infer_manifest="data/manifest.test-clean" \
--mean_std_path='data/librispeech/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/librispeech/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--model_path='checkpoints/libri/step_final' \
+--model_path="checkpoints/step_final" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/librispeech/local/run_infer_golden.sh
+++ b/examples/librispeech/local/run_infer_golden.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -12,7 +10,7 @@ cd - > /dev/null
 # download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -22,7 +20,7 @@ cd - > /dev/null
 # infer
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
 --num_samples=10 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -36,14 +34,14 @@ python3 -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
+--infer_manifest="data/manifest.test-clean" \
--mean_std_path='models/librispeech/mean_std.npz' \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
--vocab_path='models/librispeech/vocab.txt' \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
--model_path='models/librispeech' \
+--model_path="${MAIN_ROOT}/models/librispeech" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/librispeech/local/run_test.sh
+++ b/examples/librispeech/local/run_test.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -13,7 +11,7 @@ cd - > /dev/null
 # evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -27,14 +25,14 @@ python3 -u test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
+--test_manifest="data/manifest.test-clean" \
--mean_std_path='data/librispeech/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/librispeech/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--model_path='checkpoints/libri/step_final' \
+--model_path="checkpoints/step_final" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/librispeech/local/run_test_golden.sh
+++ b/examples/librispeech/local/run_test_golden.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -12,7 +10,7 @@ cd - > /dev/null
 # download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -22,7 +20,7 @@ cd - > /dev/null
 # evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -36,14 +34,14 @@ python3 -u test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
+--test_manifest="data/manifest.test-clean" \
--mean_std_path='models/librispeech/mean_std.npz' \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
--vocab_path='models/librispeech/vocab.txt' \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
--model_path='models/librispeech' \
+--model_path="${MAIN_ROOT}/models/librispeech" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/librispeech/local/run_train.sh
+++ b/examples/librispeech/local/run_train.sh
@ -1,13 +1,11 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # train model
 # if you wish to resume from an existing model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u train.py \
+python3 -u ${MAIN_ROOT}/train.py \
 --batch_size=20 \
 --num_epoch=50 \
 --num_conv_layers=2 \
@ -25,14 +23,14 @@ python3 -u train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=True \
--train_manifest='data/librispeech/manifest.train' \
+--train_manifest="data/manifest.train" \
--dev_manifest='data/librispeech/manifest.dev-clean' \
+--dev_manifest="data/manifest.dev-clean" \
--mean_std_path='data/librispeech/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/librispeech/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--output_model_dir='./checkpoints/libri' \
+--output_model_dir="./checkpoints/libri" \
--augment_conf_path='conf/augmentation.config' \
+--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
--specgram_type='linear' \
+--specgram_type="linear" \
--shuffle_method='batch_shuffle_clipped' \
+--shuffle_method="batch_shuffle_clipped" \
 if [ $? -ne 0 ]; then
    echo "Failed in training!"
--- a/examples/librispeech/local/run_tune.sh
+++ b/examples/librispeech/local/run_tune.sh
@ -1,7 +1,5 @@
 #! /usr/bin/env bash
 cd ../.. > /dev/null
 # grid-search for hyper-parameters in language model
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
 python3 -u tools/tune.py \
@ -23,13 +21,13 @@ python3 -u tools/tune.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--tune_manifest='data/librispeech/manifest.dev-clean' \
+--tune_manifest="data/manifest.dev-clean" \
--mean_std_path='data/librispeech/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='models/librispeech/vocab.txt' \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
--model_path='models/librispeech' \
+--model_path="${MAIN_ROOT}/models/librispeech" \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
--- a/examples/librispeech/path.sh
+++ b/examples/librispeech/path.sh
@ -0,0 +1,8 @@
 # Environment setup for this example. Every path derives from $PWD, so the
 # file has to be sourced from the example directory itself.
 # Directory two levels above the current one (presumably the repository root).
 export MAIN_ROOT=${PWD}/../../
 # Put MAIN_ROOT and the local ./tools directory in front of the existing PATH.
 export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
 export LC_ALL=C
 # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
 export PYTHONIOENCODING=UTF-8 
 # Let Python resolve imports from MAIN_ROOT.
 export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/librispeech/run.sh
+++ b/examples/librispeech/run.sh
@ -0,0 +1,24 @@
 #!/bin/bash
 # Runs every stage of this example in sequence; each stage is a script in
 # ./local. NOTE(review): exit statuses are not checked here, so a failing
 # stage does not prevent the following ones from running.
 # set up MAIN_ROOT, PATH and PYTHONPATH
 source path.sh
 # prepare data
 bash ./local/run_data.sh
 # test pretrained model
 bash ./local/run_test_golden.sh
 # infer with pretrained model
 bash ./local/run_infer_golden.sh
 # train model
 bash ./local/run_train.sh
 # test model
 bash ./local/run_test.sh
 # infer model
 bash ./local/run_infer.sh
 # tune model
 bash ./local/run_tune.sh
--- a/examples/librispeech/run_data.sh
+++ b/examples/librispeech/run_data.sh
@ -1,45 +0,0 @@
 #! /usr/bin/env bash
 # Prepare LibriSpeech: download and unpack the corpus, write per-subset
 # manifests, then build the vocabulary and the mean/stddev statistics.
 # All paths below are relative to the directory two levels up, which the
 # script enters first.
 cd ../.. > /dev/null
 # download data, generate manifests
 PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \
 --manifest_prefix='data/librispeech/manifest' \
 --target_dir='./dataset/librispeech' \
 --full_download='True'
 if [ $? -ne 0 ]; then
    echo "Prepare LibriSpeech failed. Terminated."
    exit 1
 fi
 # merge every train-* manifest into a single shuffled training manifest
 cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train
 # build vocabulary
 python3 tools/build_vocab.py \
 --count_threshold=0 \
 --vocab_path='data/librispeech/vocab.txt' \
 --manifest_paths='data/librispeech/manifest.train'
 if [ $? -ne 0 ]; then
    echo "Build vocabulary failed. Terminated."
    exit 1
 fi
 # compute mean and stddev for normalizer
 python3 tools/compute_mean_std.py \
 --manifest_path='data/librispeech/manifest.train' \
 --num_samples=2000 \
 --specgram_type='linear' \
 --output_path='data/librispeech/mean_std.npz'
 if [ $? -ne 0 ]; then
    echo "Compute mean and stddev failed. Terminated."
    exit 1
 fi
 echo "LibriSpeech Data preparation done."
 exit 0
--- a/examples/tiny/local/run_data.sh
+++ b/examples/tiny/local/run_data.sh
@ -7,9 +7,9 @@ fi
 # download data, generate manifests
 PYTHONPATH=.:$PYTHONPATH python3 ../librispeech/local/librispeech.py \
--manifest_prefix='data/manifest' \
+--manifest_prefix="data/manifest" \
 --target_dir="${MAIN_ROOT}/dataset/librispeech" \
--full_download='False'
+--full_download="False"
 if [ $? -ne 0 ]; then
    echo "Prepare LibriSpeech failed. Terminated."
@ -21,8 +21,8 @@ head -n 64 data/manifest.dev-clean  > data/manifest.tiny
 # build vocabulary
 python3 ${MAIN_ROOT}/tools/build_vocab.py \
 --count_threshold=0 \
--vocab_path='data/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--manifest_paths='data/manifest.tiny'
+--manifest_paths="data/manifest.tiny"
 if [ $? -ne 0 ]; then
    echo "Build vocabulary failed. Terminated."
@ -32,10 +32,10 @@ fi
 # compute mean and stddev for normalizer
 python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
--manifest_path='data/manifest.tiny' \
+--manifest_path="data/manifest.tiny" \
 --num_samples=64 \
--specgram_type='linear' \
+--specgram_type="linear" \
--output_path='data/mean_std.npz'
+--output_path="data/mean_std.npz"
 if [ $? -ne 0 ]; then
    echo "Compute mean and stddev failed. Terminated."
--- a/examples/tiny/local/run_infer.sh
+++ b/examples/tiny/local/run_infer.sh
@ -25,14 +25,14 @@ python3 -u $MAIN_ROOT/infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/manifest.test-clean' \
+--infer_manifest="data/manifest.test-clean" \
--mean_std_path='data/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--model_path='checkpoints/step_final' \
+--model_path="checkpoints/step_final" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/tiny/local/run_infer_golden.sh
+++ b/examples/tiny/local/run_infer_golden.sh
@ -34,14 +34,14 @@ python3 -u ${MAIN_ROOT}/infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/manifest.test-clean' \
+--infer_manifest="data/manifest.test-clean" \
 --mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
 --vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
 --model_path="${MAIN_ROOT}/models/librispeech" \
 --lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/tiny/local/run_test.sh
+++ b/examples/tiny/local/run_test.sh
@ -25,14 +25,14 @@ python3 -u $MAIN_ROOT/test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/manifest.test-clean' \
+--test_manifest="data/manifest.test-clean" \
--mean_std_path='data/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--model_path='checkpoints/step_final' \
+--model_path="checkpoints/step_final" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/tiny/local/run_test_golden.sh
+++ b/examples/tiny/local/run_test_golden.sh
@ -34,14 +34,14 @@ python3 -u $MAIN_ROOT/test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/manifest.test-clean' \
+--test_manifest="data/manifest.test-clean" \
 --mean_std_path="$MAIN_ROOT/models/librispeech/mean_std.npz" \
 --vocab_path="$MAIN_ROOT/models/librispeech/vocab.txt" \
 --model_path="$MAIN_ROOT/models/librispeech" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
+--decoding_method="ctc_beam_search" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/tiny/local/run_train.sh
+++ b/examples/tiny/local/run_train.sh
@ -22,14 +22,14 @@ python3 -u ${MAIN_ROOT}/train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=True \
--train_manifest='data/manifest.tiny' \
+--train_manifest="data/manifest.tiny" \
--dev_manifest='data/manifest.tiny' \
+--dev_manifest="data/manifest.tiny" \
--mean_std_path='data/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/vocab.txt' \
+--vocab_path="data/vocab.txt" \
--output_model_dir='./checkpoints/' \
+--output_model_dir="./checkpoints/" \
 --augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
--specgram_type='linear' \
+--specgram_type="linear" \
--shuffle_method='batch_shuffle_clipped' \
+--shuffle_method="batch_shuffle_clipped" \
 if [ $? -ne 0 ]; then
    echo "Failed in training!"
--- a/examples/tiny/local/run_tune.sh
+++ b/examples/tiny/local/run_tune.sh
@ -21,13 +21,13 @@ python3 -u $MAIN_ROOT/tools/tune.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--tune_manifest='data/manifest.dev-clean' \
+--tune_manifest="data/manifest.dev-clean" \
--mean_std_path='data/mean_std.npz' \
+--mean_std_path="data/mean_std.npz" \
--vocab_path='data/vocab.txt' \
+--vocab_path="data/vocab.txt" \
 --model_path="$MAIN_ROOT/models/librispeech" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--error_rate_type='wer' \
+--error_rate_type="wer" \
--specgram_type='linear'
+--specgram_type="linear"
 if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
--- a/requirements.txt
+++ b/requirements.txt
@ -2,4 +2,3 @@ scipy==1.2.1
 resampy==0.1.5
 SoundFile==0.9.0.post1
 python_speech_features
 paddlepaddle-gpu==1.8.5