refactor egs

5 years ago · 123d7a6f3f
parent ba7cf0782e
commit 123d7a6f3f
34 changed files with 308 additions and 404 deletions
--- a/data/librispeech/librispeech.py
+++ b/data/librispeech/librispeech.py
@ -1,159 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Prepare Librispeech ASR datasets.
-
-Download, unpack and create manifest files.
-Manifest file is a json-format file with each line containing the
-meta data (i.e. audio filepath, transcript and audio duration)
-of each audio file in the data set.
-"""
-
-import distutils.util
-import os
-import sys
-import argparse
-import soundfile
-import json
-import codecs
-import io
-from data_utils.utility import download, unpack
-
-URL_ROOT = "http://www.openslr.org/resources/12"
-URL_ROOT = "https://openslr.magicdatatech.com/resources/12"
-URL_TEST_CLEAN = URL_ROOT + "/test-clean.tar.gz"
-URL_TEST_OTHER = URL_ROOT + "/test-other.tar.gz"
-URL_DEV_CLEAN = URL_ROOT + "/dev-clean.tar.gz"
-URL_DEV_OTHER = URL_ROOT + "/dev-other.tar.gz"
-URL_TRAIN_CLEAN_100 = URL_ROOT + "/train-clean-100.tar.gz"
-URL_TRAIN_CLEAN_360 = URL_ROOT + "/train-clean-360.tar.gz"
-URL_TRAIN_OTHER_500 = URL_ROOT + "/train-other-500.tar.gz"
-
-MD5_TEST_CLEAN = "32fa31d27d2e1cad72775fee3f4849a9"
-MD5_TEST_OTHER = "fb5a50374b501bb3bac4815ee91d3135"
-MD5_DEV_CLEAN = "42e2234ba48799c1f50f24a7926300a1"
-MD5_DEV_OTHER = "c8d0bcc9cca99d4f8b62fcc847357931"
-MD5_TRAIN_CLEAN_100 = "2a93770f6d5c6c964bc36631d331a522"
-MD5_TRAIN_CLEAN_360 = "c0e676e450a7ff2f54aeade5171606fa"
-MD5_TRAIN_OTHER_500 = "d1a0fd59409feb2c614ce4d30c387708"
-
-parser = argparse.ArgumentParser(description=__doc__)
-parser.add_argument(
-    "--target_dir",
-    default='~/.cache/paddle/dataset/speech/libri',
-    type=str,
-    help="Directory to save the dataset. (default: %(default)s)")
-parser.add_argument(
-    "--manifest_prefix",
-    default="manifest",
-    type=str,
-    help="Filepath prefix for output manifests. (default: %(default)s)")
-parser.add_argument(
-    "--full_download",
-    default="True",
-    type=distutils.util.strtobool,
-    help="Download all datasets for Librispeech."
-    " If False, only download a minimal requirement (test-clean, dev-clean"
-    " train-clean-100). (default: %(default)s)")
-args = parser.parse_args()
-
-
-def create_manifest(data_dir, manifest_path):
-    """Create a manifest json file summarizing the data set, with each line
-    containing the meta data (i.e. audio filepath, transcription text, audio
-    duration) of each audio file within the data set.
-    """
-    print("Creating manifest %s ..." % manifest_path)
-    json_lines = []
-    for subfolder, _, filelist in sorted(os.walk(data_dir)):
-        text_filelist = [
-            filename for filename in filelist if filename.endswith('trans.txt')
-        ]
-        if len(text_filelist) > 0:
-            text_filepath = os.path.join(subfolder, text_filelist[0])
-            for line in io.open(text_filepath, encoding="utf8"):
-                segments = line.strip().split()
-                text = ' '.join(segments[1:]).lower()
-                audio_filepath = os.path.join(subfolder, segments[0] + '.flac')
-                audio_data, samplerate = soundfile.read(audio_filepath)
-                duration = float(len(audio_data)) / samplerate
-                json_lines.append(
-                    json.dumps({
-                        'audio_filepath': audio_filepath,
-                        'duration': duration,
-                        'text': text
-                    }))
-    with codecs.open(manifest_path, 'w', 'utf-8') as out_file:
-        for line in json_lines:
-            out_file.write(line + '\n')
-
-
-def prepare_dataset(url, md5sum, target_dir, manifest_path):
-    """Download, unpack and create summmary manifest file.
-    """
-    if not os.path.exists(os.path.join(target_dir, "LibriSpeech")):
-        # download
-        filepath = download(url, md5sum, target_dir)
-        # unpack
-        unpack(filepath, target_dir)
-    else:
-        print("Skip downloading and unpacking. Data already exists in %s." %
-              target_dir)
-    # create manifest json file
-    create_manifest(target_dir, manifest_path)
-
-
-def main():
-    if args.target_dir.startswith('~'):
-        args.target_dir = os.path.expanduser(args.target_dir)
-
-    prepare_dataset(
-        url=URL_TEST_CLEAN,
-        md5sum=MD5_TEST_CLEAN,
-        target_dir=os.path.join(args.target_dir, "test-clean"),
-        manifest_path=args.manifest_prefix + ".test-clean")
-    prepare_dataset(
-        url=URL_DEV_CLEAN,
-        md5sum=MD5_DEV_CLEAN,
-        target_dir=os.path.join(args.target_dir, "dev-clean"),
-        manifest_path=args.manifest_prefix + ".dev-clean")
-    if args.full_download:
-        prepare_dataset(
-            url=URL_TRAIN_CLEAN_100,
-            md5sum=MD5_TRAIN_CLEAN_100,
-            target_dir=os.path.join(args.target_dir, "train-clean-100"),
-            manifest_path=args.manifest_prefix + ".train-clean-100")
-        prepare_dataset(
-            url=URL_TEST_OTHER,
-            md5sum=MD5_TEST_OTHER,
-            target_dir=os.path.join(args.target_dir, "test-other"),
-            manifest_path=args.manifest_prefix + ".test-other")
-        prepare_dataset(
-            url=URL_DEV_OTHER,
-            md5sum=MD5_DEV_OTHER,
-            target_dir=os.path.join(args.target_dir, "dev-other"),
-            manifest_path=args.manifest_prefix + ".dev-other")
-        prepare_dataset(
-            url=URL_TRAIN_CLEAN_360,
-            md5sum=MD5_TRAIN_CLEAN_360,
-            target_dir=os.path.join(args.target_dir, "train-clean-360"),
-            manifest_path=args.manifest_prefix + ".train-clean-360")
-        prepare_dataset(
-            url=URL_TRAIN_OTHER_500,
-            md5sum=MD5_TRAIN_OTHER_500,
-            target_dir=os.path.join(args.target_dir, "train-other-500"),
-            manifest_path=args.manifest_prefix + ".train-other-500")
-
-
-if __name__ == '__main__':
-    main()
--- a/examples/aishell/local/aishell.py
+++ b/examples/aishell/local/aishell.py
--- a/examples/aishell/local/run_data.sh
+++ b/examples/aishell/local/run_data.sh
@ -1,11 +1,9 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 data/aishell/aishell.py \
--manifest_prefix='data/aishell/manifest' \
--target_dir='./dataset/aishell'
+PYTHONPATH=.:$PYTHONPATH python3 local/aishell.py \
+--manifest_prefix="data/manifest" \
+--target_dir="./dataset/aishell"

 if [ $? -ne 0 ]; then
    echo "Prepare Aishell failed. Terminated."
@ -14,10 +12,10 @@ fi


 # build vocabulary
-python3 tools/build_vocab.py \
+python3 ${MAIN_ROOT}/tools/build_vocab.py \
 --count_threshold=0 \
--vocab_path='data/aishell/vocab.txt' \
--manifest_paths 'data/aishell/manifest.train' 'data/aishell/manifest.dev'
+--vocab_path="data/vocab.txt" \
+--manifest_paths "data/manifest.train" "data/manifest.dev"

 if [ $? -ne 0 ]; then
    echo "Build vocabulary failed. Terminated."
@ -26,11 +24,11 @@ fi


 # compute mean and stddev for normalizer
-python3 tools/compute_mean_std.py \
--manifest_path='data/aishell/manifest.train' \
+python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+--manifest_path="data/manifest.train" \
 --num_samples=2000 \
--specgram_type='linear' \
--output_path='data/aishell/mean_std.npz'
+--specgram_type="linear" \
+--output_path="data/mean_std.npz"

 if [ $? -ne 0 ]; then
    echo "Compute mean and stddev failed. Terminated."
--- a/examples/aishell/local/run_infer.sh
+++ b/examples/aishell/local/run_infer.sh
@ -27,14 +27,14 @@ python3 -u infer.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--infer_manifest='data/aishell/manifest.test' \
--mean_std_path='data/aishell/mean_std.npz' \
--vocab_path='data/aishell/vocab.txt' \
--model_path='checkpoints/aishell/step_final' \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='cer' \
--specgram_type='linear'
+--infer_manifest="data/aishell/manifest.test" \
+--mean_std_path="data/aishell/mean_std.npz" \
+--vocab_path="data/aishell/vocab.txt" \
+--model_path="checkpoints/aishell/step_final" \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/aishell/local/run_infer_golden.sh
+++ b/examples/aishell/local/run_infer_golden.sh
@ -36,14 +36,14 @@ python3 -u infer.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
--infer_manifest='data/aishell/manifest.test' \
--mean_std_path='models/aishell/mean_std.npz' \
--vocab_path='models/aishell/vocab.txt' \
--model_path='models/aishell' \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='cer' \
--specgram_type='linear'
+--infer_manifest="data/aishell/manifest.test" \
+--mean_std_path="models/aishell/mean_std.npz" \
+--vocab_path="models/aishell/vocab.txt" \
+--model_path="models/aishell" \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/aishell/local/run_test.sh
+++ b/examples/aishell/local/run_test.sh
@ -27,14 +27,14 @@ python3 -u test.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--test_manifest='data/aishell/manifest.test' \
--mean_std_path='data/aishell/mean_std.npz' \
--vocab_path='data/aishell/vocab.txt' \
--model_path='checkpoints/aishell/step_final' \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='cer' \
--specgram_type='linear'
+--test_manifest="data/aishell/manifest.test" \
+--mean_std_path="data/aishell/mean_std.npz" \
+--vocab_path="data/aishell/vocab.txt" \
+--model_path="checkpoints/aishell/step_final" \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/aishell/local/run_test_golden.sh
+++ b/examples/aishell/local/run_test_golden.sh
@ -36,14 +36,14 @@ python3 -u test.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--test_manifest='data/aishell/manifest.test' \
--mean_std_path='models/aishell/mean_std.npz' \
--vocab_path='models/aishell/vocab.txt' \
--model_path='models/aishell' \
--lang_model_path='models/lm/zh_giga.no_cna_cmn.prune01244.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='cer' \
--specgram_type='linear'
+--test_manifest="data/aishell/manifest.test" \
+--mean_std_path="models/aishell/mean_std.npz" \
+--vocab_path="models/aishell/vocab.txt" \
+--model_path="models/aishell" \
+--lang_model_path="models/lm/zh_giga.no_cna_cmn.prune01244.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="cer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/aishell/local/run_train.sh
+++ b/examples/aishell/local/run_train.sh
@ -24,14 +24,14 @@ python3 -u train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=False \
--train_manifest='data/aishell/manifest.train' \
--dev_manifest='data/aishell/manifest.dev' \
--mean_std_path='data/aishell/mean_std.npz' \
--vocab_path='data/aishell/vocab.txt' \
--output_model_dir='./checkpoints/aishell' \
--augment_conf_path='conf/augmentation.config' \
--specgram_type='linear' \
--shuffle_method='batch_shuffle_clipped' \
+--train_manifest="data/aishell/manifest.train" \
+--dev_manifest="data/aishell/manifest.dev" \
+--mean_std_path="data/aishell/mean_std.npz" \
+--vocab_path="data/aishell/vocab.txt" \
+--output_model_dir="./checkpoints/aishell" \
+--augment_conf_path="conf/augmentation.config" \
+--specgram_type="linear" \
+--shuffle_method="batch_shuffle_clipped" \

 if [ $? -ne 0 ]; then
    echo "Failed in training!"
--- a/examples/aishell/path.sh
+++ b/examples/aishell/path.sh
@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8 
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/aishell/run.sh
+++ b/examples/aishell/run.sh
@ -0,0 +1,24 @@
+#!/bin/bash
+
+source path.sh
+
+# prepare data
+bash ./local/run_data.sh
+
+# test pretrain model
+bash ./local/run_test_golden.sh
+
+# infer with pretrained model
+bash ./local/run_infer_golden.sh
+
+# train model
+bash ./local/run_train.sh
+
+# test model
+bash ./local/run_test.sh
+
+# infer model
+bash ./local/run_infer.sh
+
+# tune model
+bash ./local/run_tune.sh
--- a/examples/baidu_en8k/path.sh
+++ b/examples/baidu_en8k/path.sh
@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8 
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/baidu_en8k/run_infer_golden.sh
+++ b/examples/baidu_en8k/run_infer_golden.sh
@ -1,9 +1,9 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
+source path.sh

 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -12,7 +12,7 @@ cd - > /dev/null


 # download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -22,7 +22,7 @@ cd - > /dev/null

 # infer
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
 --num_samples=10 \
 --beam_size=500 \
 --num_proc_bsearch=5 \
@ -36,14 +36,14 @@ python3 -u infer.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
--infer_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='models/baidu_en8k/mean_std.npz' \
--vocab_path='models/baidu_en8k/vocab.txt' \
--model_path='models/baidu_en8k' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--infer_manifest="${MAIN_ROOT}/examples/librispeech/data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/baidu_en8k/run_test_golden.sh
+++ b/examples/baidu_en8k/run_test_golden.sh
@ -37,14 +37,14 @@ python3 -u test.py \
 --use_gru=True \
 --use_gpu=False \
 --share_rnn_weights=False \
--test_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='models/baidu_en8k/mean_std.npz' \
--vocab_path='models/baidu_en8k/vocab.txt' \
--model_path='models/baidu_en8k' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--test_manifest="data/librispeech/manifest.test-clean" \
+--mean_std_path="models/baidu_en8k/mean_std.npz" \
+--vocab_path="models/baidu_en8k/vocab.txt" \
+--model_path="models/baidu_en8k" \
+--lang_model_path="models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/deploy_demo/path.sh
+++ b/examples/deploy_demo/path.sh
@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8 
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/deploy_demo/run_demo_client.sh
+++ b/examples/deploy_demo/run_demo_client.sh
@ -1,11 +1,11 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
+source path.sh

 # start demo client
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u deploy/demo_client.py \
--host_ip='localhost' \
+python3 -u ${MAIN_ROOT}/deploy/demo_client.py \
+--host_ip="localhost" \
 --host_port=8086 \

 if [ $? -ne 0 ]; then
--- a/examples/deploy_demo/run_english_demo_server.sh
+++ b/examples/deploy_demo/run_english_demo_server.sh
@ -1,10 +1,10 @@
 #! /usr/bin/env bash
 # TODO: replace the model with a mandarin model

-cd ../.. > /dev/null
+source path.sh

 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -13,7 +13,7 @@ cd - > /dev/null


 # download well-trained model
-cd models/baidu_en8k > /dev/null
+cd ${MAIN_ROOT}/models/baidu_en8k > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -23,8 +23,8 @@ cd - > /dev/null

 # start demo server
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u deploy/demo_server.py \
--host_ip='localhost' \
+python3 -u ${MAIN_ROOT}/deploy/demo_server.py \
+--host_ip="localhost" \
 --host_port=8086 \
 --num_conv_layers=2 \
 --num_rnn_layers=3 \
@ -36,14 +36,14 @@ python3 -u deploy/demo_server.py \
 --use_gru=True \
 --use_gpu=True \
 --share_rnn_weights=False \
--speech_save_dir='demo_cache' \
--warmup_manifest='data/tiny/manifest.test-clean' \
--mean_std_path='models/baidu_en8k/mean_std.npz' \
--vocab_path='models/baidu_en8k/vocab.txt' \
--model_path='models/baidu_en8k' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--specgram_type='linear'
+--speech_save_dir="demo_cache" \
+--warmup_manifest="${MAIN_ROOT}/examples/tiny/data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/baidu_en8k/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/baidu_en8k/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/baidu_en8k" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in starting demo server!"
--- a/examples/librispeech/local/run_data.sh
+++ b/examples/librispeech/local/run_data.sh
@ -0,0 +1,43 @@
+#! /usr/bin/env bash
+
+# download data, generate manifests
+PYTHONPATH=.:$PYTHONPATH python3 local/librispeech.py \
+--manifest_prefix="data/manifest" \
+--target_dir="./dataset/librispeech" \
+--full_download="True"
+
+if [ $? -ne 0 ]; then
+    echo "Prepare LibriSpeech failed. Terminated."
+    exit 1
+fi
+
+cat data/manifest.train-* | shuf > data/manifest.train
+
+
+# build vocabulary
+python3 ${MAIN_ROOT}/tools/build_vocab.py \
+--count_threshold=0 \
+--vocab_path="data/vocab.txt" \
+--manifest_paths="data/manifest.train"
+
+if [ $? -ne 0 ]; then
+    echo "Build vocabulary failed. Terminated."
+    exit 1
+fi
+
+
+# compute mean and stddev for normalizer
+python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
+--manifest_path="data/manifest.train" \
+--num_samples=2000 \
+--specgram_type="linear" \
+--output_path="data/mean_std.npz"
+
+if [ $? -ne 0 ]; then
+    echo "Compute mean and stddev failed. Terminated."
+    exit 1
+fi
+
+
+echo "LibriSpeech Data preparation done."
+exit 0
--- a/examples/librispeech/local/run_infer.sh
+++ b/examples/librispeech/local/run_infer.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -13,7 +11,7 @@ cd - > /dev/null

 # infer
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
 --num_samples=10 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -27,14 +25,14 @@ python3 -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/vocab.txt' \
--model_path='checkpoints/libri/step_final' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/librispeech/local/run_infer_golden.sh
+++ b/examples/librispeech/local/run_infer_golden.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -12,7 +10,7 @@ cd - > /dev/null


 # download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -22,7 +20,7 @@ cd - > /dev/null

 # infer
 CUDA_VISIBLE_DEVICES=0 \
-python3 -u infer.py \
+python3 -u ${MAIN_ROOT}/infer.py \
 --num_samples=10 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -36,14 +34,14 @@ python3 -u infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/librispeech/local/run_test.sh
+++ b/examples/librispeech/local/run_test.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -13,7 +11,7 @@ cd - > /dev/null

 # evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -27,14 +25,14 @@ python3 -u test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/vocab.txt' \
--model_path='checkpoints/libri/step_final' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/librispeech/local/run_test_golden.sh
+++ b/examples/librispeech/local/run_test_golden.sh
@ -1,9 +1,7 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # download language model
-cd models/lm > /dev/null
+cd ${MAIN_ROOT}/models/lm > /dev/null
 bash download_lm_en.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -12,7 +10,7 @@ cd - > /dev/null


 # download well-trained model
-cd models/librispeech > /dev/null
+cd ${MAIN_ROOT}/models/librispeech > /dev/null
 bash download_model.sh
 if [ $? -ne 0 ]; then
    exit 1
@ -22,7 +20,7 @@ cd - > /dev/null

 # evaluate model
 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u test.py \
+python3 -u ${MAIN_ROOT}/test.py \
 --batch_size=128 \
 --beam_size=500 \
 --num_proc_bsearch=8 \
@ -36,14 +34,14 @@ python3 -u test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/librispeech/manifest.test-clean' \
--mean_std_path='models/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/librispeech/local/run_train.sh
+++ b/examples/librispeech/local/run_train.sh
@ -1,13 +1,11 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # train model
 # if you wish to resume from an exists model, uncomment --init_from_pretrained_model
 export FLAGS_sync_nccl_allreduce=0

 CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
-python3 -u train.py \
+python3 -u ${MAIN_ROOT}/train.py \
 --batch_size=20 \
 --num_epoch=50 \
 --num_conv_layers=2 \
@ -25,14 +23,14 @@ python3 -u train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=True \
--train_manifest='data/librispeech/manifest.train' \
--dev_manifest='data/librispeech/manifest.dev-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='data/librispeech/vocab.txt' \
--output_model_dir='./checkpoints/libri' \
--augment_conf_path='conf/augmentation.config' \
--specgram_type='linear' \
--shuffle_method='batch_shuffle_clipped' \
+--train_manifest="data/manifest.train" \
+--dev_manifest="data/manifest.dev-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--output_model_dir="./checkpoints/libri" \
+--augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
+--specgram_type="linear" \
+--shuffle_method="batch_shuffle_clipped" \

 if [ $? -ne 0 ]; then
    echo "Failed in training!"
--- a/examples/librispeech/local/run_tune.sh
+++ b/examples/librispeech/local/run_tune.sh
@ -1,7 +1,5 @@
 #! /usr/bin/env bash

-cd ../.. > /dev/null
-
 # grid-search for hyper-parameters in language model
 CUDA_VISIBLE_DEVICES=0,1,2,3 \
 python3 -u tools/tune.py \
@ -23,13 +21,13 @@ python3 -u tools/tune.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--tune_manifest='data/librispeech/manifest.dev-clean' \
--mean_std_path='data/librispeech/mean_std.npz' \
--vocab_path='models/librispeech/vocab.txt' \
--model_path='models/librispeech' \
--lang_model_path='models/lm/common_crawl_00.prune01111.trie.klm' \
--error_rate_type='wer' \
--specgram_type='linear'
+--tune_manifest="data/manifest.dev-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
+--model_path="${MAIN_ROOT}/models/librispeech" \
+--lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
--- a/examples/librispeech/path.sh
+++ b/examples/librispeech/path.sh
@ -0,0 +1,8 @@
+export MAIN_ROOT=${PWD}/../../
+
+export PATH=${MAIN_ROOT}:${PWD}/tools:${PATH}
+export LC_ALL=C
+
+# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C
+export PYTHONIOENCODING=UTF-8 
+export PYTHONPATH=${MAIN_ROOT}:${PYTHONPATH}
--- a/examples/librispeech/run.sh
+++ b/examples/librispeech/run.sh
@ -0,0 +1,24 @@
+#!/bin/bash
+
+source path.sh
+
+# prepare data
+bash ./local/run_data.sh
+
+# test pretrain model
+bash ./local/run_test_golden.sh
+
+# infer with pretrained model
+bash ./local/run_infer_golden.sh
+
+# train model
+bash ./local/run_train.sh
+
+# test model
+bash ./local/run_test.sh
+
+# infer model
+bash ./local/run_infer.sh
+
+# tune model
+bash ./local/run_tune.sh
--- a/examples/librispeech/run_data.sh
+++ b/examples/librispeech/run_data.sh
@ -1,45 +0,0 @@
-#! /usr/bin/env bash
-
-cd ../.. > /dev/null
-
-# download data, generate manifests
-PYTHONPATH=.:$PYTHONPATH python3 data/librispeech/librispeech.py \
--manifest_prefix='data/librispeech/manifest' \
--target_dir='./dataset/librispeech' \
--full_download='True'
-
-if [ $? -ne 0 ]; then
-    echo "Prepare LibriSpeech failed. Terminated."
-    exit 1
-fi
-
-cat data/librispeech/manifest.train-* | shuf > data/librispeech/manifest.train
-
-
-# build vocabulary
-python3 tools/build_vocab.py \
--count_threshold=0 \
--vocab_path='data/librispeech/vocab.txt' \
--manifest_paths='data/librispeech/manifest.train'
-
-if [ $? -ne 0 ]; then
-    echo "Build vocabulary failed. Terminated."
-    exit 1
-fi
-
-
-# compute mean and stddev for normalizer
-python3 tools/compute_mean_std.py \
--manifest_path='data/librispeech/manifest.train' \
--num_samples=2000 \
--specgram_type='linear' \
--output_path='data/librispeech/mean_std.npz'
-
-if [ $? -ne 0 ]; then
-    echo "Compute mean and stddev failed. Terminated."
-    exit 1
-fi
-
-
-echo "LibriSpeech Data preparation done."
-exit 0
--- a/examples/tiny/local/run_data.sh
+++ b/examples/tiny/local/run_data.sh
@ -7,9 +7,9 @@ fi

 # download data, generate manifests
 PYTHONPATH=.:$PYTHONPATH python3 ../librispeech/local/librispeech.py \
--manifest_prefix='data/manifest' \
+--manifest_prefix="data/manifest" \
 --target_dir="${MAIN_ROOT}/dataset/librispeech" \
--full_download='False'
+--full_download="False"

 if [ $? -ne 0 ]; then
    echo "Prepare LibriSpeech failed. Terminated."
@ -21,8 +21,8 @@ head -n 64 data/manifest.dev-clean  > data/manifest.tiny
 # build vocabulary
 python3 ${MAIN_ROOT}/tools/build_vocab.py \
 --count_threshold=0 \
--vocab_path='data/vocab.txt' \
--manifest_paths='data/manifest.tiny'
+--vocab_path="data/vocab.txt" \
+--manifest_paths="data/manifest.tiny"

 if [ $? -ne 0 ]; then
    echo "Build vocabulary failed. Terminated."
@ -32,10 +32,10 @@ fi

 # compute mean and stddev for normalizer
 python3 ${MAIN_ROOT}/tools/compute_mean_std.py \
--manifest_path='data/manifest.tiny' \
+--manifest_path="data/manifest.tiny" \
 --num_samples=64 \
--specgram_type='linear' \
--output_path='data/mean_std.npz'
+--specgram_type="linear" \
+--output_path="data/mean_std.npz"

 if [ $? -ne 0 ]; then
    echo "Compute mean and stddev failed. Terminated."
--- a/examples/tiny/local/run_infer.sh
+++ b/examples/tiny/local/run_infer.sh
@ -25,14 +25,14 @@ python3 -u $MAIN_ROOT/infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/manifest.test-clean' \
--mean_std_path='data/mean_std.npz' \
--vocab_path='data/vocab.txt' \
--model_path='checkpoints/step_final' \
+--infer_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/tiny/local/run_infer_golden.sh
+++ b/examples/tiny/local/run_infer_golden.sh
@ -34,14 +34,14 @@ python3 -u ${MAIN_ROOT}/infer.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--infer_manifest='data/manifest.test-clean' \
+--infer_manifest="data/manifest.test-clean" \
 --mean_std_path="${MAIN_ROOT}/models/librispeech/mean_std.npz" \
 --vocab_path="${MAIN_ROOT}/models/librispeech/vocab.txt" \
 --model_path="${MAIN_ROOT}/models/librispeech" \
 --lang_model_path="${MAIN_ROOT}/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in inference!"
--- a/examples/tiny/local/run_test.sh
+++ b/examples/tiny/local/run_test.sh
@ -25,14 +25,14 @@ python3 -u $MAIN_ROOT/test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/manifest.test-clean' \
--mean_std_path='data/mean_std.npz' \
--vocab_path='data/vocab.txt' \
--model_path='checkpoints/step_final' \
+--test_manifest="data/manifest.test-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--model_path="checkpoints/step_final" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/tiny/local/run_test_golden.sh
+++ b/examples/tiny/local/run_test_golden.sh
@ -34,14 +34,14 @@ python3 -u $MAIN_ROOT/test.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--test_manifest='data/manifest.test-clean' \
+--test_manifest="data/manifest.test-clean" \
 --mean_std_path="$MAIN_ROOT/models/librispeech/mean_std.npz" \
 --vocab_path="$MAIN_ROOT/models/librispeech/vocab.txt" \
 --model_path="$MAIN_ROOT/models/librispeech" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--decoding_method='ctc_beam_search' \
--error_rate_type='wer' \
--specgram_type='linear'
+--decoding_method="ctc_beam_search" \
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in evaluation!"
--- a/examples/tiny/local/run_train.sh
+++ b/examples/tiny/local/run_train.sh
@ -22,14 +22,14 @@ python3 -u ${MAIN_ROOT}/train.py \
 --use_gpu=True \
 --is_local=True \
 --share_rnn_weights=True \
--train_manifest='data/manifest.tiny' \
--dev_manifest='data/manifest.tiny' \
--mean_std_path='data/mean_std.npz' \
--vocab_path='data/vocab.txt' \
--output_model_dir='./checkpoints/' \
+--train_manifest="data/manifest.tiny" \
+--dev_manifest="data/manifest.tiny" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
+--output_model_dir="./checkpoints/" \
 --augment_conf_path="${MAIN_ROOT}/conf/augmentation.config" \
--specgram_type='linear' \
--shuffle_method='batch_shuffle_clipped' \
+--specgram_type="linear" \
+--shuffle_method="batch_shuffle_clipped" \

 if [ $? -ne 0 ]; then
    echo "Failed in training!"
--- a/examples/tiny/local/run_tune.sh
+++ b/examples/tiny/local/run_tune.sh
@ -21,13 +21,13 @@ python3 -u $MAIN_ROOT/tools/tune.py \
 --use_gru=False \
 --use_gpu=True \
 --share_rnn_weights=True \
--tune_manifest='data/manifest.dev-clean' \
--mean_std_path='data/mean_std.npz' \
--vocab_path='data/vocab.txt' \
+--tune_manifest="data/manifest.dev-clean" \
+--mean_std_path="data/mean_std.npz" \
+--vocab_path="data/vocab.txt" \
 --model_path="$MAIN_ROOT/models/librispeech" \
 --lang_model_path="$MAIN_ROOT/models/lm/common_crawl_00.prune01111.trie.klm" \
--error_rate_type='wer' \
--specgram_type='linear'
+--error_rate_type="wer" \
+--specgram_type="linear"

 if [ $? -ne 0 ]; then
    echo "Failed in tuning!"
--- a/requirements.txt
+++ b/requirements.txt
@ -2,4 +2,3 @@ scipy==1.2.1
 resampy==0.1.5
 SoundFile==0.9.0.post1
 python_speech_features
-paddlepaddle-gpu==1.8.5