merge the change

pull/851/head
huangyuxin 3 years ago
commit 285e0c9cad

.gitignore

@@ -18,5 +18,7 @@ tools/sox-14.4.2
 tools/soxbindings
 tools/montreal-forced-aligner/
 tools/Montreal-Forced-Aligner/
+tools/sctk
+tools/sctk-20159b5/
 *output/

@@ -27,7 +27,7 @@ def main_sp(config, args):
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)
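The same change recurs across every entry point below: the `--device` flag is dropped and the worker count alone selects the device. A minimal sketch of the convention (the `launch()` wrapper is hypothetical; `paddle.set_device` and `paddle.distributed.spawn` are the APIs the diff actually relies on):

```python
import paddle
from paddle import distributed as dist


def launch(main_sp, config, args):
    """Run main_sp on GPU workers when nprocs > 0, else on CPU in-process."""
    # nprocs > 0 now implies "use GPUs"; nprocs == 0 means single-process CPU.
    paddle.set_device('gpu' if args.nprocs > 0 else 'cpu')
    if args.nprocs > 0:
        dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
    else:
        main_sp(config, args)
```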

@@ -19,6 +19,7 @@ from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional

+import jsonlines
 import numpy as np
 import paddle
 from paddle import distributed as dist
@@ -305,9 +306,10 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
             len_refs += len_ref
             num_ins += 1
             if fout:
-                fout.write(utt + " " + result + "\n")
-            logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" %
-                        (target, result))
+                fout.write({"utt": utt, "ref": target, "hyp": result})
+            logger.info(f"Utt: {utt}")
+            logger.info(f"Ref: {target}")
+            logger.info(f"Hyp: {result}")
             logger.info("Current error rate [%s] = %f" %
                         (cfg.error_rate_type, error_rate_func(target, result)))
@@ -350,7 +352,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
         cfg = self.config
         error_rate_type = None
         errors_sum, len_refs, num_ins = 0.0, 0, 0
-        with open(self.args.result_file, 'w') as fout:
+        with jsonlines.open(self.args.result_file, 'w') as fout:
             for i, batch in enumerate(self.test_loader):
                 utts, audio, audio_len, texts, texts_len = batch
                 metrics = self.compute_metrics(utts, audio, audio_len, texts,
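A small sketch of what the jsonlines switch changes (illustrative file name and strings, not from the diff): each record becomes one JSON object per line instead of a space-separated "utt result" pair, so references and hypotheses stay machine-readable.

```python
import jsonlines

# writing: jsonlines serializes each dict and appends a newline
with jsonlines.open('result.jsonl', mode='w') as fout:
    fout.write({"utt": "utt_001", "ref": "hello world", "hyp": "hello word"})

# reading back, e.g. for scoring
with jsonlines.open('result.jsonl', mode='r') as reader:
    for record in reader:
        print(record["utt"], record["hyp"])
```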
@@ -403,7 +405,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         self.setup_output_dir()
         self.setup_checkpointer()
@@ -635,7 +637,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         self.setup_output_dir()

@@ -32,7 +32,7 @@ def main_sp(config, args):
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)

@@ -22,6 +22,7 @@ from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional

+import jsonlines
 import numpy as np
 import paddle
 from paddle import distributed as dist
@@ -466,9 +467,10 @@ class U2Tester(U2Trainer):
             len_refs += len_ref
             num_ins += 1
             if fout:
-                fout.write(utt + " " + result + "\n")
-            logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" %
-                        (target, result))
+                fout.write({"utt": utt, "ref": target, "hyp": result})
+            logger.info(f"Utt: {utt}")
+            logger.info(f"Ref: {target}")
+            logger.info(f"Hyp: {result}")
             logger.info("One example error rate [%s] = %f" %
                         (cfg.error_rate_type, error_rate_func(target, result)))
@@ -493,7 +495,7 @@ class U2Tester(U2Trainer):
         errors_sum, len_refs, num_ins = 0.0, 0, 0
         num_frames = 0.0
         num_time = 0.0
-        with open(self.args.result_file, 'w') as fout:
+        with jsonlines.open(self.args.result_file, 'w') as fout:
             for i, batch in enumerate(self.test_loader):
                 metrics = self.compute_metrics(*batch, fout=fout)
                 num_frames += metrics['num_frames']
@@ -653,7 +655,7 @@ class U2Tester(U2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         self.setup_output_dir()
         self.setup_checkpointer()

@@ -36,7 +36,7 @@ def main_sp(config, args):
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)

@@ -21,6 +21,7 @@ from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional

+import jsonlines
 import numpy as np
 import paddle
 from paddle import distributed as dist
@@ -445,9 +446,10 @@ class U2Tester(U2Trainer):
             len_refs += len_ref
             num_ins += 1
             if fout:
-                fout.write(utt + " " + result + "\n")
-            logger.info("\nTarget Transcription: %s\nOutput Transcription: %s" %
-                        (target, result))
+                fout.write({"utt": utt, "ref": target, "hyp": result})
+            logger.info(f"Utt: {utt}")
+            logger.info(f"Ref: {target}")
+            logger.info(f"Hyp: {result}")
             logger.info("One example error rate [%s] = %f" %
                         (cfg.error_rate_type, error_rate_func(target, result)))
@@ -472,7 +474,7 @@ class U2Tester(U2Trainer):
         errors_sum, len_refs, num_ins = 0.0, 0, 0
         num_frames = 0.0
         num_time = 0.0
-        with open(self.args.result_file, 'w') as fout:
+        with jsonlines.open(self.args.result_file, 'w') as fout:
             for i, batch in enumerate(self.test_loader):
                 metrics = self.compute_metrics(*batch, fout=fout)
                 num_frames += metrics['num_frames']
@@ -637,7 +639,7 @@ class U2Tester(U2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         self.setup_output_dir()
         self.setup_checkpointer()

@@ -30,7 +30,7 @@ def main_sp(config, args):
 def main(config, args):
-    if args.device == "gpu" and args.nprocs > 1:
+    if args.nprocs > 0:
         dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     else:
         main_sp(config, args)

@@ -21,6 +21,7 @@ from contextlib import nullcontext
 from pathlib import Path
 from typing import Optional

+import jsonlines
 import numpy as np
 import paddle
 from paddle import distributed as dist
@@ -479,8 +480,10 @@ class U2STTester(U2STTrainer):
             len_refs += len(target.split())
             num_ins += 1
             if fout:
-                fout.write(utt + " " + result + "\n")
-            logger.info("\nReference: %s\nHypothesis: %s" % (target, result))
+                fout.write({"utt": utt, "ref": target, "hyp": result})
+            logger.info(f"Utt: {utt}")
+            logger.info(f"Ref: {target}")
+            logger.info(f"Hyp: {result}")
             logger.info("One example BLEU = %s" %
                         (bleu_func([result], [[target]]).prec_str))
@@ -508,7 +511,7 @@ class U2STTester(U2STTrainer):
         len_refs, num_ins = 0, 0
         num_frames = 0.0
         num_time = 0.0
-        with open(self.args.result_file, 'w') as fout:
+        with jsonlines.open(self.args.result_file, 'w') as fout:
             for i, batch in enumerate(self.test_loader):
                 metrics = self.compute_translation_metrics(
                     *batch, bleu_func=bleu_func, fout=fout)
@@ -661,7 +664,7 @@ class U2STTester(U2STTrainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         self.setup_output_dir()
         self.setup_checkpointer()

@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Contains data helper functions."""
-import codecs
 import json
 import math
 from typing import List
 from typing import Optional
 from typing import Text

+import jsonlines
 import numpy as np

 from deepspeech.utils.log import Log
@@ -92,12 +92,8 @@ def read_manifest(
     """
     manifest = []
-    for json_line in codecs.open(manifest_path, 'r', 'utf-8'):
-        try:
-            json_data = json.loads(json_line)
-        except Exception as e:
-            raise IOError("Error reading manifest: %s" % str(e))
+    with jsonlines.open(manifest_path, 'r') as reader:
+        for json_data in reader:
             feat_len = json_data["feat_shape"][
                 0] if 'feat_shape' in json_data else 1.0
             token_len = json_data["token_shape"][

@@ -14,6 +14,20 @@
 import argparse

+class ExtendAction(argparse.Action):
+    """
+    [Since Python 3.8, the "extend" action is available directly in stdlib]
+    (https://docs.python.org/3.8/library/argparse.html#action).
+    If you only have to support 3.8+, defining it yourself is no longer
+    required; usage of the stdlib "extend" action is exactly the same as
+    this custom one.
+    """
+
+    def __call__(self, parser, namespace, values, option_string=None):
+        items = getattr(namespace, self.dest) or []
+        items.extend(values)
+        setattr(namespace, self.dest, items)
+

 def default_argument_parser():
     r"""A simple yet general argument parser for experiments with parakeet.
@@ -30,7 +44,7 @@ def default_argument_parser():
     The ``--checkpoint_path`` specifies the checkpoint to load from.

-    The ``--device`` and ``--nprocs`` specifies how to run the training.
+    The ``--nprocs`` specifies how to run the training.

     See Also
@@ -42,6 +56,7 @@ def default_argument_parser():
         the parser
     """
     parser = argparse.ArgumentParser()
+    parser.register('action', 'extend', ExtendAction)

     train_group = parser.add_argument_group(
         title='Train Options', description=None)
@@ -51,12 +66,6 @@ def default_argument_parser():
         default=None,
         help="seed to use for paddle, np and random. None or 0 for random, else set seed."
     )
-    train_group.add_argument(
-        "--device",
-        type=str,
-        default='gpu',
-        choices=["cpu", "gpu"],
-        help="device cpu and gpu are supported.")
     train_group.add_argument(
         "--nprocs",
         type=int,
@@ -70,10 +79,10 @@ def default_argument_parser():
         "--checkpoint_path", type=str, help="path to load checkpoint")
     train_group.add_argument(
         "--opts",
-        type=str,
-        default=[],
-        nargs='+',
-        help="overwrite --config file, passing in LIST[KEY VALUE] pairs")
+        action='extend',
+        nargs=2,
+        metavar=('key', 'val'),
+        help="overwrite --config field, passing (KEY VALUE) pairs")
     train_group.add_argument(
         "--dump-config", metavar="FILE", help="dump config to `this` file.")

@@ -86,7 +86,7 @@ class Trainer():
     >>> config.merge_from_list(args.opts)
     >>> config.freeze()
     >>>
-    >>> if args.nprocs > 1 and args.device == "gpu":
+    >>> if args.nprocs > 0:
     >>>     dist.spawn(main_sp, args=(config, args), nprocs=args.nprocs)
     >>> else:
     >>>     main_sp(config, args)
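For context, a hedged sketch of how the docstring's `config.merge_from_list(args.opts)` consumes that flat list (assuming a yacs `CfgNode`, which the `freeze()`/`merge_from_list()` calls suggest; the field names are illustrative):

```python
from yacs.config import CfgNode

config = CfgNode()
config.decoding = CfgNode()
config.decoding.decoding_method = 'attention'
config.decoding.batch_size = 1

# the flat list produced by repeated --opts flags
config.merge_from_list(['decoding.decoding_method', 'ctc_prefix_beam_search',
                        'decoding.batch_size', '16'])
config.freeze()
print(config.decoding.batch_size)  # -> 16 (string '16' is coerced to int)
```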
@@ -119,7 +119,7 @@ class Trainer():
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         if self.parallel:
             self.init_parallel()
@@ -139,7 +139,7 @@ class Trainer():
         """A flag indicating whether the experiment should run with
         multiprocessing.
         """
-        return self.args.device == "gpu" and self.args.nprocs > 1
+        return self.args.nprocs > 1

     def init_parallel(self):
         """Init environment for multiprocess training.

@@ -94,9 +94,19 @@ def pad_sequence(sequences: List[paddle.Tensor],
         length = tensor.shape[0]
         # use index notation to prevent duplicate references to the tensor
         if batch_first:
-            out_tensor[i, :length, ...] = tensor
+            # TODO (Hui Zhang): set_value op does not support `end == start`
+            # out_tensor[i, :length, ...] = tensor
+            if length != 0:
+                out_tensor[i, :length, ...] = tensor
+            else:
+                out_tensor[i, length, ...] = tensor
         else:
-            out_tensor[:length, i, ...] = tensor
+            # TODO (Hui Zhang): set_value op does not support `end == start`
+            # out_tensor[:length, i, ...] = tensor
+            if length != 0:
+                out_tensor[:length, i, ...] = tensor
+            else:
+                out_tensor[length, i, ...] = tensor
     return out_tensor
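An illustrative stand-in for what `pad_sequence` computes (numpy is used here only to show the padding layout; the guard above exists because, per the TODO, paddle's `set_value` op cannot take an `end == start` slice for zero-length sequences, whereas numpy handles it fine):

```python
import numpy as np

def pad_sequence_np(sequences, batch_first=True, padding_value=0.0):
    # all sequences must share trailing dims; dim 0 is the (variable) length
    max_len = max(s.shape[0] for s in sequences)
    trailing = sequences[0].shape[1:]
    shape = ((len(sequences), max_len) if batch_first
             else (max_len, len(sequences))) + trailing
    out = np.full(shape, padding_value, dtype=sequences[0].dtype)
    for i, s in enumerate(sequences):
        length = s.shape[0]  # a zero-length sequence simply leaves padding
        if batch_first:
            out[i, :length, ...] = s
        else:
            out[:length, i, ...] = s
    return out

print(pad_sequence_np([np.ones(3), np.ones(0)]))  # second row is all padding
```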

@ -0,0 +1,11 @@
# 1xt2x
Convert Deepspeech 1.8 released model to 2.x.
## Model
* Deepspeech2x
## Exp
* baidu_en8k
* aishell
* librispeech

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \

@@ -401,7 +401,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer):
     def setup(self):
         """Setup the experiment.
         """
-        paddle.set_device(self.args.device)
+        paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu')
         self.setup_output_dir()
         self.setup_checkpointer()

@@ -13,13 +13,7 @@ ckpt_path_prefix=$2
 jit_model_export_path=$3
 model_type=$4

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 jit_model_export_path=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test_export.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${jit_model_export_path}.rsl \
 --export_path ${jit_model_export_path} \

@@ -12,11 +12,6 @@ config_path=$1
 ckpt_name=$2
 model_type=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 mkdir -p exp

 # seed may break model convergence
@@ -26,7 +21,6 @@ if [ ${seed} != 0 ]; then
 fi

 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}

 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,11 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -39,8 +34,7 @@ for type in attention ctc_greedy_search; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -58,8 +52,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \

@@ -12,11 +12,6 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
@@ -34,7 +29,6 @@ mkdir -p exp

 python3 -u ${BIN_DIR}/train.py \
 --seed ${seed} \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -20,7 +16,6 @@ ckpt_name=$(basename ${ckpt_prefxi})

 mkdir -p exp
-batch_size=1
 output_dir=${ckpt_prefix}
 mkdir -p ${output_dir}
@@ -28,8 +23,7 @@ mkdir -p ${output_dir}

 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -32,8 +28,7 @@ for type in attention ctc_greedy_search; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -51,8 +46,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     output_dir=${ckpt_prefix}
     mkdir -p ${output_dir}
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${output_dir}/${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \

@@ -11,10 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 echo "using ${device}..."

 mkdir -p exp
@@ -26,7 +22,6 @@ if [ ${seed} != 0 ]; then
 fi

 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -13,13 +13,7 @@ ckpt_path_prefix=$2
 jit_model_export_path=$3
 model_type=$4

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,12 +12,6 @@ config_path=$1
 ckpt_name=$2
 model_type=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."

 mkdir -p exp

 # seed may break model convergence
@@ -27,7 +21,6 @@ if [ ${seed} != 0 ]; then
 fi

 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -0,0 +1,89 @@
+# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
+# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
+# e.g.
+#  run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
+#
+# Options:
+#   --time <time>: Limit the maximum time to execute.
+#   --mem <mem>: Limit the maximum memory usage.
+#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
+#   --num-threads <ngpu>: Specify the number of CPU cores.
+#   --gpu <ngpu>: Specify the number of GPU devices.
+#   --config: Change the configuration file from the default.
+#
+# "JOB=1:10" is used for "array jobs" and it controls the number of parallel jobs.
+# The string left of "=", i.e. "JOB", is replaced by <N> (the Nth job) in the command and the log file name,
+# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for the 8th job respectively.
+# Note that the numbering must start from a positive number, so you can't use "JOB=0:10", for example.
+#
+# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface, not depending on the backend.
+# These options are mapped to backend-specific options, configured by
+# "conf/queue.conf" and "conf/slurm.conf" by default.
+# If jobs fail, your configuration might be wrong for your environment.
+#
+#
+# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
+#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
+# =========================================================
+
+# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
+cmd_backend='local'
+
+# Local machine, without any job scheduling system
+if [ "${cmd_backend}" = local ]; then
+
+    # The other usage
+    export train_cmd="run.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="run.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="run.pl"
+
+# "qsub" (SGE, Torque, PBS, etc.)
+elif [ "${cmd_backend}" = sge ]; then
+    # The default setting is written in conf/queue.conf.
+    # You must change "-q g.q" to the "queue" for your environment.
+    # To see the "queue" names, type "qhost -q".
+    # Note that to use "--gpu *", you have to set up "complex_value" for the system scheduler.
+    export train_cmd="queue.pl"
+    export cuda_cmd="queue.pl"
+    export decode_cmd="queue.pl"
+
+# "sbatch" (Slurm)
+elif [ "${cmd_backend}" = slurm ]; then
+    # The default setting is written in conf/slurm.conf.
+    # You must change "-p cpu" and "-p gpu" to the "partition" for your environment.
+    # To see the "partition" names, type "sinfo".
+    # You can use "--gpu *" by default for Slurm and it is interpreted as "--gres gpu:*".
+    # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
+    export train_cmd="slurm.pl"
+    export cuda_cmd="slurm.pl"
+    export decode_cmd="slurm.pl"
+
+elif [ "${cmd_backend}" = ssh ]; then
+    # You have to create ".queue/machines" to specify the hosts on which to execute jobs.
+    # e.g. .queue/machines
+    #   host1
+    #   host2
+    #   host3
+    # This assumes you can log in to them without a password, i.e. you have to set up ssh keys.
+    export train_cmd="ssh.pl"
+    export cuda_cmd="ssh.pl"
+    export decode_cmd="ssh.pl"
+
+# This is an example of specifying several unique options in the JHU CLSP cluster setup.
+# Users can modify/add their own command options according to their cluster environments.
+elif [ "${cmd_backend}" = jhu ]; then
+
+    export train_cmd="queue.pl --mem 2G"
+    export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
+    export decode_cmd="queue.pl --mem 4G"
+
+else
+    echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
+    return 1
+
+fi

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}

 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -1,20 +1,33 @@
 #!/bin/bash

-if [ $# != 2 ];then
-    echo "usage: ${0} config_path ckpt_path_prefix"
+set -e
+
+expdir=exp
+datadir=data
+
+nj=32
+lmtag=
+
+recog_set="test-clean test-other dev-clean dev-other"
+recog_set="test-clean"
+
+# bpemode (unigram or bpe)
+nbpe=5000
+bpemode=unigram
+bpeprefix="data/bpe_${bpemode}_${nbpe}"
+bpemodel=${bpeprefix}.model
+
+if [ $# != 3 ];then
+    echo "usage: ${0} config_path dict_path ckpt_path_prefix"
     exit -1
 fi

 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
-ckpt_prefix=$2
+dict=$2
+ckpt_prefix=$3

 chunk_mode=false
 if [[ ${config_path} =~ ^.*chunk_.*yaml$ ]];then
@@ -29,44 +42,46 @@ echo "chunk mode ${chunk_mode}"
 # exit 1
 #fi

-for type in attention ctc_greedy_search; do
-    echo "decoding ${type}"
-    if [ ${chunk_mode} == true ];then
-        # stream decoding only support batchsize=1
-        batch_size=1
-    else
-        batch_size=64
-    fi
-    python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
-    --config ${config_path} \
-    --result_file ${ckpt_prefix}.${type}.rsl \
-    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
-    if [ $? -ne 0 ]; then
-        echo "Failed in evaluation!"
-        exit 1
-    fi
-done
-
-for type in ctc_prefix_beam_search attention_rescoring; do
-    echo "decoding ${type}"
-    batch_size=1
-    python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
-    --config ${config_path} \
-    --result_file ${ckpt_prefix}.${type}.rsl \
-    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
-    if [ $? -ne 0 ]; then
-        echo "Failed in evaluation!"
-        exit 1
-    fi
-done
+pids=() # initialize pids
+
+for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
+(
+    for rtask in ${recog_set}; do
+    (
+        decode_dir=decode_${rtask}_${dmethd}_$(basename ${config_path%.*})_${lmtag}
+        feat_recog_dir=${datadir}
+        mkdir -p ${expdir}/${decode_dir}
+        mkdir -p ${feat_recog_dir}
+
+        # split data
+        split_json.sh ${feat_recog_dir}/manifest.${rtask} ${nj}
+
+        #### use CPU for decoding
+        ngpu=0
+
+        # set batchsize 0 to disable batch decoding
+        batch_size=1
+
+        ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \
+            python3 -u ${BIN_DIR}/test.py \
+            --nproc ${ngpu} \
+            --config ${config_path} \
+            --result_file ${expdir}/${decode_dir}/data.JOB.json \
+            --checkpoint_path ${ckpt_prefix} \
+            --opts decoding.decoding_method ${dmethd} \
+            --opts decoding.batch_size ${batch_size} \
+            --opts data.test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask}
+
+        score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel}.model --wer true ${expdir}/${decode_dir} ${dict}
+    ) &
+    pids+=($!) # store background pids
+    done
+) &
+pids+=($!) # store background pids
+done
+
+i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
+[ ${i} -gt 0 ] && echo "$0: ${i} background jobs are failed." && false
+echo "Finished"

 exit 0

@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."

 mkdir -p exp
@@ -25,8 +19,10 @@ if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
 fi

+# export FLAGS_cudnn_exhaustive_search=true
+# export FLAGS_conv_workspace_size_limit=4000
 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@ -1,6 +1,6 @@
export MAIN_ROOT=`realpath ${PWD}/../../../` export MAIN_ROOT=`realpath ${PWD}/../../../`
export PATH=${MAIN_ROOT}:${PWD}/utils:${PATH} export PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}
export LC_ALL=C export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C

@@ -1,12 +1,15 @@
 #!/bin/bash
 set -e
-source path.sh
+
+. ./path.sh || exit 1;
+. ./cmd.sh || exit 1;

 stage=0
 stop_stage=100
 conf_path=conf/transformer.yaml
 avg_num=5

-source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
+. ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

 avg_ckpt=avg_${avg_num}
 ckpt=$(basename ${conf_path} | awk -F'.' '{print $1}')

@@ -0,0 +1,89 @@
+# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ======
+# Usage: <cmd>.pl [options] JOB=1:<nj> <log> <command...>
+# e.g.
+#  run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB
+#
+# Options:
+#   --time <time>: Limit the maximum time to execute.
+#   --mem <mem>: Limit the maximum memory usage.
+#   --max-jobs-run <njob>: Limit the number of parallel jobs. This is ignored for non-array jobs.
+#   --num-threads <ngpu>: Specify the number of CPU cores.
+#   --gpu <ngpu>: Specify the number of GPU devices.
+#   --config: Change the configuration file from the default.
+#
+# "JOB=1:10" is used for "array jobs" and it controls the number of parallel jobs.
+# The string left of "=", i.e. "JOB", is replaced by <N> (the Nth job) in the command and the log file name,
+# e.g. "echo JOB" is changed to "echo 3" for the 3rd job and "echo 8" for the 8th job respectively.
+# Note that the numbering must start from a positive number, so you can't use "JOB=0:10", for example.
+#
+# run.pl, queue.pl, slurm.pl, and ssh.pl have a unified interface, not depending on the backend.
+# These options are mapped to backend-specific options, configured by
+# "conf/queue.conf" and "conf/slurm.conf" by default.
+# If jobs fail, your configuration might be wrong for your environment.
+#
+#
+# The official documentation for run.pl, queue.pl, slurm.pl, and ssh.pl:
+#   "Parallelization in Kaldi": http://kaldi-asr.org/doc/queue.html
+# =========================================================
+
+# Select the backend used by run.sh from "local", "sge", "slurm", or "ssh"
+cmd_backend='local'
+
+# Local machine, without any job scheduling system
+if [ "${cmd_backend}" = local ]; then
+
+    # The other usage
+    export train_cmd="run.pl"
+    # Used for "*_train.py": "--gpu" is appended optionally by run.sh
+    export cuda_cmd="run.pl"
+    # Used for "*_recog.py"
+    export decode_cmd="run.pl"
+
+# "qsub" (SGE, Torque, PBS, etc.)
+elif [ "${cmd_backend}" = sge ]; then
+    # The default setting is written in conf/queue.conf.
+    # You must change "-q g.q" to the "queue" for your environment.
+    # To see the "queue" names, type "qhost -q".
+    # Note that to use "--gpu *", you have to set up "complex_value" for the system scheduler.
+    export train_cmd="queue.pl"
+    export cuda_cmd="queue.pl"
+    export decode_cmd="queue.pl"
+
+# "sbatch" (Slurm)
+elif [ "${cmd_backend}" = slurm ]; then
+    # The default setting is written in conf/slurm.conf.
+    # You must change "-p cpu" and "-p gpu" to the "partition" for your environment.
+    # To see the "partition" names, type "sinfo".
+    # You can use "--gpu *" by default for Slurm and it is interpreted as "--gres gpu:*".
+    # The devices are allocated exclusively using "${CUDA_VISIBLE_DEVICES}".
+    export train_cmd="slurm.pl"
+    export cuda_cmd="slurm.pl"
+    export decode_cmd="slurm.pl"
+
+elif [ "${cmd_backend}" = ssh ]; then
+    # You have to create ".queue/machines" to specify the hosts on which to execute jobs.
+    # e.g. .queue/machines
+    #   host1
+    #   host2
+    #   host3
+    # This assumes you can log in to them without a password, i.e. you have to set up ssh keys.
+    export train_cmd="ssh.pl"
+    export cuda_cmd="ssh.pl"
+    export decode_cmd="ssh.pl"
+
+# This is an example of specifying several unique options in the JHU CLSP cluster setup.
+# Users can modify/add their own command options according to their cluster environments.
+elif [ "${cmd_backend}" = jhu ]; then
+
+    export train_cmd="queue.pl --mem 2G"
+    export cuda_cmd="queue-freegpu.pl --mem 2G --gpu 1 --config conf/gpu.conf"
+    export decode_cmd="queue.pl --mem 4G"
+
+else
+    echo "$0: Error: Unknown cmd_backend=${cmd_backend}" 1>&2
+    return 1
+
+fi

@@ -12,7 +12,7 @@ collator:
   stride_ms: 10.0
   window_ms: 25.0
   sortagrad: 0 # Feed samples from shortest to longest ; -1: enabled for all epochs, 0: disabled, other: enabled for 'other' epochs
-  batch_size: 32
+  batch_size: 30
   maxlen_in: 512 # if input length > maxlen-in, batchsize is automatically reduced
   maxlen_out: 150 # if output length > maxlen-out, batchsize is automatically reduced
   minibatches: 0 # for debug
@@ -59,7 +59,7 @@ model:
   model_conf:
     ctc_weight: 0.3
     ctc_dropoutrate: 0.0
-    ctc_grad_norm_type: instance
+    ctc_grad_norm_type: batch
    lsm_weight: 0.1 # label smoothing option
     length_normalized_loss: false
@@ -83,7 +83,7 @@ scheduler_conf:
   lr_decay: 1.0

 decoding:
-  batch_size: 64
+  batch_size: 1
   error_rate_type: wer
   decoding_method: attention # 'attention', 'ctc_greedy_search', 'ctc_prefix_beam_search', 'attention_rescoring'
   lang_model_path: data/lm/common_crawl_00.prune01111.trie.klm

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 dict_path=$2
 ckpt_prefix=$3
@@ -26,8 +22,7 @@ python3 -u ${BIN_DIR}/test.py \
 --model-name 'u2_kaldi' \
 --run-mode 'align' \
 --dict-path ${dict_path} \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result-file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,15 +12,9 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/test.py \
 --model-name 'u2_kaldi' \
 --run-mode 'export' \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -1,5 +1,22 @@
 #!/bin/bash

+set -e
+
+expdir=exp
+datadir=data
+
+nj=32
+lmtag=
+
+recog_set="test-clean test-other dev-clean dev-other"
+recog_set="test-clean"
+
+# bpemode (unigram or bpe)
+nbpe=5000
+bpemode=unigram
+bpeprefix="data/bpe_${bpemode}_${nbpe}"
+bpemodel=${bpeprefix}.model
+
 if [ $# != 3 ];then
     echo "usage: ${0} config_path dict_path ckpt_path_prefix"
     exit -1
@@ -8,13 +25,8 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
-dict_path=$2
+dict=$2
 ckpt_prefix=$3

 chunk_mode=false
@@ -30,50 +42,49 @@ echo "chunk mode ${chunk_mode}"
 # exit 1
 #fi

-for type in attention ctc_greedy_search; do
-    echo "decoding ${type}"
-    if [ ${chunk_mode} == true ];then
-        # stream decoding only support batchsize=1
-        batch_size=1
-    else
-        batch_size=64
-    fi
-    python3 -u ${BIN_DIR}/test.py \
-    --model-name u2_kaldi \
-    --run-mode test \
-    --dict-path ${dict_path} \
-    --device ${device} \
-    --nproc 1 \
-    --config ${config_path} \
-    --result-file ${ckpt_prefix}.${type}.rsl \
-    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
-    if [ $? -ne 0 ]; then
-        echo "Failed in evaluation!"
-        exit 1
-    fi
-done
-
-for type in ctc_prefix_beam_search attention_rescoring; do
-    echo "decoding ${type}"
-    batch_size=1
-    python3 -u ${BIN_DIR}/test.py \
-    --model-name u2_kaldi \
-    --run-mode test \
-    --dict-path ${dict_path} \
-    --device ${device} \
-    --nproc 1 \
-    --config ${config_path} \
-    --result-file ${ckpt_prefix}.${type}.rsl \
-    --checkpoint_path ${ckpt_prefix} \
-    --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size}
-    if [ $? -ne 0 ]; then
-        echo "Failed in evaluation!"
-        exit 1
-    fi
-done
+pids=() # initialize pids
+
+for dmethd in attention ctc_greedy_search ctc_prefix_beam_search attention_rescoring; do
+(
+    for rtask in ${recog_set}; do
+    (
+        decode_dir=decode_${rtask}_${dmethd}_$(basename ${config_path%.*})_${lmtag}
+        feat_recog_dir=${datadir}
+        mkdir -p ${expdir}/${decode_dir}
+        mkdir -p ${feat_recog_dir}
+
+        # split data
+        split_json.sh ${feat_recog_dir}/manifest.${rtask} ${nj}
+
+        #### use CPU for decoding
+        ngpu=0
+
+        # set batchsize 0 to disable batch decoding
+        batch_size=1
+
+        ${decode_cmd} JOB=1:${nj} ${expdir}/${decode_dir}/log/decode.JOB.log \
+            python3 -u ${BIN_DIR}/test.py \
+            --model-name u2_kaldi \
+            --run-mode test \
+            --nproc ${ngpu} \
+            --dict-path ${dict} \
+            --config ${config_path} \
+            --checkpoint_path ${ckpt_prefix} \
+            --result-file ${expdir}/${decode_dir}/data.JOB.json \
+            --opts decoding.decoding_method ${dmethd} \
+            --opts decoding.batch_size ${batch_size} \
+            --opts data.test_manifest ${feat_recog_dir}/split${nj}/JOB/manifest.${rtask}
+
+        score_sclite.sh --bpe ${nbpe} --bpemodel ${bpemodel}.model --wer true ${expdir}/${decode_dir} ${dict}
+    ) &
+    pids+=($!) # store background pids
+    done
+) &
+pids+=($!) # store background pids
+done
+
+i=0; for pid in "${pids[@]}"; do wait ${pid} || ((++i)); done
+[ ${i} -gt 0 ] && echo "$0: ${i} background jobs are failed." && false
+echo "Finished"

 exit 0

@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."

 mkdir -p exp

 # seed may break model convergence
@@ -27,7 +21,6 @@ fi
 python3 -u ${BIN_DIR}/train.py \
 --model-name u2_kaldi \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@ -1,6 +1,6 @@
export MAIN_ROOT=`realpath ${PWD}/../../../` export MAIN_ROOT=`realpath ${PWD}/../../../`
export PATH=${MAIN_ROOT}:${PWD}/utils:${PATH} export PATH=${MAIN_ROOT}:${MAIN_ROOT}/tools/sctk/bin:${PWD}/utils:${PATH}
export LC_ALL=C export LC_ALL=C
# Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C # Use UTF-8 in Python to avoid UnicodeDecodeError when LC_ALL=C

@@ -1,12 +1,14 @@
 #!/bin/bash
 set -e
-source path.sh
+
+. ./path.sh || exit 1;
+. ./cmd.sh || exit 1;

 stage=0
 stop_stage=100
 conf_path=conf/transformer.yaml
 dict_path=data/train_960_unigram5000_units.txt
-avg_num=5
+avg_num=10
 source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;

 avg_ckpt=avg_${avg_num}
@@ -20,12 +22,12 @@ fi

 if [ ${stage} -le 1 ] && [ ${stop_stage} -ge 1 ]; then
     # train model, all `ckpt` under `exp` dir
-    CUDA_VISIBLE_DEVICES=0,1,2,3 ./local/train.sh ${conf_path} ${ckpt}
+    CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 ./local/train.sh ${conf_path} ${ckpt}
 fi

 if [ ${stage} -le 2 ] && [ ${stop_stage} -ge 2 ]; then
     # avg n best model
-    avg.sh best exp/${ckpt}/checkpoints ${avg_num}
+    avg.sh latest exp/${ckpt}/checkpoints ${avg_num}
 fi

 if [ ${stage} -le 3 ] && [ ${stop_stage} -ge 3 ]; then

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -19,8 +15,7 @@ for type in fullsentence; do
     echo "decoding ${type}"
     batch_size=32
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \

@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."

 mkdir -p exp

 # seed may break model convergence
@@ -26,7 +20,6 @@ if [ ${seed} != 0 ]; then
 fi

 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}

 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,11 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -37,8 +32,7 @@ for type in attention ctc_greedy_search; do
         batch_size=64
     fi
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -54,8 +48,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     echo "decoding ${type}"
     batch_size=1
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \

@@ -11,12 +11,6 @@ echo "using $ngpu gpus..."
 config_path=$1
 ckpt_name=$2

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-echo "using ${device}..."

 mkdir -p exp

 # seed may break model convergence
@@ -26,7 +20,6 @@ if [ ${seed} != 0 ]; then
 fi

 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -13,13 +13,7 @@ ckpt_path_prefix=$2
 jit_model_export_path=$3
 model_type=$4

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
 model_type=$3
@@ -23,8 +19,7 @@ if [ $? -ne 0 ]; then
 fi

 python3 -u ${BIN_DIR}/test.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${ckpt_prefix}.rsl \
 --checkpoint_path ${ckpt_prefix} \

@@ -10,17 +10,11 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
-
 if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
 fi

 if [ $# != 3 ];then
     echo "usage: CUDA_VISIBLE_DEVICES=0 ${0} config_path ckpt_name model_type"
     exit -1
@@ -33,7 +27,6 @@ model_type=$3
 mkdir -p exp

 python3 -u ${BIN_DIR}/train.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 config_path=$1
 ckpt_prefix=$2
@@ -22,8 +18,7 @@ mkdir -p ${output_dir}

 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
 python3 -u ${BIN_DIR}/alignment.py \
---device ${device} \
---nproc 1 \
+--nproc ${ngpu} \
 --config ${config_path} \
 --result_file ${output_dir}/${type}.align \
 --checkpoint_path ${ckpt_prefix} \

@@ -12,13 +12,7 @@ config_path=$1
 ckpt_path_prefix=$2
 jit_model_export_path=$3

-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi

 python3 -u ${BIN_DIR}/export.py \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --checkpoint_path ${ckpt_path_prefix} \

@@ -8,10 +8,6 @@ fi
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 config_path=$1
 ckpt_prefix=$2
@@ -35,8 +31,7 @@ for type in attention ctc_greedy_search; do
         batch_size=64
     fi
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
@@ -52,8 +47,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     echo "decoding ${type}"
     batch_size=1
     python3 -u ${BIN_DIR}/test.py \
-    --device ${device} \
-    --nproc 1 \
+    --nproc ${ngpu} \
     --config ${config_path} \
     --result_file ${ckpt_prefix}.${type}.rsl \
     --checkpoint_path ${ckpt_prefix} \
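A usage sketch of the updated decode stage (argument lists vary by recipe; the config and checkpoint paths here are illustrative, not taken from this diff):

    # GPU decoding: the script derives ngpu from CUDA_VISIBLE_DEVICES.
    CUDA_VISIBLE_DEVICES=0 ./local/test.sh conf/conformer.yaml exp/conformer/checkpoints/avg_20

    # CPU decoding: with no visible GPUs, ngpu resolves to 0.
    CUDA_VISIBLE_DEVICES= ./local/test.sh conf/conformer.yaml exp/conformer/checkpoints/avg_20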

@@ -12,11 +12,6 @@ source ${MAIN_ROOT}/utils/parse_options.sh || exit 1;
 ngpu=$(echo $CUDA_VISIBLE_DEVICES | awk -F "," '{print NF}')
 echo "using $ngpu gpus..."
-device=gpu
-if [ ${ngpu} == 0 ];then
-    device=cpu
-fi
 if [ ${seed} != 0 ]; then
     export FLAGS_cudnn_deterministic=True
     echo "using seed $seed & FLAGS_cudnn_deterministic=True ..."
@@ -34,7 +29,6 @@ mkdir -p exp
 python3 -u ${BIN_DIR}/train.py \
 --seed ${seed} \
---device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
 --output exp/${ckpt_name} \

@@ -1,7 +1,6 @@
 coverage
 gpustat
 jsonlines
-jsonlines
 kaldiio
 loguru
 Pillow

@@ -13,7 +13,7 @@ null:null
 null:null
 ##
 trainer:norm_train
-norm_train: ../../../deepspeech/exps/deepspeech2/bin/train.py --nproc 1 --config conf/deepspeech2.yaml --model_type offline --device gpu
+norm_train: ../../../deepspeech/exps/deepspeech2/bin/train.py --nproc 1 --config conf/deepspeech2.yaml --model_type offline
 pact_train:null
 fpgm_train:null
 distill_train:null
@@ -21,7 +21,7 @@ null:null
 null:null
 ##
 ===========================eval_params===========================
-eval: ../../../deepspeech/exps/deepspeech2/bin/test.py --nproc 1 --config conf/deepspeech2.yaml --result_file tests/9.rsl --model_type offline --device gpu
+eval: ../../../deepspeech/exps/deepspeech2/bin/test.py --nproc 1 --config conf/deepspeech2.yaml --result_file tests/9.rsl --model_type offline
 null:null
 ##
 ===========================infer_params===========================

@@ -1,8 +1,16 @@
 SHELL:= /bin/bash
 PYTHON:= python3.7
+CXX ?= g++
+CC ?= gcc        # used for sph2pipe
+# CXX = clang++  # Uncomment these lines...
+# CC = clang     # ...to build with Clang.
+WGET ?= wget
 .PHONY: all clean
-all: virtualenv kenlm.done sox.done soxbindings.done mfa.done
+all: virtualenv kenlm.done sox.done soxbindings.done mfa.done sclite.done
 virtualenv:
 	test -d venv || virtualenv -p $(PYTHON) venv
@@ -39,3 +47,50 @@ mfa.done:
 	test -d montreal-forced-aligner || wget https://github.com/MontrealCorpusTools/Montreal-Forced-Aligner/releases/download/v1.0.1/montreal-forced-aligner_linux.tar.gz
 	tar xvf montreal-forced-aligner_linux.tar.gz
 	touch mfa.done
+
+#== SCTK ===============================================================================
+# SCTK official repo does not have version tags. Here's the mapping:
+# 2.4.9 = 659bc36; 2.4.10 = d914e1b; 2.4.11 = 20159b5.
+SCTK_GITHASH = 20159b5
+
+SCTK_CXFLAGS = -w -march=native
+SCTK_MKENV = CFLAGS="$(CFLAGS) $(SCTK_CXFLAGS)" \
+             CXXFLAGS="$(CXXFLAGS) -std=c++11 $(SCTK_CXFLAGS)" \
+
+# Keep the existing target 'sclite' to avoid breaking the users who might have
+# scripted it in.
+.PHONY: sclite.done sctk_cleaned sctk_made
+
+sclite.done sctk_made: sctk/.compiled
+	touch sclite.done
+
+sctk/.compiled: sctk
+	rm -f sctk/.compiled
+	$(SCTK_MKENV) $(MAKE) -C sctk config
+	$(SCTK_MKENV) $(MAKE) -C sctk all doc
+	$(MAKE) -C sctk install
+	touch sctk/.compiled
+
+# The GitHub archive unpacks into SCTK-{40-character-long-hash}/
+sctk: sctk-$(SCTK_GITHASH).tar.gz
+	tar zxvf sctk-$(SCTK_GITHASH).tar.gz
+	rm -rf sctk-$(SCTK_GITHASH) sctk
+	mv SCTK-$(SCTK_GITHASH)* sctk-$(SCTK_GITHASH)
+	ln -s sctk-$(SCTK_GITHASH) sctk
+	touch sctk-$(SCTK_GITHASH).tar.gz
+
+sctk-$(SCTK_GITHASH).tar.gz:
+	if [ -d '$(DOWNLOAD_DIR)' ]; then \
+		cp -p '$(DOWNLOAD_DIR)/sctk-$(SCTK_GITHASH).tar.gz' .; \
+	else \
+		$(WGET) -nv -T 10 -t 3 -O sctk-$(SCTK_GITHASH).tar.gz \
+			https://github.com/usnistgov/SCTK/archive/$(SCTK_GITHASH).tar.gz; \
+	fi
+
+sctk_cleaned:
+	-for d in sctk/ sctk-*/; do \
+		[ ! -f $$d/.compiled ] || $(MAKE) -C $$d clean; \
+		rm -f $$d/.compiled; \
+	done
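With this target in place, SCTK builds from the pinned GitHub hash and installs sclite under tools/sctk/bin. A short usage sketch; the .trn file names are placeholders, where each trn line is a transcript followed by an (utterance-id):

    # Build SCTK via the new target.
    make -C tools sclite.done

    # Score hypotheses against references with sclite (illustrative file names).
    tools/sctk/bin/sclite -r ref.trn trn -h hyp.trn trn -i rm -o all stdout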

@@ -0,0 +1,4 @@
+# Utils
+
+* [kaldi utils](https://github.com/kaldi-asr/kaldi/blob/cbed4ff688/egs/wsj/s5/utils)
+* [espnet utils](https://github.com/espnet/espnet/tree/master/utils)

@@ -27,8 +27,9 @@ def main(args):
     val_scores = []
     beat_val_scores = []
     selected_epochs = []
-    if args.val_best:
-        jsons = glob.glob(f'{args.ckpt_dir}/[!train]*.json')
+    jsons = glob.glob(f'{args.ckpt_dir}/[!train]*.json')
+    jsons = sorted(jsons, key=os.path.getmtime, reverse=True)
     for y in jsons:
         with open(y, 'r') as f:
             dic_json = json.load(f)
@@ -36,24 +37,23 @@ def main(args):
         epoch = dic_json['epoch']
         if epoch >= args.min_epoch and epoch <= args.max_epoch:
             val_scores.append((epoch, loss))
     val_scores = np.array(val_scores)
+    if args.val_best:
         sort_idx = np.argsort(val_scores[:, 1])
         sorted_val_scores = val_scores[sort_idx]
-        path_list = [
-            args.ckpt_dir + '/{}.pdparams'.format(int(epoch))
-            for epoch in sorted_val_scores[:args.num, 0]
-        ]
+    else:
+        sorted_val_scores = val_scores
     beat_val_scores = sorted_val_scores[:args.num, 1]
     selected_epochs = sorted_val_scores[:args.num, 0].astype(np.int64)
-    print("best val scores = " + str(beat_val_scores))
+    print("selected val scores = " + str(beat_val_scores))
     print("selected epochs = " + str(selected_epochs))
-    else:
-        path_list = glob.glob(f'{args.ckpt_dir}/[!avg][!final]*.pdparams')
-        path_list = sorted(path_list, key=os.path.getmtime)
-        path_list = path_list[-args.num:]
+    path_list = [
+        args.ckpt_dir + '/{}.pdparams'.format(int(epoch))
+        for epoch in sorted_val_scores[:args.num, 0]
+    ]
     print(path_list)
     avg = None
@@ -78,6 +78,7 @@ def main(args):
     meta_path = os.path.splitext(args.dst_model)[0] + '.avg.json'
     with open(meta_path, 'w') as f:
         data = json.dumps({
+            "mode": 'val_best' if args.val_best else 'latest',
             "avg_ckpt": args.dst_model,
             "ckpt": path_list,
             "epoch": selected_epochs.tolist(),

@@ -0,0 +1,44 @@
+#!/usr/bin/env bash
+
+# 2020 author Jiayu DU
+# Apache 2.0
+
+# This script reads an ARPA-format language model and converts it into the
+# KenLM binary format.
+
+[ -f path.sh ] && . ./path.sh;
+
+# begin configuration section
+kenlm_opts="" # e.g. "-q 8 -b 8" for 8-bit quantization
+model_type="trie" # "trie" or "probing". trie is smaller, probing is faster.
+# end configuration section
+
+. utils/parse_options.sh
+
+if [ $# != 2 ]; then
+    echo "Usage: "
+    echo "  $0 [options] <arpa-lm-path> <kenlm-path>"
+    echo "e.g.:"
+    echo "  $0 data/local/lm/4gram.arpa data/lang_test/G.trie"
+    echo "Options:"
+    echo "  --model-type can be either \"trie\" or \"probing\""
+    echo "  --kenlm-opts directly pass through to kenlm"
+    echo "    e.g. for 8-bit quantization, feed \"-q 8 -b 8\""
+    exit 1;
+fi
+
+export LC_ALL=C
+
+arpa_lm=$1
+kenlm=$2
+
+if ! which build_binary >& /dev/null ; then
+    echo "$0: cannot find KenLM's build_binary tool,"
+    echo "check kenlm installation (tools/extras/install_kenlm_query_only.sh)."
+    exit 1
+fi
+
+mkdir -p $(dirname $kenlm)
+build_binary $kenlm_opts $model_type $arpa_lm $kenlm
+
+echo "$0: Successfully built arpa into kenlm format: $kenlm"
+exit 0
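For reference, a conversion plus a quick sanity check might look like this (the script location local/build_kenlm.sh is assumed; query is KenLM's companion tool for scoring text against a binary model):

    # Convert a 4-gram ARPA LM into KenLM's trie format with 8-bit quantization.
    local/build_kenlm.sh --model-type trie --kenlm-opts "-q 8 -b 8" \
        data/local/lm/4gram.arpa data/lang_test/G.trie

    # Sanity-check the binary model by scoring one sentence from stdin.
    echo "hello world" | query data/lang_test/G.trie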
