From 9dace625818a4ca282d82dcdbf2e7a18f6a8779d Mon Sep 17 00:00:00 2001
From: Hui Zhang <zhtclz@foxmail.com>
Date: Thu, 19 Aug 2021 03:12:35 +0000
Subject: [PATCH] fix augmentation

---
 deepspeech/exps/u2_kaldi/bin/alignment.py     | 54 -------------------
 deepspeech/exps/u2_kaldi/bin/export.py        | 48 -----------------
 deepspeech/exps/u2_kaldi/bin/test.py          | 17 ++++--
 deepspeech/exps/u2_kaldi/bin/train.py         |  4 +-
 deepspeech/frontend/augmentor/augmentation.py | 14 ++---
 examples/librispeech/s2/local/align.sh        |  3 +-
 examples/librispeech/s2/local/export.sh       |  3 +-
 examples/librispeech/s2/local/test.sh         |  2 +
 8 files changed, 29 insertions(+), 116 deletions(-)
 delete mode 100644 deepspeech/exps/u2_kaldi/bin/alignment.py
 delete mode 100644 deepspeech/exps/u2_kaldi/bin/export.py

diff --git a/deepspeech/exps/u2_kaldi/bin/alignment.py b/deepspeech/exps/u2_kaldi/bin/alignment.py
deleted file mode 100644
index 3bc70f5a..00000000
--- a/deepspeech/exps/u2_kaldi/bin/alignment.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Alignment for U2 model."""
-from deepspeech.exps.u2.model import get_cfg_defaults
-from deepspeech.exps.u2.model import U2Tester as Tester
-from deepspeech.training.cli import default_argument_parser
-from deepspeech.utils.dynamic_import import dynamic_import
-from deepspeech.utils.utility import print_arguments
-
-
-def main_sp(config, args):
-    exp = Tester(config, args)
-    exp.setup()
-    exp.run_align()
-
-
-def main(config, args):
-    main_sp(config, args)
-
-
-if __name__ == "__main__":
-    parser = default_argument_parser()
-    parser.add_arguments(
-        '--model-name',
-        type=str,
-        default='u2',
-        help='model name, e.g: deepspeech2, u2, u2_kaldi, u2_st')
-    args = parser.parse_args()
-    print_arguments(args, globals())
-
-    # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
-    if args.config:
-        config.merge_from_file(args.config)
-    if args.opts:
-        config.merge_from_list(args.opts)
-    config.freeze()
-    print(config)
-    if args.dump_config:
-        with open(args.dump_config, 'w') as f:
-            print(config, file=f)
-
-    main(config, args)
diff --git a/deepspeech/exps/u2_kaldi/bin/export.py b/deepspeech/exps/u2_kaldi/bin/export.py
deleted file mode 100644
index 91967627..00000000
--- a/deepspeech/exps/u2_kaldi/bin/export.py
+++ /dev/null
@@ -1,48 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""Export for U2 model."""
-from deepspeech.exps.u2.model import get_cfg_defaults
-from deepspeech.exps.u2.model import U2Tester as Tester
-from deepspeech.training.cli import default_argument_parser
-from deepspeech.utils.utility import print_arguments
-
-
-def main_sp(config, args):
-    exp = Tester(config, args)
-    exp.setup()
-    exp.run_export()
-
-
-def main(config, args):
-    main_sp(config, args)
-
-
-if __name__ == "__main__":
-    parser = default_argument_parser()
-    args = parser.parse_args()
-    print_arguments(args, globals())
-
-    # https://yaml.org/type/float.html
-    config = get_cfg_defaults()
-    if args.config:
-        config.merge_from_file(args.config)
-    if args.opts:
-        config.merge_from_list(args.opts)
-    config.freeze()
-    print(config)
-    if args.dump_config:
-        with open(args.dump_config, 'w') as f:
-            print(config, file=f)
-
-    main(config, args)
diff --git a/deepspeech/exps/u2_kaldi/bin/test.py b/deepspeech/exps/u2_kaldi/bin/test.py
index 06504827..457672c0 100644
--- a/deepspeech/exps/u2_kaldi/bin/test.py
+++ b/deepspeech/exps/u2_kaldi/bin/test.py
@@ -18,17 +18,23 @@ from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.dynamic_import import dynamic_import
 from deepspeech.utils.utility import print_arguments
 
-model_alias = {
+model_test_alias = {
     "u2": "deepspeech.exps.u2.model:U2Tester",
     "u2_kaldi": "deepspeech.exps.u2_kaldi.model:U2Tester",
 }
 
 
 def main_sp(config, args):
-    class_obj = dynamic_import(args.model_name, model_alias)
+    class_obj = dynamic_import(args.model_name, model_test_alias)
     exp = class_obj(config, args)
     exp.setup()
-    exp.run_test()
+
+    if args.run_mode == 'test':
+        exp.run_test()
+    elif args.run_mode == 'export':
+        exp.run_export()
+    elif args.run_mode == 'align':
+        exp.run_align()
 
 
 def main(config, args):
@@ -42,6 +48,11 @@ if __name__ == "__main__":
         type=str,
         default='u2_kaldi',
         help='model name, e.g: deepspeech2, u2, u2_kaldi, u2_st')
+    parser.add_argument(
+        '--run-mode',
+        type=str,
+        default='test',
+        help='run mode, e.g. test, align, export')
     args = parser.parse_args()
     print_arguments(args, globals())
 
diff --git a/deepspeech/exps/u2_kaldi/bin/train.py b/deepspeech/exps/u2_kaldi/bin/train.py
index 3a240b80..1dcd154d 100644
--- a/deepspeech/exps/u2_kaldi/bin/train.py
+++ b/deepspeech/exps/u2_kaldi/bin/train.py
@@ -22,14 +22,14 @@ from deepspeech.training.cli import default_argument_parser
 from deepspeech.utils.dynamic_import import dynamic_import
 from deepspeech.utils.utility import print_arguments
 
-model_alias = {
+model_train_alias = {
     "u2": "deepspeech.exps.u2.model:U2Trainer",
     "u2_kaldi": "deepspeech.exps.u2_kaldi.model:U2Trainer",
 }
 
 
 def main_sp(config, args):
-    class_obj = dynamic_import(args.model_name, model_alias)
+    class_obj = dynamic_import(args.model_name, model_train_alias)
     exp = class_obj(config, args)
     exp.setup()
     exp.run()
diff --git a/deepspeech/frontend/augmentor/augmentation.py b/deepspeech/frontend/augmentor/augmentation.py
index 7b43988e..c479958f 100644
--- a/deepspeech/frontend/augmentor/augmentation.py
+++ b/deepspeech/frontend/augmentor/augmentation.py
@@ -97,14 +97,14 @@ class AugmentationPipeline():
         ValueError: If the augmentation json config is in incorrect format".
     """
 
+    SPEC_TYPES = ('specaug')
+
     def __init__(self, augmentation_config: str, random_seed: int=0):
         self._rng = np.random.RandomState(random_seed)
-        self._spec_types = ('specaug')
-
-        if augmentation_config is None:
-            self.conf = {}
-        else:
-            self.conf = json.loads(augmentation_config)
+        self.conf = {'mode': 'sequential', 'process': []}
+        if augmentation_config:
+            process = json.loads(augmentation_config)
+            self.conf['process'] += process
 
         self._augmentors, self._rates = self._parse_pipeline_from('all')
         self._audio_augmentors, self._audio_rates = self._parse_pipeline_from(
@@ -188,7 +188,7 @@ class AugmentationPipeline():
         all_confs = []
         for config in self.conf:
             all_confs.append(config)
-            if config["type"] in self._spec_types:
+            if config["type"] in self.SPEC_TYPES:
                 feature_confs.append(config)
             else:
                 audio_confs.append(config)
diff --git a/examples/librispeech/s2/local/align.sh b/examples/librispeech/s2/local/align.sh
index ad6c84bc..94146ccf 100755
--- a/examples/librispeech/s2/local/align.sh
+++ b/examples/librispeech/s2/local/align.sh
@@ -21,7 +21,8 @@ mkdir -p ${output_dir}
 
 # align dump in `result_file`
 # .tier, .TextGrid dump in `dir of result_file`
-python3 -u ${BIN_DIR}/alignment.py \
+python3 -u ${BIN_DIR}/test.py \
+--run_mode 'align' \
 --device ${device} \
 --nproc 1 \
 --config ${config_path} \
diff --git a/examples/librispeech/s2/local/export.sh b/examples/librispeech/s2/local/export.sh
index f99a15ba..7e42e011 100755
--- a/examples/librispeech/s2/local/export.sh
+++ b/examples/librispeech/s2/local/export.sh
@@ -17,7 +17,8 @@ if [ ${ngpu} == 0 ];then
     device=cpu
 fi
 
-python3 -u ${BIN_DIR}/export.py \
+python3 -u ${BIN_DIR}/test.py \
+--run_mode 'export' \
 --device ${device} \
 --nproc ${ngpu} \
 --config ${config_path} \
diff --git a/examples/librispeech/s2/local/test.sh b/examples/librispeech/s2/local/test.sh
index 3bd3f0bb..762211c2 100755
--- a/examples/librispeech/s2/local/test.sh
+++ b/examples/librispeech/s2/local/test.sh
@@ -38,6 +38,7 @@ for type in attention ctc_greedy_search; do
         batch_size=64
     fi
     python3 -u ${BIN_DIR}/test.py \
+    --run_mode test \
     --device ${device} \
     --nproc 1 \
     --config ${config_path} \
@@ -55,6 +56,7 @@ for type in ctc_prefix_beam_search attention_rescoring; do
     echo "decoding ${type}"
     batch_size=1
     python3 -u ${BIN_DIR}/test.py \
+    --run_mode test \
     --device ${device} \
     --nproc 1 \
     --config ${config_path} \