From c6e8a33b735e943c42a0398bacf5bb125a8ceb54 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 22 Sep 2021 12:17:38 +0000 Subject: [PATCH] fix set_device; more utils; args.opts support multi same name --- deepspeech/exps/deepspeech2/model.py | 4 +- deepspeech/exps/u2/model.py | 2 +- deepspeech/exps/u2_kaldi/model.py | 2 +- deepspeech/exps/u2_st/model.py | 2 +- deepspeech/training/cli.py | 23 +- deepspeech/training/trainer.py | 4 +- examples/librispeech/s1/local/test.sh | 6 +- examples/librispeech/s2/cmd.sh | 89 +++++++ examples/librispeech/s2/local/test.sh | 102 ++++---- examples/librispeech/s2/run.sh | 4 +- utils/README.md | 3 + utils/build_kenlm_model_from_arpa.sh | 44 ++++ utils/duration_from_maniefst.sh | 0 utils/log.sh | 0 utils/parallel/run.pl | 356 ++++++++++++++++++++++++++ utils/parse_options.sh | 0 utils/pd_env_collect.sh | 0 utils/profile.sh | 0 utils/run.pl | 1 + utils/score_sclite.sh | 125 +++++++++ utils/spk2utt_to_utt2spk.pl | 25 ++ utils/split_data.sh | 79 ++++++ utils/split_json.sh | 31 +++ utils/split_scp.pl | 212 +++++++++++++++ utils/train_arpa_with_kenlm.sh | 67 +++++ utils/utility.sh | 0 utils/utt2spk_to_spk2utt.pl | 38 +++ 27 files changed, 1158 insertions(+), 61 deletions(-) create mode 100644 examples/librispeech/s2/cmd.sh create mode 100644 utils/README.md create mode 100755 utils/build_kenlm_model_from_arpa.sh mode change 100644 => 100755 utils/duration_from_maniefst.sh mode change 100644 => 100755 utils/log.sh create mode 100755 utils/parallel/run.pl mode change 100644 => 100755 utils/parse_options.sh mode change 100644 => 100755 utils/pd_env_collect.sh mode change 100644 => 100755 utils/profile.sh create mode 100755 utils/run.pl create mode 100755 utils/score_sclite.sh create mode 100755 utils/spk2utt_to_utt2spk.pl create mode 100755 utils/split_data.sh create mode 100755 utils/split_json.sh create mode 100644 utils/split_scp.pl create mode 100755 utils/train_arpa_with_kenlm.sh mode change 100644 => 100755 utils/utility.sh create mode 100755 utils/utt2spk_to_spk2utt.pl diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 8af2b02a..12053981 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -403,7 +403,7 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): def setup(self): """Setup the experiment. """ - paddle.set_device('gpu' self.args.nprocs > 0 else 'cpu') + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') self.setup_output_dir() self.setup_checkpointer() @@ -635,7 +635,7 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester): def setup(self): """Setup the experiment. """ - paddle.set_device('gpu' self.args.nprocs > 0 else 'cpu') + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') self.setup_output_dir() diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index bc46a104..4dd05489 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -653,7 +653,7 @@ class U2Tester(U2Trainer): def setup(self): """Setup the experiment. """ - paddle.set_device('gpu' self.args.nprocs > 0 else 'cpu') + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') self.setup_output_dir() self.setup_checkpointer() diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index be89c3d6..e8482aa9 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -637,7 +637,7 @@ class U2Tester(U2Trainer): def setup(self): """Setup the experiment. """ - paddle.set_device('gpu' self.args.nprocs > 0 else 'cpu') + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') self.setup_output_dir() self.setup_checkpointer() diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py index 55dadee8..c98f5e69 100644 --- a/deepspeech/exps/u2_st/model.py +++ b/deepspeech/exps/u2_st/model.py @@ -661,7 +661,7 @@ class U2STTester(U2STTrainer): def setup(self): """Setup the experiment. """ - paddle.set_device('gpu' self.args.nprocs > 0 else 'cpu') + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') self.setup_output_dir() self.setup_checkpointer() diff --git a/deepspeech/training/cli.py b/deepspeech/training/cli.py index aa263a06..e079293c 100644 --- a/deepspeech/training/cli.py +++ b/deepspeech/training/cli.py @@ -14,6 +14,20 @@ import argparse +class ExtendAction(argparse.Action): + """ + [Since Python 3.8, the "extend" is available directly in stdlib] + (https://docs.python.org/3.8/library/argparse.html#action). + If you only have to support 3.8+ then defining it yourself is no longer required. + Usage of stdlib "extend" action is exactly the same way as this answer originally described: + """ + + def __call__(self, parser, namespace, values, option_string=None): + items = getattr(namespace, self.dest) or [] + items.extend(values) + setattr(namespace, self.dest, items) + + def default_argument_parser(): r"""A simple yet genral argument parser for experiments with parakeet. @@ -42,6 +56,7 @@ def default_argument_parser(): the parser """ parser = argparse.ArgumentParser() + parser.register('action', 'extend', ExtendAction) train_group = parser.add_argument_group( title='Train Options', description=None) @@ -64,10 +79,10 @@ def default_argument_parser(): "--checkpoint_path", type=str, help="path to load checkpoint") train_group.add_argument( "--opts", - type=str, - default=[], - nargs='+', - help="overwrite --config file, passing in LIST[KEY VALUE] pairs") + action='extend', + nargs=2, + metavar=('key', 'val'), + help="overwrite --config field, passing (KEY VALUE) pairs") train_group.add_argument( "--dump-config", metavar="FILE", help="dump config to `this` file.") diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index b180f489..79b1562e 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -119,7 +119,7 @@ class Trainer(): def setup(self): """Setup the experiment. """ - paddle.set_device('gpu' self.args.nprocs > 0 else 'cpu') + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') if self.parallel: self.init_parallel() @@ -139,7 +139,7 @@ class Trainer(): """A flag indicating whether the experiment should run with multiprocessing. """ - return elf.args.nprocs > 0 + return self.args.nprocs > 0 def init_parallel(self): """Init environment for multiprocess training. diff --git a/examples/librispeech/s1/local/test.sh b/examples/librispeech/s1/local/test.sh index f7ec34ab..62b92e1e 100755 --- a/examples/librispeech/s1/local/test.sh +++ b/examples/librispeech/s1/local/test.sh @@ -37,7 +37,8 @@ for type in attention ctc_greedy_search; do --config ${config_path} \ --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} if [ $? -ne 0 ]; then echo "Failed in evaluation!" @@ -53,7 +54,8 @@ for type in ctc_prefix_beam_search attention_rescoring; do --config ${config_path} \ --result_file ${ckpt_prefix}.${type}.rsl \ --checkpoint_path ${ckpt_prefix} \ - --opts decoding.decoding_method ${type} decoding.batch_size ${batch_size} + --opts decoding.decoding_method ${type} \ + --opts decoding.batch_size ${batch_size} if [ $? -ne 0 ]; then echo "Failed in evaluation!" diff --git a/examples/librispeech/s2/cmd.sh b/examples/librispeech/s2/cmd.sh new file mode 100644 index 00000000..7b70ef5e --- /dev/null +++ b/examples/librispeech/s2/cmd.sh @@ -0,0 +1,89 @@ +# ====== About run.pl, queue.pl, slurm.pl, and ssh.pl ====== +# Usage: .pl [options] JOB=1: +# e.g. +# run.pl --mem 4G JOB=1:10 echo.JOB.log echo JOB +# +# Options: +# --time