From 8b45c3e65e95ccfb4776d2e227f16949d09bc090 Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Sat, 9 Oct 2021 11:28:15 +0000 Subject: [PATCH] refactor trainer.py and rm useless dir setup code --- deepspeech/exps/deepspeech2/model.py | 73 -------------- deepspeech/exps/u2/model.py | 48 --------- deepspeech/exps/u2_kaldi/model.py | 48 +-------- deepspeech/exps/u2_st/model.py | 48 --------- deepspeech/modules/loss.py | 1 - deepspeech/training/trainer.py | 140 +++++++++++++++++++++------ 6 files changed, 110 insertions(+), 248 deletions(-) diff --git a/deepspeech/exps/deepspeech2/model.py b/deepspeech/exps/deepspeech2/model.py index 3dc8286d..3ebbbe7a 100644 --- a/deepspeech/exps/deepspeech2/model.py +++ b/deepspeech/exps/deepspeech2/model.py @@ -386,13 +386,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): logger.info(msg) self.autolog.report() - def run_test(self): - self.resume_or_scratch() - try: - self.test() - except KeyboardInterrupt: - exit(-1) - def export(self): if self.args.model_type == 'offline': infer_model = DeepSpeech2InferModel.from_pretrained( @@ -409,40 +402,6 @@ class DeepSpeech2Tester(DeepSpeech2Trainer): logger.info(f"Export code: {static_model.forward.code}") paddle.jit.save(static_model, self.args.export_path) - def run_export(self): - try: - self.export() - except KeyboardInterrupt: - exit(-1) - - def setup(self): - """Setup the experiment. - """ - paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') - - self.setup_output_dir() - self.setup_checkpointer() - - self.setup_dataloader() - self.setup_model() - - self.iteration = 0 - self.epoch = 0 - - def setup_output_dir(self): - """Create a directory used for output. 
- """ - # output dir - if self.args.output: - output_dir = Path(self.args.output).expanduser() - output_dir.mkdir(parents=True, exist_ok=True) - else: - output_dir = Path( - self.args.checkpoint_path).expanduser().parent.parent - output_dir.mkdir(parents=True, exist_ok=True) - - self.output_dir = output_dir - class DeepSpeech2ExportTester(DeepSpeech2Tester): def __init__(self, config, args): @@ -646,38 +605,6 @@ class DeepSpeech2ExportTester(DeepSpeech2Tester): output_lens = output_lens_handle.copy_to_cpu() return output_probs, output_lens - def run_test(self): - try: - self.test() - except KeyboardInterrupt: - exit(-1) - - def setup(self): - """Setup the experiment. - """ - paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') - - self.setup_output_dir() - - self.setup_dataloader() - self.setup_model() - - self.iteration = 0 - self.epoch = 0 - - def setup_output_dir(self): - """Create a directory used for output. - """ - # output dir - if self.args.output: - output_dir = Path(self.args.output).expanduser() - output_dir.mkdir(parents=True, exist_ok=True) - else: - output_dir = Path(self.args.export_path).expanduser().parent.parent - output_dir.mkdir(parents=True, exist_ok=True) - - self.output_dir = output_dir - def setup_model(self): super().setup_model() speedyspeech_config = inference.Config( diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py index 65ec5174..beb91d5d 100644 --- a/deepspeech/exps/u2/model.py +++ b/deepspeech/exps/u2/model.py @@ -551,13 +551,6 @@ class U2Tester(U2Trainer): }) f.write(data + '\n') - def run_test(self): - self.resume_or_scratch() - try: - self.test() - except KeyboardInterrupt: - sys.exit(-1) - @paddle.no_grad() def align(self): if self.config.decoding.batch_size > 1: @@ -617,13 +610,6 @@ class U2Tester(U2Trainer): intervals=tierformat, output=str(textgrid_path)) - def run_align(self): - self.resume_or_scratch() - try: - self.align() - except KeyboardInterrupt: - sys.exit(-1) - def load_inferspec(self): 
"""infer model and input spec. @@ -651,37 +637,3 @@ class U2Tester(U2Trainer): static_model = paddle.jit.to_static(infer_model, input_spec=input_spec) logger.info(f"Export code: {static_model.forward.code}") paddle.jit.save(static_model, self.args.export_path) - - def run_export(self): - try: - self.export() - except KeyboardInterrupt: - sys.exit(-1) - - def setup(self): - """Setup the experiment. - """ - paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') - - self.setup_output_dir() - self.setup_checkpointer() - - self.setup_dataloader() - self.setup_model() - - self.iteration = 0 - self.epoch = 0 - - def setup_output_dir(self): - """Create a directory used for output. - """ - # output dir - if self.args.output: - output_dir = Path(self.args.output).expanduser() - output_dir.mkdir(parents=True, exist_ok=True) - else: - output_dir = Path( - self.args.checkpoint_path).expanduser().parent.parent - output_dir.mkdir(parents=True, exist_ok=True) - - self.output_dir = output_dir diff --git a/deepspeech/exps/u2_kaldi/model.py b/deepspeech/exps/u2_kaldi/model.py index 5a72e44d..48950fc8 100644 --- a/deepspeech/exps/u2_kaldi/model.py +++ b/deepspeech/exps/u2_kaldi/model.py @@ -525,13 +525,6 @@ class U2Tester(U2Trainer): }) f.write(data + '\n') - def run_test(self): - self.resume_or_scratch() - try: - self.test() - except KeyboardInterrupt: - sys.exit(-1) - @paddle.no_grad() def align(self): if self.config.decoding.batch_size > 1: @@ -591,13 +584,6 @@ class U2Tester(U2Trainer): intervals=tierformat, output=str(textgrid_path)) - def run_align(self): - self.resume_or_scratch() - try: - self.align() - except KeyboardInterrupt: - sys.exit(-1) - def load_inferspec(self): """infer model and input spec. 
@@ -626,43 +612,11 @@ class U2Tester(U2Trainer): logger.info(f"Export code: {static_model.forward.code}") paddle.jit.save(static_model, self.args.export_path) - def run_export(self): - try: - self.export() - except KeyboardInterrupt: - sys.exit(-1) - def setup_dict(self): # load dictionary for debug log self.args.char_list = load_dict(self.args.dict_path, "maskctc" in self.args.model_name) def setup(self): - """Setup the experiment. - """ - paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') - - self.setup_output_dir() - self.setup_checkpointer() - - self.setup_dataloader() - self.setup_model() - + super().setup() self.setup_dict() - - self.iteration = 0 - self.epoch = 0 - - def setup_output_dir(self): - """Create a directory used for output. - """ - # output dir - if self.args.output: - output_dir = Path(self.args.output).expanduser() - output_dir.mkdir(parents=True, exist_ok=True) - else: - output_dir = Path( - self.args.checkpoint_path).expanduser().parent.parent - output_dir.mkdir(parents=True, exist_ok=True) - - self.output_dir = output_dir diff --git a/deepspeech/exps/u2_st/model.py b/deepspeech/exps/u2_st/model.py index 08060d97..2d228d29 100644 --- a/deepspeech/exps/u2_st/model.py +++ b/deepspeech/exps/u2_st/model.py @@ -545,13 +545,6 @@ class U2STTester(U2STTrainer): }) f.write(data + '\n') - def run_test(self): - self.resume_or_scratch() - try: - self.test() - except KeyboardInterrupt: - sys.exit(-1) - @paddle.no_grad() def align(self): if self.config.decoding.batch_size > 1: @@ -611,13 +604,6 @@ class U2STTester(U2STTrainer): intervals=tierformat, output=str(textgrid_path)) - def run_align(self): - self.resume_or_scratch() - try: - self.align() - except KeyboardInterrupt: - sys.exit(-1) - def load_inferspec(self): """infer model and input spec. 
@@ -645,37 +631,3 @@ class U2STTester(U2STTrainer): static_model = paddle.jit.to_static(infer_model, input_spec=input_spec) logger.info(f"Export code: {static_model.forward.code}") paddle.jit.save(static_model, self.args.export_path) - - def run_export(self): - try: - self.export() - except KeyboardInterrupt: - sys.exit(-1) - - def setup(self): - """Setup the experiment. - """ - paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') - - self.setup_output_dir() - self.setup_checkpointer() - - self.setup_dataloader() - self.setup_model() - - self.iteration = 0 - self.epoch = 0 - - def setup_output_dir(self): - """Create a directory used for output. - """ - # output dir - if self.args.output: - output_dir = Path(self.args.output).expanduser() - output_dir.mkdir(parents=True, exist_ok=True) - else: - output_dir = Path( - self.args.checkpoint_path).expanduser().parent.parent - output_dir.mkdir(parents=True, exist_ok=True) - - self.output_dir = output_dir diff --git a/deepspeech/modules/loss.py b/deepspeech/modules/loss.py index 71ecd266..e06f26f8 100644 --- a/deepspeech/modules/loss.py +++ b/deepspeech/modules/loss.py @@ -12,7 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. 
import inspect -from functools import partial import paddle from paddle import nn diff --git a/deepspeech/training/trainer.py b/deepspeech/training/trainer.py index c3e1bec8..a14cd7a0 100644 --- a/deepspeech/training/trainer.py +++ b/deepspeech/training/trainer.py @@ -14,6 +14,7 @@ import sys import time from collections import OrderedDict +from contextlib import contextmanager from pathlib import Path import paddle @@ -103,14 +104,28 @@ class Trainer(): self.iteration = 0 self.epoch = 0 self.rank = dist.get_rank() + self.world_size = dist.get_world_size() + self._train = True + # print deps version all_version() - logger.info(f"Rank: {self.rank}/{dist.get_world_size()}") + logger.info(f"Rank: {self.rank}/{self.world_size}") + # set device + paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') + if self.parallel: + self.init_parallel() + + self.checkpoint = Checkpoint( + kbest_n=self.config.training.checkpoint.kbest_n, + latest_n=self.config.training.checkpoint.latest_n) + + # set random seed if needed if args.seed: seed_all(args.seed) logger.info(f"Set seed {args.seed}") + # profiler and benchmark options if self.args.benchmark_batch_size: with UpdateConfig(self.config): self.config.collator.batch_size = self.args.benchmark_batch_size @@ -118,17 +133,18 @@ class Trainer(): logger.info( f"Benchmark reset batch-size: {self.args.benchmark_batch_size}") + @contextmanager + def eval(self): + self._train = False + yield + self._train = True + def setup(self): """Setup the experiment. 
""" - paddle.set_device('gpu' if self.args.nprocs > 0 else 'cpu') - if self.parallel: - self.init_parallel() - self.setup_output_dir() self.dump_config() self.setup_visualizer() - self.setup_checkpointer() self.setup_dataloader() self.setup_model() @@ -183,8 +199,8 @@ class Trainer(): if infos: # just restore ckpt # lr will resotre from optimizer ckpt - self.iteration = infos["step"] - self.epoch = infos["epoch"] + self.iteration = infos["step"] + 1 + self.epoch = infos["epoch"] + 1 scratch = False logger.info( f"Restore ckpt: epoch {self.epoch }, step {self.iteration}!") @@ -302,37 +318,74 @@ class Trainer(): """The routine of the experiment after setup. This method is intended to be used by the user. """ - with Timer("Training Done: {}"): - try: + try: + with Timer("Training Done: {}"): self.train() - except KeyboardInterrupt: - exit(-1) - finally: - self.destory() + except KeyboardInterrupt: + exit(-1) + finally: + self.destory() + + def run_test(self): + """Do Test/Decode""" + try: + with Timer("Test/Decode Done: {}"): + with self.eval(): + self.resume_or_scratch() + self.test() + except KeyboardInterrupt: + exit(-1) + + def run_export(self): + """Do Model Export""" + try: + with Timer("Export Done: {}"): + with self.eval(): + self.export() + except KeyboardInterrupt: + exit(-1) + + def run_align(self): + """Do CTC alignment""" + try: + with Timer("Align Done: {}"): + with self.eval(): + self.resume_or_scratch() + self.align() + except KeyboardInterrupt: + sys.exit(-1) def setup_output_dir(self): """Create a directory used for output. 
""" - # output dir - output_dir = Path(self.args.output).expanduser() - output_dir.mkdir(parents=True, exist_ok=True) - + if self.args.output: + output_dir = Path(self.args.output).expanduser() + elif self.args.checkpoint_path: + output_dir = Path( + self.args.checkpoint_path).expanduser().parent.parent self.output_dir = output_dir + self.output_dir.mkdir(parents=True, exist_ok=True) - def setup_checkpointer(self): - """Create a directory used to save checkpoints into. + self.checkpoint_dir = self.output_dir / "checkpoints" + self.checkpoint_dir.mkdir(parents=True, exist_ok=True) - It is "checkpoints" inside the output directory. - """ - # checkpoint dir - checkpoint_dir = self.output_dir / "checkpoints" - checkpoint_dir.mkdir(exist_ok=True) + self.log_dir = output_dir / "log" + self.log_dir.mkdir(parents=True, exist_ok=True) - self.checkpoint_dir = checkpoint_dir + self.test_dir = output_dir / "test" + self.test_dir.mkdir(parents=True, exist_ok=True) - self.checkpoint = Checkpoint( - kbest_n=self.config.training.checkpoint.kbest_n, - latest_n=self.config.training.checkpoint.latest_n) + self.decode_dir = output_dir / "decode" + self.decode_dir.mkdir(parents=True, exist_ok=True) + + self.export_dir = output_dir / "export" + self.export_dir.mkdir(parents=True, exist_ok=True) + + self.visual_dir = output_dir / "visual" + self.visual_dir.mkdir(parents=True, exist_ok=True) + + self.config_dir = output_dir / "conf" + self.config_dir.mkdir(parents=True, exist_ok=True) @mp_tools.rank_zero_only def destory(self): @@ -354,7 +407,7 @@ class Trainer(): unexpected behaviors. """ # visualizer - visualizer = SummaryWriter(logdir=str(self.output_dir)) + visualizer = SummaryWriter(logdir=str(self.visual_dir)) self.visualizer = visualizer @mp_tools.rank_zero_only @@ -364,7 +417,14 @@ class Trainer(): It is saved in to ``config.yaml`` in the output directory at the beginning of the experiment. 
""" - with open(self.output_dir / "config.yaml", 'wt') as f: + config_file = self.config_dir / "config.yaml" + if self._train and config_file.exists(): + time_stamp = time.strftime("%Y_%m_%d_%H_%M_%s", time.gmtime()) + target_path = self.config_dir / ".".join( + [time_stamp, "config.yaml"]) + config_file.rename(target_path) + + with open(config_file, 'wt') as f: print(self.config, file=f) def train_batch(self): @@ -378,6 +438,24 @@ class Trainer(): """ raise NotImplementedError("valid should be implemented.") + @paddle.no_grad() + def test(self): + """The test. A subclass should implement this method in Tester. + """ + raise NotImplementedError("test should be implemented.") + + @paddle.no_grad() + def export(self): + """The test. A subclass should implement this method in Tester. + """ + raise NotImplementedError("export should be implemented.") + + @paddle.no_grad() + def align(self): + """The align. A subclass should implement this method in Tester. + """ + raise NotImplementedError("align should be implemented.") + def setup_model(self): """Setup model, criterion and optimizer, etc. A subclass should implement this method.