add timer info

4 years ago · 65e666378d
parent b8601c756f
commit 65e666378d
4 changed files with 66 additions and 66 deletions
--- a/deepspeech/training/gradclip.py
+++ b/deepspeech/training/gradclip.py
@@ -48,7 +48,6 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
            sum_square_list.append(sum_square)

            # debug log
-            if i < 10:
            logger.debug(
                f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }")

@@ -77,7 +76,6 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
            params_and_grads.append((p, new_grad))

            # debug log
-            if i < 10:
            logger.debug(
                f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}"
            )
--- a/deepspeech/training/timer.py
+++ b/deepspeech/training/timer.py
@@ -27,7 +27,7 @@ class Timer():
            do some thing
    """

-    def __init__(self, message):
+    def __init__(self, message=None):
        self.message = message

    def duration(self) -> str:
@@ -40,6 +40,7 @@ class Timer():
        return self

    def __exit__(self, type, value, traceback):
+        if self.message:
            logger.info(self.message.format(self.duration()))

    def __call__(self) -> float:
--- a/deepspeech/training/trainer.py
+++ b/deepspeech/training/trainer.py
@@ -185,7 +185,6 @@ class Trainer():

    def train(self):
        """The training process control by epoch."""
-        with Timer("Load/Init Model: {}"):
        from_scratch = self.resume_or_scratch()
        if from_scratch:
            # save init model, i.e. 0 epoch
@@ -196,6 +195,7 @@ class Trainer():

        logger.info(f"Train Total Examples: {len(self.train_loader.dataset)}")
        while self.epoch < self.config.training.n_epoch:
+            with Timer("Epoch-Train Time Cost: {}"):
                self.model.train()
                try:
                    data_start_time = time.time()
@@ -214,6 +214,7 @@ class Trainer():
                    logger.error(e)
                    raise e

+            with Timer("Eval Time Cost: {}"):
                total_loss, num_seen_utts = self.valid()
                if dist.get_world_size() > 1:
                    num_seen_utts = paddle.to_tensor(num_seen_utts)