diff --git a/deepspeech/exps/u2/model.py b/deepspeech/exps/u2/model.py
index 6b951da42..142491f86 100644
--- a/deepspeech/exps/u2/model.py
+++ b/deepspeech/exps/u2/model.py
@@ -88,10 +88,6 @@ class U2Trainer(Trainer):
             losses_np['ctc_loss'] = float(ctc_loss)
 
         if (batch_index + 1) % train_conf.accum_grad == 0:
-            if dist.get_rank() == 0 and self.visualizer:
-                losses_np_v = losses_np.copy()
-                losses_np_v.update({"lr": self.lr_scheduler()})
-                self.visualizer.add_scalars("step", losses_np_v, self.iteration)
             self.optimizer.step()
             self.optimizer.clear_grad()
             self.lr_scheduler.step()
@@ -107,6 +103,12 @@ class U2Trainer(Trainer):
                              for k, v in losses_np.items())
             logger.info(msg)
 
+        if dist.get_rank() == 0 and self.visualizer:
+            losses_np_v = losses_np.copy()
+            losses_np_v.update({"lr": self.lr_scheduler()})
+            self.visualizer.add_scalars("step", losses_np_v,
+                                        self.iteration - 1)
+
     def train(self):
         """The training process control by step."""
         # !!!IMPORTANT!!!
diff --git a/deepspeech/utils/checkpoint.py b/deepspeech/utils/checkpoint.py
index 882b65241..8ede6b8fd 100644
--- a/deepspeech/utils/checkpoint.py
+++ b/deepspeech/utils/checkpoint.py
@@ -46,8 +46,8 @@ def _load_latest_checkpoint(checkpoint_dir: str) -> int:
     return iteration
 
 
-def _save_checkpoint(checkpoint_dir: str, iteration: int):
-    """Save the iteration number of the latest model to be checkpointed.
+def _save_record(checkpoint_dir: str, iteration: int):
+    """Save the iteration number of the latest model to the checkpoint record.
     Args:
         checkpoint_dir (str): the directory where checkpoint is saved.
         iteration (int): the latest iteration number.
@@ -149,4 +149,4 @@ def save_parameters(checkpoint_dir: str,
         fout.write(data)
 
     if isinstance(tag_or_iteration, int):
-        _save_checkpoint(checkpoint_dir, tag_or_iteration)
+        _save_record(checkpoint_dir, tag_or_iteration)
diff --git a/deepspeech/utils/layer_tools.py b/deepspeech/utils/layer_tools.py
index 1e8e55ed1..c05982c14 100644
--- a/deepspeech/utils/layer_tools.py
+++ b/deepspeech/utils/layer_tools.py
@@ -21,6 +21,8 @@ __all__ = [
 
 
 def summary(layer: nn.Layer, print_func=print):
+    if print_func is None:
+        return
     num_params = num_elements = 0
     for name, param in layer.state_dict().items():
         if print_func:
@@ -32,15 +34,6 @@ def summary(layer: nn.Layer, print_func=print):
         print_func(f"Total parameters: {num_params}, {num_elements} elements.")
 
 
-def gradient_norm(layer: nn.Layer):
-    grad_norm_dict = {}
-    for name, param in layer.state_dict().items():
-        if param.trainable:
-            grad = param.gradient()  # return numpy.ndarray
-            grad_norm_dict[name] = np.linalg.norm(grad) / grad.size
-    return grad_norm_dict
-
-
 def print_grads(model, print_func=print):
     if print_func is None:
         return
@@ -64,6 +57,15 @@ def print_params(model, print_func=print):
     print_func(f"Total parameters: {num_params}, {total} elements.")
 
 
+def gradient_norm(layer: nn.Layer):
+    grad_norm_dict = {}
+    for name, param in layer.state_dict().items():
+        if param.trainable:
+            grad = param.gradient()  # return numpy.ndarray
+            grad_norm_dict[name] = np.linalg.norm(grad) / grad.size
+    return grad_norm_dict
+
+
 def recursively_remove_weight_norm(layer: nn.Layer):
     for layer in layer.sublayers():
         try: