Do not dump all grad info, since it slows down the training process

pull/820/head
Hui Zhang 3 years ago
parent 8fef667f80
commit f0470e0529

@ -47,7 +47,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
sum_square = layers.reduce_sum(square) sum_square = layers.reduce_sum(square)
sum_square_list.append(sum_square) sum_square_list.append(sum_square)
# debug log # debug log, not dump all since slow down train process
if i < 10:
logger.debug( logger.debug(
f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }") f"Grad Before Clip: {p.name}: {float(sum_square.sqrt()) }")
@ -75,7 +76,8 @@ class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
new_grad = layers.elementwise_mul(x=g, y=clip_var) new_grad = layers.elementwise_mul(x=g, y=clip_var)
params_and_grads.append((p, new_grad)) params_and_grads.append((p, new_grad))
# debug log # debug log, not dump all since slow down train process
if i < 10:
logger.debug( logger.debug(
f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}" f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}"
) )

Loading…
Cancel
Save