From a2ae6396ef7f6b8f0254df6e66162af92f472cfa Mon Sep 17 00:00:00 2001
From: Hui Zhang
Date: Tue, 13 Jun 2023 16:31:34 +0800
Subject: [PATCH] old grad clip has 0d tensor problem, fix it (#3334)

---
 paddlespeech/s2t/exps/deepspeech2/model.py |  3 +-
 paddlespeech/s2t/training/gradclip.py      | 86 -------------------
 .../s2t/training/optimizer/__init__.py     |  4 +-
 tests/unit/tts/test_ssml.py                |  9 ++
 4 files changed, 12 insertions(+), 90 deletions(-)
 delete mode 100644 paddlespeech/s2t/training/gradclip.py

diff --git a/paddlespeech/s2t/exps/deepspeech2/model.py b/paddlespeech/s2t/exps/deepspeech2/model.py
index 7ab8cf85..d007a9e3 100644
--- a/paddlespeech/s2t/exps/deepspeech2/model.py
+++ b/paddlespeech/s2t/exps/deepspeech2/model.py
@@ -27,7 +27,6 @@ from paddlespeech.audio.text.text_featurizer import TextFeaturizer
 from paddlespeech.s2t.io.dataloader import BatchDataLoader
 from paddlespeech.s2t.models.ds2 import DeepSpeech2InferModel
 from paddlespeech.s2t.models.ds2 import DeepSpeech2Model
-from paddlespeech.s2t.training.gradclip import ClipGradByGlobalNormWithLog
 from paddlespeech.s2t.training.reporter import report
 from paddlespeech.s2t.training.timer import Timer
 from paddlespeech.s2t.training.trainer import Trainer
@@ -148,7 +147,7 @@ class DeepSpeech2Trainer(Trainer):
         if not self.train:
             return
 
-        grad_clip = ClipGradByGlobalNormWithLog(config.global_grad_clip)
+        grad_clip = paddle.nn.ClipGradByGlobalNorm(config.global_grad_clip)
         lr_scheduler = paddle.optimizer.lr.ExponentialDecay(
             learning_rate=config.lr, gamma=config.lr_decay, verbose=True)
         optimizer = paddle.optimizer.Adam(
diff --git a/paddlespeech/s2t/training/gradclip.py b/paddlespeech/s2t/training/gradclip.py
deleted file mode 100644
index 06587c74..00000000
--- a/paddlespeech/s2t/training/gradclip.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import paddle
-from paddle.fluid import core
-from paddle.fluid import layers
-from paddle.fluid.dygraph import base as imperative_base
-
-from paddlespeech.s2t.utils.log import Log
-
-__all__ = ["ClipGradByGlobalNormWithLog"]
-
-logger = Log(__name__).getlog()
-
-
-class ClipGradByGlobalNormWithLog(paddle.nn.ClipGradByGlobalNorm):
-    def __init__(self, clip_norm):
-        super().__init__(clip_norm)
-
-    def __repr__(self):
-        return f"{self.__class__.__name__}(global_clip_norm={self.clip_norm})"
-
-    @imperative_base.no_grad
-    def _dygraph_clip(self, params_grads):
-        params_and_grads = []
-        sum_square_list = []
-        for i, (p, g) in enumerate(params_grads):
-            if g is None:
-                continue
-            if getattr(p, 'need_clip', True) is False:
-                continue
-            merge_grad = g
-            if g.type == core.VarDesc.VarType.SELECTED_ROWS:
-                merge_grad = layers.merge_selected_rows(g)
-                merge_grad = layers.get_tensor_from_selected_rows(merge_grad)
-            square = paddle.square(merge_grad)
-            sum_square = paddle.sum(square)
-            sum_square_list.append(sum_square)
-
-            # debug log, not dump all since slow down train process
-            if i < 10:
-                logger.debug(
-                    f"Grad Before Clip: {p.name}: {float(sum_square.sqrt())}")
-
-        # all parameters have been filterd out
-        if len(sum_square_list) == 0:
-            return params_grads
-
-        global_norm_var = paddle.concat(sum_square_list)
-        global_norm_var = paddle.sum(global_norm_var)
-        global_norm_var = paddle.sqrt(global_norm_var)
-
-        # debug log
-        logger.debug(f"Grad Global Norm: {float(global_norm_var)}!!!!")
-
-        max_global_norm = paddle.full(
-            shape=[1], dtype=global_norm_var.dtype, fill_value=self.clip_norm)
-        clip_var = paddle.divide(
-            x=max_global_norm,
-            y=paddle.maximum(x=global_norm_var, y=max_global_norm))
-        for i, (p, g) in enumerate(params_grads):
-            if g is None:
-                continue
-            if getattr(p, 'need_clip', True) is False:
-                params_and_grads.append((p, g))
-                continue
-            new_grad = paddle.multiply(x=g, y=clip_var)
-            params_and_grads.append((p, new_grad))
-
-            # debug log, not dump all since slow down train process
-            if i < 10:
-                logger.debug(
-                    f"Grad After Clip: {p.name}: {float(new_grad.square().sum().sqrt())}"
-                )
-
-        return params_and_grads
diff --git a/paddlespeech/s2t/training/optimizer/__init__.py b/paddlespeech/s2t/training/optimizer/__init__.py
index aafdc5b6..0f998dde 100644
--- a/paddlespeech/s2t/training/optimizer/__init__.py
+++ b/paddlespeech/s2t/training/optimizer/__init__.py
@@ -19,7 +19,7 @@ from typing import Text
 import paddle
 from paddle.optimizer import Optimizer
 from paddle.regularizer import L2Decay
-from paddlespeech.s2t.training.gradclip import ClipGradByGlobalNormWithLog
+
 from paddlespeech.s2t.utils.dynamic_import import dynamic_import
 from paddlespeech.s2t.utils.dynamic_import import instance_class
 from paddlespeech.s2t.utils.log import Log
@@ -100,7 +100,7 @@ class OptimizerFactory():
         assert "parameters" in args, "parameters not in args."
         assert "learning_rate" in args, "learning_rate not in args."
-        grad_clip = ClipGradByGlobalNormWithLog(
+        grad_clip = paddle.nn.ClipGradByGlobalNorm(
             args['grad_clip']) if "grad_clip" in args else None
         weight_decay = L2Decay(
             args['weight_decay']) if "weight_decay" in args else None
diff --git a/tests/unit/tts/test_ssml.py b/tests/unit/tts/test_ssml.py
index 2c240183..4c3e9d53 100644
--- a/tests/unit/tts/test_ssml.py
+++ b/tests/unit/tts/test_ssml.py
@@ -72,3 +72,12 @@ if __name__ == '__main__':
     for i, sub in enumerate(outs):
         print(i, sub)
     print()
+
+    import json
+    import xmltodict
+    text = "我们的声学模型使用了 Fast Speech Two。前浪在沙滩上,沙滩上倒了一堆。 想象干干的树干了, 里面有个干尸,不知是被谁死的。"
+    ssml = xmltodict.parse(text)
+    print(json.dumps(ssml))
+    print(ssml['speak'].keys())
+    print(ssml['speak']['#text'])
+    print(ssml['speak']['say-as'])
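
Note on the main change: the patch swaps the hand-rolled ClipGradByGlobalNormWithLog (whose _dygraph_clip builds the global norm via paddle.concat over per-parameter sums and a paddle.full(shape=[1], ...) threshold, presumably the code path that breaks once reductions such as paddle.sum return 0-D tensors) for the built-in paddle.nn.ClipGradByGlobalNorm. Below is a minimal sketch of the new wiring, using a hypothetical Linear model and made-up values in place of config.global_grad_clip, config.lr and config.lr_decay:

    import paddle

    # hypothetical stand-ins for the real model and config values
    model = paddle.nn.Linear(10, 10)
    grad_clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=5.0)  # was ClipGradByGlobalNormWithLog
    lr_scheduler = paddle.optimizer.lr.ExponentialDecay(learning_rate=2e-3, gamma=0.9)
    optimizer = paddle.optimizer.Adam(
        learning_rate=lr_scheduler,
        parameters=model.parameters(),
        grad_clip=grad_clip)

    loss = model(paddle.randn([4, 10])).mean()
    loss.backward()
    optimizer.step()        # gradients are clipped by global norm before the update
    optimizer.clear_grad()

The built-in clipper performs the same global-norm scaling; what is lost is only the per-parameter before/after debug logging that the deleted subclass added.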
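Note on the appended tests/unit/tts/test_ssml.py block: the lookups ssml['speak'], ssml['speak']['#text'] and ssml['speak']['say-as'] assume the parsed string is a <speak> document containing <say-as> elements. A hypothetical snippet (not the test's own string) illustrating the dict shape xmltodict.parse produces for such markup:

    import json

    import xmltodict

    # made-up SSML fragment: a <speak> root with one <say-as> child
    ssml_text = "<speak>前浪<say-as pinyin='dao3'>倒</say-as>在沙滩上。</speak>"
    doc = xmltodict.parse(ssml_text)

    print(json.dumps(doc, ensure_ascii=False))  # nested dicts keyed by tag name
    print(doc['speak'].keys())    # child tags plus '#text' for the surrounding character data
    print(doc['speak']['#text'])  # text outside the <say-as> element
    print(doc['speak']['say-as']) # the <say-as> element; attributes appear under '@'-prefixed keys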