From d58286d039068415c3e7aec6c614a770a10a5d30 Mon Sep 17 00:00:00 2001
From: cchenhaifeng <1090056852@qq.com>
Date: Mon, 24 Feb 2025 13:53:32 +0800
Subject: [PATCH] fix codestyle

---
 paddlespeech/audiotools/core/__init__.py     |  4 +--
 paddlespeech/audiotools/core/audio_signal.py | 38 ++++++++++----------
 paddlespeech/audiotools/core/util.py         |  4 +--
 paddlespeech/t2s/modules/losses.py           |  2 +-
 tests/unit/audiotools/test_audiotools.sh     |  1 -
 tests/unit/ci.sh                             |  2 +-
 tests/unit/tts/test_losses.py                | 14 ++++----
 7 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/paddlespeech/audiotools/core/__init__.py b/paddlespeech/audiotools/core/__init__.py
index c1cd524b0..b505b7590 100644
--- a/paddlespeech/audiotools/core/__init__.py
+++ b/paddlespeech/audiotools/core/__init__.py
@@ -24,5 +24,5 @@ from ._julius import SplitBands
 from .audio_signal import AudioSignal
 from .audio_signal import STFTParams
 from .loudness import Meter
-from paddlespeech.t2s.modules import fft_conv1d
-from paddlespeech.t2s.modules import FFTConv1D
+from ...t2s.modules import fft_conv1d
+from ...t2s.modules import FFTConv1D
diff --git a/paddlespeech/audiotools/core/audio_signal.py b/paddlespeech/audiotools/core/audio_signal.py
index 74e8cac67..7dcb5e120 100644
--- a/paddlespeech/audiotools/core/audio_signal.py
+++ b/paddlespeech/audiotools/core/audio_signal.py
@@ -19,7 +19,7 @@ import numpy as np
 import paddle
 import soundfile
 
-from . import util
+from .util import random_state, info as utilinfo, ensure_tensor, move_to_device, exp_compat, _get_value, bool_setitem_compat, bool_index_compat
 from ._julius import resample_frac
 from .display import DisplayMixin
 from .dsp import DSPMixin
@@ -245,10 +245,10 @@ class AudioSignal(
         --------
         >>> signal = AudioSignal.excerpt("path/to/audio", duration=5)
         """
-        info = util.info(audio_path)
+        info = utilinfo(audio_path)
         total_duration = info.duration
 
-        state = util.random_state(state)
+        state = random_state(state)
         lower_bound = 0 if offset is None else offset
         upper_bound = max(total_duration - duration, 0)
         offset = state.uniform(lower_bound, upper_bound)
@@ -305,7 +305,7 @@ class AudioSignal(
                 duration=5
             )
         """
-        state = util.random_state(state)
+        state = random_state(state)
         if loudness_cutoff is None:
             excerpt = cls.excerpt(audio_path, state=state, **kwargs)
         else:
@@ -533,7 +533,7 @@ class AudioSignal(
             duration=duration,
             sr=None,
             mono=False, )
-        data = util.ensure_tensor(data)
+        data = ensure_tensor(data)
         if data.shape[-1] == 0:
             raise RuntimeError(
                 f"Audio file {audio_path} with offset {offset} and duration {duration} is empty!"
@@ -574,7 +574,7 @@ class AudioSignal(
         AudioSignal
             AudioSignal loaded from array
         """
-        audio_data = util.ensure_tensor(audio_array)
+        audio_data = ensure_tensor(audio_array)
 
         if str(audio_data.dtype) == paddle.float64:
             audio_data = audio_data.astype("float32")
@@ -778,11 +778,11 @@ class AudioSignal(
             AudioSignal with all tensors moved to specified device.
         """
         if self._loudness is not None:
-            self._loudness = util.move_to_device(self._loudness, device)
+            self._loudness = move_to_device(self._loudness, device)
         if self.stft_data is not None:
-            self.stft_data = util.move_to_device(self.stft_data, device)
+            self.stft_data = move_to_device(self.stft_data, device)
         if self.audio_data is not None:
-            self.audio_data = util.move_to_device(self.audio_data, device)
+            self.audio_data = move_to_device(self.audio_data, device)
         return self
 
     def float(self):
@@ -1486,7 +1486,7 @@ class AudioSignal(
 
     @magnitude.setter
     def magnitude(self, value):
-        self.stft_data = value * util.exp_compat(1j * self.phase)
+        self.stft_data = value * exp_compat(1j * self.phase)
         return
 
     def log_magnitude(self,
@@ -1551,17 +1551,17 @@ class AudioSignal(
     @phase.setter
     def phase(self, value):
         # 
-        self.stft_data = self.magnitude * util.exp_compat(1j * value)
+        self.stft_data = self.magnitude * exp_compat(1j * value)
         return
 
     # Operator overloading
     def __add__(self, other):
         new_signal = self.clone()
-        new_signal.audio_data += util._get_value(other)
+        new_signal.audio_data += _get_value(other)
         return new_signal
 
     def __iadd__(self, other):
-        self.audio_data += util._get_value(other)
+        self.audio_data += _get_value(other)
         return self
 
     def __radd__(self, other):
@@ -1569,20 +1569,20 @@ class AudioSignal(
 
     def __sub__(self, other):
         new_signal = self.clone()
-        new_signal.audio_data -= util._get_value(other)
+        new_signal.audio_data -= _get_value(other)
         return new_signal
 
     def __isub__(self, other):
-        self.audio_data -= util._get_value(other)
+        self.audio_data -= _get_value(other)
         return self
 
     def __mul__(self, other):
         new_signal = self.clone()
-        new_signal.audio_data *= util._get_value(other)
+        new_signal.audio_data *= _get_value(other)
         return new_signal
 
     def __imul__(self, other):
-        self.audio_data *= util._get_value(other)
+        self.audio_data *= _get_value(other)
         return self
 
     def __rmul__(self, other):
@@ -1704,7 +1704,7 @@ class AudioSignal(
                 key] if self._loudness is not None else None
             # stft_data = self.stft_data[
             #     key] if self.stft_data is not None else None
-            stft_data = util.bool_index_compat(
+            stft_data = bool_index_compat(
                 self.stft_data, key) if self.stft_data is not None else None
 
         sources = None
@@ -1742,7 +1742,7 @@ class AudioSignal(
                     self._loudness[key] = value._loudness
             if self.stft_data is not None and value.stft_data is not None:
                 # self.stft_data[key] = value.stft_data
-                self.stft_data = util.bool_setitem_compat(self.stft_data, key,
-                                                          value.stft_data)
+                self.stft_data = bool_setitem_compat(self.stft_data, key,
+                                                     value.stft_data)
             return
 
NOTE(review): the paddlespeech/audiotools/core/util.py changes were dropped from this patch. A module-level `from .audio_signal import AudioSignal` in util.py forms an import cycle with `from .util import ...` in audio_signal.py (ImportError on a partially initialized module), so util.py keeps its function-local imports of AudioSignal.
NOTE(review): the paddlespeech/t2s/modules/losses.py change was dropped from this patch. It replaced `if self.clip_min is not None:` with `if self.clip_min != None:`; the identity comparison is the correct form (PEP 8) and does not invoke overloaded `__ne__`, so the file is left as it was.
diff --git a/tests/unit/audiotools/test_audiotools.sh b/tests/unit/audiotools/test_audiotools.sh
index 3a0161900..f69447d62 100644
--- a/tests/unit/audiotools/test_audiotools.sh
+++ b/tests/unit/audiotools/test_audiotools.sh
@@ -1,4 +1,3 @@
-python -m pip install -r ../../../paddlespeech/audiotools/requirements.txt
 wget  https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/audio.tar.gz
 wget  https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/regression.tar.gz
 tar -zxvf audio.tar.gz
diff --git a/tests/unit/ci.sh b/tests/unit/ci.sh
index 020f51664..567af2210 100644
--- a/tests/unit/ci.sh
+++ b/tests/unit/ci.sh
@@ -1,7 +1,7 @@
 function main(){
   set -ex
   speech_ci_path=`pwd`
-  pip install ffmpeg flatten_dict ffmpy
+  python -m pip install -r ../../paddlespeech/audiotools/requirements.txt
 
   echo "Start asr"
   cd ${speech_ci_path}/asr
diff --git a/tests/unit/tts/test_losses.py b/tests/unit/tts/test_losses.py
index 5360e657d..f883d5e90 100644
--- a/tests/unit/tts/test_losses.py
+++ b/tests/unit/tts/test_losses.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -30,14 +30,14 @@ def test_multi_scale_stft_loss():
     x, y = get_input()
     loss = MultiScaleSTFTLoss()
     pd_loss = loss(x, y)
-    np.allclose(pd_loss.numpy(), 7.5622)
+    assert np.allclose(pd_loss.numpy(), 7.562150, rtol=1e-06, atol=1e-06)
 
 
 def test_sisdr_loss():
     x, y = get_input()
     loss = SISDRLoss()
     pd_loss = loss(x, y)
-    np.allclose(pd_loss.numpy(), -145.3776)
+    assert np.allclose(pd_loss.numpy(), -145.377640, rtol=1e-06, atol=1e-06)
 
 
 def test_gan_loss():
@@ -52,10 +52,10 @@ def test_gan_loss():
     x, y = get_input()
     loss = GANLoss(My_discriminator0())
     pd_loss0, pd_loss1 = loss(x, y)
-    np.allclose(pd_loss0.numpy(), -0.1027)
-    np.allclose(pd_loss1.numpy(), -0.0010)
+    assert np.allclose(pd_loss0.numpy(), -0.102722, rtol=1e-06, atol=1e-06)
+    assert np.allclose(pd_loss1.numpy(), -0.001027, rtol=1e-06, atol=1e-06)
     loss = GANLoss(My_discriminator1())
     pd_loss0, _ = loss.generator_loss(x, y)
-    np.allclose(pd_loss0.numpy(), 1.0002)
+    assert np.allclose(pd_loss0.numpy(), 1.000199, rtol=1e-06, atol=1e-06)
     pd_loss = loss.discriminator_loss(x, y)
-    np.allclose(pd_loss.numpy(), 1.0002)
+    assert np.allclose(pd_loss.numpy(), 1.000200, rtol=1e-06, atol=1e-06)