From d58286d039068415c3e7aec6c614a770a10a5d30 Mon Sep 17 00:00:00 2001 From: cchenhaifeng <1090056852@qq.com> Date: Mon, 24 Feb 2025 13:53:32 +0800 Subject: [PATCH] fix codestyle --- paddlespeech/audiotools/core/__init__.py | 4 +-- paddlespeech/audiotools/core/audio_signal.py | 38 ++++++++++---------- paddlespeech/audiotools/core/util.py | 4 +-- paddlespeech/t2s/modules/losses.py | 2 +- tests/unit/audiotools/test_audiotools.sh | 1 - tests/unit/ci.sh | 2 +- tests/unit/tts/test_losses.py | 14 ++++---- 7 files changed, 31 insertions(+), 34 deletions(-) diff --git a/paddlespeech/audiotools/core/__init__.py b/paddlespeech/audiotools/core/__init__.py index c1cd524b0..b505b7590 100644 --- a/paddlespeech/audiotools/core/__init__.py +++ b/paddlespeech/audiotools/core/__init__.py @@ -24,5 +24,5 @@ from ._julius import SplitBands from .audio_signal import AudioSignal from .audio_signal import STFTParams from .loudness import Meter -from paddlespeech.t2s.modules import fft_conv1d -from paddlespeech.t2s.modules import FFTConv1D +from ...t2s.modules import fft_conv1d +from ...t2s.modules import FFTConv1D diff --git a/paddlespeech/audiotools/core/audio_signal.py b/paddlespeech/audiotools/core/audio_signal.py index 74e8cac67..7dcb5e120 100644 --- a/paddlespeech/audiotools/core/audio_signal.py +++ b/paddlespeech/audiotools/core/audio_signal.py @@ -19,7 +19,7 @@ import numpy as np import paddle import soundfile -from . import util +from .util import random_state, info as utilinfo, ensure_tensor, move_to_device, exp_compat, _get_value, bool_setitem_compat, bool_index_compat from ._julius import resample_frac from .display import DisplayMixin from .dsp import DSPMixin @@ -245,10 +245,10 @@ class AudioSignal( -------- >>> signal = AudioSignal.excerpt("path/to/audio", duration=5) """ - info = util.info(audio_path) + info = utilinfo(audio_path) total_duration = info.duration - state = util.random_state(state) + state = random_state(state) lower_bound = 0 if offset is None else offset upper_bound = max(total_duration - duration, 0) offset = state.uniform(lower_bound, upper_bound) @@ -305,7 +305,7 @@ class AudioSignal( duration=5 ) """ - state = util.random_state(state) + state = random_state(state) if loudness_cutoff is None: excerpt = cls.excerpt(audio_path, state=state, **kwargs) else: @@ -533,7 +533,7 @@ class AudioSignal( duration=duration, sr=None, mono=False, ) - data = util.ensure_tensor(data) + data = ensure_tensor(data) if data.shape[-1] == 0: raise RuntimeError( f"Audio file {audio_path} with offset {offset} and duration {duration} is empty!" @@ -574,7 +574,7 @@ class AudioSignal( AudioSignal AudioSignal loaded from array """ - audio_data = util.ensure_tensor(audio_array) + audio_data = ensure_tensor(audio_array) if str(audio_data.dtype) == paddle.float64: audio_data = audio_data.astype("float32") @@ -778,11 +778,11 @@ class AudioSignal( AudioSignal with all tensors moved to specified device. """ if self._loudness is not None: - self._loudness = util.move_to_device(self._loudness, device) + self._loudness = move_to_device(self._loudness, device) if self.stft_data is not None: - self.stft_data = util.move_to_device(self.stft_data, device) + self.stft_data = move_to_device(self.stft_data, device) if self.audio_data is not None: - self.audio_data = util.move_to_device(self.audio_data, device) + self.audio_data = move_to_device(self.audio_data, device) return self def float(self): @@ -1486,7 +1486,7 @@ class AudioSignal( @magnitude.setter def magnitude(self, value): - self.stft_data = value * util.exp_compat(1j * self.phase) + self.stft_data = value * exp_compat(1j * self.phase) return def log_magnitude(self, @@ -1551,17 +1551,17 @@ class AudioSignal( @phase.setter def phase(self, value): # - self.stft_data = self.magnitude * util.exp_compat(1j * value) + self.stft_data = self.magnitude * exp_compat(1j * value) return # Operator overloading def __add__(self, other): new_signal = self.clone() - new_signal.audio_data += util._get_value(other) + new_signal.audio_data += _get_value(other) return new_signal def __iadd__(self, other): - self.audio_data += util._get_value(other) + self.audio_data += _get_value(other) return self def __radd__(self, other): @@ -1569,20 +1569,20 @@ class AudioSignal( def __sub__(self, other): new_signal = self.clone() - new_signal.audio_data -= util._get_value(other) + new_signal.audio_data -= _get_value(other) return new_signal def __isub__(self, other): - self.audio_data -= util._get_value(other) + self.audio_data -= _get_value(other) return self def __mul__(self, other): new_signal = self.clone() - new_signal.audio_data *= util._get_value(other) + new_signal.audio_data *= _get_value(other) return new_signal def __imul__(self, other): - self.audio_data *= util._get_value(other) + self.audio_data *= _get_value(other) return self def __rmul__(self, other): @@ -1704,7 +1704,7 @@ class AudioSignal( key] if self._loudness is not None else None # stft_data = self.stft_data[ # key] if self.stft_data is not None else None - stft_data = util.bool_index_compat( + stft_data = bool_index_compat( self.stft_data, key) if self.stft_data is not None else None sources = None @@ -1742,7 +1742,7 @@ class AudioSignal( self._loudness[key] = value._loudness if self.stft_data is not None and value.stft_data is not None: # self.stft_data[key] = value.stft_data - self.stft_data = util.bool_setitem_compat(self.stft_data, key, + self.stft_data = bool_setitem_compat(self.stft_data, key, value.stft_data) return diff --git a/paddlespeech/audiotools/core/util.py b/paddlespeech/audiotools/core/util.py index f53321489..087388b47 100644 --- a/paddlespeech/audiotools/core/util.py +++ b/paddlespeech/audiotools/core/util.py @@ -32,6 +32,7 @@ import soundfile from flatten_dict import flatten from flatten_dict import unflatten +from .audio_signal import AudioSignal from paddlespeech.utils import satisfy_paddle_version from paddlespeech.vector.training.seeding import seed_everything @@ -231,7 +232,6 @@ def ensure_tensor( def _get_value(other): # - from .audio_signal import AudioSignal if isinstance(other, AudioSignal): return other.audio_data @@ -801,7 +801,6 @@ def collate(list_of_dicts: list, n_splits: int=None): batch = {} for k, v in dict_of_lists.items(): if isinstance(v, list): - from .audio_signal import AudioSignal if all(isinstance(s, AudioSignal) for s in v): batch[k] = AudioSignal.batch(v, pad_signals=True) else: @@ -873,7 +872,6 @@ def generate_chord_dataset( """ import librosa - from .audio_signal import AudioSignal from ..data.preprocess import create_csv min_midi = librosa.note_to_midi(min_note) diff --git a/paddlespeech/t2s/modules/losses.py b/paddlespeech/t2s/modules/losses.py index 23b89dd58..05a1ee941 100644 --- a/paddlespeech/t2s/modules/losses.py +++ b/paddlespeech/t2s/modules/losses.py @@ -1554,7 +1554,7 @@ class SISDRLoss(nn.Layer): noise = (e_res**2).sum(axis=1) sdr = -10 * paddle.log10(signal / noise + eps) - if self.clip_min is not None: + if self.clip_min != None: sdr = paddle.clip(sdr, min=self.clip_min) if self.reduction == "mean": diff --git a/tests/unit/audiotools/test_audiotools.sh b/tests/unit/audiotools/test_audiotools.sh index 3a0161900..f69447d62 100644 --- a/tests/unit/audiotools/test_audiotools.sh +++ b/tests/unit/audiotools/test_audiotools.sh @@ -1,4 +1,3 @@ -python -m pip install -r ../../../paddlespeech/audiotools/requirements.txt wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/audio.tar.gz wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/regression.tar.gz tar -zxvf audio.tar.gz diff --git a/tests/unit/ci.sh b/tests/unit/ci.sh index 020f51664..567af2210 100644 --- a/tests/unit/ci.sh +++ b/tests/unit/ci.sh @@ -1,7 +1,7 @@ function main(){ set -ex speech_ci_path=`pwd` - pip install ffmpeg flatten_dict ffmpy + python -m pip install -r ../../paddlespeech/audiotools/requirements.txt echo "Start asr" cd ${speech_ci_path}/asr diff --git a/tests/unit/tts/test_losses.py b/tests/unit/tts/test_losses.py index 5360e657d..f883d5e90 100644 --- a/tests/unit/tts/test_losses.py +++ b/tests/unit/tts/test_losses.py @@ -1,4 +1,4 @@ -# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -30,14 +30,14 @@ def test_multi_scale_stft_loss(): x, y = get_input() loss = MultiScaleSTFTLoss() pd_loss = loss(x, y) - np.allclose(pd_loss.numpy(), 7.5622) + np.allclose(pd_loss.numpy(), 7.562150, rtol=1e-06) def test_sisdr_loss(): x, y = get_input() loss = SISDRLoss() pd_loss = loss(x, y) - np.allclose(pd_loss.numpy(), -145.3776) + np.allclose(pd_loss.numpy(), -145.377640, rtol=1e-06) def test_gan_loss(): @@ -52,10 +52,10 @@ def test_gan_loss(): x, y = get_input() loss = GANLoss(My_discriminator0()) pd_loss0, pd_loss1 = loss(x, y) - np.allclose(pd_loss0.numpy(), -0.1027) - np.allclose(pd_loss1.numpy(), -0.0010) + np.allclose(pd_loss0.numpy(), -0.102722, rtol=1e-06) + np.allclose(pd_loss1.numpy(), -0.001027, rtol=1e-06) loss = GANLoss(My_discriminator1()) pd_loss0, _ = loss.generator_loss(x, y) - np.allclose(pd_loss0.numpy(), 1.0002) + np.allclose(pd_loss0.numpy(), 1.000199, rtol=1e-06) pd_loss = loss.discriminator_loss(x, y) - np.allclose(pd_loss.numpy(), 1.0002) + np.allclose(pd_loss.numpy(), 1.000200, rtol=1e-06)