From 50e4f4ead254bbbf070524b8414e1f15a3366253 Mon Sep 17 00:00:00 2001 From: cchenhaifeng <1090056852@qq.com> Date: Tue, 25 Feb 2025 12:35:57 +0800 Subject: [PATCH] fix codestyle --- paddlespeech/__init__.py | 1 + paddlespeech/audiotools/core/util.py | 3 -- paddlespeech/t2s/modules/losses.py | 43 ++++++++++++++++++++++++++++ 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/paddlespeech/__init__.py b/paddlespeech/__init__.py index 6c7e75c1f..8fdc835b8 100644 --- a/paddlespeech/__init__.py +++ b/paddlespeech/__init__.py @@ -13,3 +13,4 @@ # limitations under the License. import _locale _locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8']) + diff --git a/paddlespeech/audiotools/core/util.py b/paddlespeech/audiotools/core/util.py index 234f30d37..71cd07e2c 100644 --- a/paddlespeech/audiotools/core/util.py +++ b/paddlespeech/audiotools/core/util.py @@ -231,7 +231,6 @@ def ensure_tensor( def _get_value(other): # - from .audio_signal import AudioSignal if isinstance(other, AudioSignal): return other.audio_data return other @@ -800,7 +799,6 @@ def collate(list_of_dicts: list, n_splits: int=None): batch = {} for k, v in dict_of_lists.items(): if isinstance(v, list): - from .audio_signal import AudioSignal if all(isinstance(s, AudioSignal) for s in v): batch[k] = AudioSignal.batch(v, pad_signals=True) else: @@ -872,7 +870,6 @@ def generate_chord_dataset( """ import librosa - from .audio_signal import AudioSignal from ..data.preprocess import create_csv min_midi = librosa.note_to_midi(min_note) diff --git a/paddlespeech/t2s/modules/losses.py b/paddlespeech/t2s/modules/losses.py index 05a1ee941..a1a65a9dc 100644 --- a/paddlespeech/t2s/modules/losses.py +++ b/paddlespeech/t2s/modules/losses.py @@ -1408,6 +1408,16 @@ class MultiScaleSTFTLoss(nn.Layer): Returns: paddle.Tensor Multi-scale STFT loss. 
+ + Example: + >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal + >>> import paddle + + >>> x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", 2_05) + >>> y = x * 0.01 + >>> loss = MultiScaleSTFTLoss() + >>> loss(x, y).numpy() + 7.562150 """ for s in self.stft_params: x.stft(s.window_length, s.hop_length, s.window_type) @@ -1425,6 +1435,29 @@ class GANLoss(nn.Layer): generated waveforms/spectrograms compared to ground truth waveforms/spectrograms. Computes the loss for both the discriminator and the generator in separate functions. + + Example: + >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal + >>> import paddle + + >>> x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", 2_05) + >>> y = x * 0.01 + >>> class My_discriminator0: + ... def __call__(self, x): + ... return x.sum() + >>> loss = GANLoss(My_discriminator0()) + >>> [loss(x, y)[0].numpy(), loss(x, y)[1].numpy()] + [-0.102722, -0.001027] + + >>> class My_discriminator1: + ... def __call__(self, x): + ... return x.sum() + >>> loss = GANLoss(My_discriminator1()) + >>> [loss.generator_loss(x, y)[0].numpy(), loss.generator_loss(x, y)[1].numpy()] + [1.00019, 0] + + >>> loss.discriminator_loss(x, y) + 1.000200 """ def __init__(self, discriminator): @@ -1480,6 +1513,16 @@ class SISDRLoss(nn.Layer): of estimated and reference audio signals or aligned features. Implementation copied from: https://github.com/descriptinc/audiotools/blob/master/audiotools/metrics/distance.py + + Example: + >>> from paddlespeech.audiotools.core.audio_signal import AudioSignal + >>> import paddle + + >>> x = AudioSignal("https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav", 2_05) + >>> y = x * 0.01 + >>> sisdr = SISDRLoss() + >>> sisdr(x, y).numpy() + -145.377640 """ def __init__(