fix codestyle

pull/3988/head
cchenhaifeng 7 months ago
parent 1ed7784727
commit d58286d039

@ -24,5 +24,5 @@ from ._julius import SplitBands
from .audio_signal import AudioSignal from .audio_signal import AudioSignal
from .audio_signal import STFTParams from .audio_signal import STFTParams
from .loudness import Meter from .loudness import Meter
from paddlespeech.t2s.modules import fft_conv1d from ...t2s.modules import fft_conv1d
from paddlespeech.t2s.modules import FFTConv1D from ...t2s.modules import FFTConv1D

@ -19,7 +19,7 @@ import numpy as np
import paddle import paddle
import soundfile import soundfile
from . import util from .util import random_state, info as utilinfo, ensure_tensor, move_to_device, exp_compat, _get_value, bool_setitem_compat, bool_index_compat
from ._julius import resample_frac from ._julius import resample_frac
from .display import DisplayMixin from .display import DisplayMixin
from .dsp import DSPMixin from .dsp import DSPMixin
@ -245,10 +245,10 @@ class AudioSignal(
-------- --------
>>> signal = AudioSignal.excerpt("path/to/audio", duration=5) >>> signal = AudioSignal.excerpt("path/to/audio", duration=5)
""" """
info = util.info(audio_path) info = utilinfo(audio_path)
total_duration = info.duration total_duration = info.duration
state = util.random_state(state) state = random_state(state)
lower_bound = 0 if offset is None else offset lower_bound = 0 if offset is None else offset
upper_bound = max(total_duration - duration, 0) upper_bound = max(total_duration - duration, 0)
offset = state.uniform(lower_bound, upper_bound) offset = state.uniform(lower_bound, upper_bound)
@ -305,7 +305,7 @@ class AudioSignal(
duration=5 duration=5
) )
""" """
state = util.random_state(state) state = random_state(state)
if loudness_cutoff is None: if loudness_cutoff is None:
excerpt = cls.excerpt(audio_path, state=state, **kwargs) excerpt = cls.excerpt(audio_path, state=state, **kwargs)
else: else:
@ -533,7 +533,7 @@ class AudioSignal(
duration=duration, duration=duration,
sr=None, sr=None,
mono=False, ) mono=False, )
data = util.ensure_tensor(data) data = ensure_tensor(data)
if data.shape[-1] == 0: if data.shape[-1] == 0:
raise RuntimeError( raise RuntimeError(
f"Audio file {audio_path} with offset {offset} and duration {duration} is empty!" f"Audio file {audio_path} with offset {offset} and duration {duration} is empty!"
@ -574,7 +574,7 @@ class AudioSignal(
AudioSignal AudioSignal
AudioSignal loaded from array AudioSignal loaded from array
""" """
audio_data = util.ensure_tensor(audio_array) audio_data = ensure_tensor(audio_array)
if str(audio_data.dtype) == paddle.float64: if str(audio_data.dtype) == paddle.float64:
audio_data = audio_data.astype("float32") audio_data = audio_data.astype("float32")
@ -778,11 +778,11 @@ class AudioSignal(
AudioSignal with all tensors moved to specified device. AudioSignal with all tensors moved to specified device.
""" """
if self._loudness is not None: if self._loudness is not None:
self._loudness = util.move_to_device(self._loudness, device) self._loudness = move_to_device(self._loudness, device)
if self.stft_data is not None: if self.stft_data is not None:
self.stft_data = util.move_to_device(self.stft_data, device) self.stft_data = move_to_device(self.stft_data, device)
if self.audio_data is not None: if self.audio_data is not None:
self.audio_data = util.move_to_device(self.audio_data, device) self.audio_data = move_to_device(self.audio_data, device)
return self return self
def float(self): def float(self):
@ -1486,7 +1486,7 @@ class AudioSignal(
@magnitude.setter @magnitude.setter
def magnitude(self, value): def magnitude(self, value):
self.stft_data = value * util.exp_compat(1j * self.phase) self.stft_data = value * exp_compat(1j * self.phase)
return return
def log_magnitude(self, def log_magnitude(self,
@ -1551,17 +1551,17 @@ class AudioSignal(
@phase.setter @phase.setter
def phase(self, value): def phase(self, value):
# #
self.stft_data = self.magnitude * util.exp_compat(1j * value) self.stft_data = self.magnitude * exp_compat(1j * value)
return return
# Operator overloading # Operator overloading
def __add__(self, other): def __add__(self, other):
new_signal = self.clone() new_signal = self.clone()
new_signal.audio_data += util._get_value(other) new_signal.audio_data += _get_value(other)
return new_signal return new_signal
def __iadd__(self, other): def __iadd__(self, other):
self.audio_data += util._get_value(other) self.audio_data += _get_value(other)
return self return self
def __radd__(self, other): def __radd__(self, other):
@ -1569,20 +1569,20 @@ class AudioSignal(
def __sub__(self, other): def __sub__(self, other):
new_signal = self.clone() new_signal = self.clone()
new_signal.audio_data -= util._get_value(other) new_signal.audio_data -= _get_value(other)
return new_signal return new_signal
def __isub__(self, other): def __isub__(self, other):
self.audio_data -= util._get_value(other) self.audio_data -= _get_value(other)
return self return self
def __mul__(self, other): def __mul__(self, other):
new_signal = self.clone() new_signal = self.clone()
new_signal.audio_data *= util._get_value(other) new_signal.audio_data *= _get_value(other)
return new_signal return new_signal
def __imul__(self, other): def __imul__(self, other):
self.audio_data *= util._get_value(other) self.audio_data *= _get_value(other)
return self return self
def __rmul__(self, other): def __rmul__(self, other):
@ -1704,7 +1704,7 @@ class AudioSignal(
key] if self._loudness is not None else None key] if self._loudness is not None else None
# stft_data = self.stft_data[ # stft_data = self.stft_data[
# key] if self.stft_data is not None else None # key] if self.stft_data is not None else None
stft_data = util.bool_index_compat( stft_data = bool_index_compat(
self.stft_data, key) if self.stft_data is not None else None self.stft_data, key) if self.stft_data is not None else None
sources = None sources = None
@ -1742,7 +1742,7 @@ class AudioSignal(
self._loudness[key] = value._loudness self._loudness[key] = value._loudness
if self.stft_data is not None and value.stft_data is not None: if self.stft_data is not None and value.stft_data is not None:
# self.stft_data[key] = value.stft_data # self.stft_data[key] = value.stft_data
self.stft_data = util.bool_setitem_compat(self.stft_data, key, self.stft_data = bool_setitem_compat(self.stft_data, key,
value.stft_data) value.stft_data)
return return

@ -32,6 +32,7 @@ import soundfile
from flatten_dict import flatten from flatten_dict import flatten
from flatten_dict import unflatten from flatten_dict import unflatten
from .audio_signal import AudioSignal
from paddlespeech.utils import satisfy_paddle_version from paddlespeech.utils import satisfy_paddle_version
from paddlespeech.vector.training.seeding import seed_everything from paddlespeech.vector.training.seeding import seed_everything
@ -231,7 +232,6 @@ def ensure_tensor(
def _get_value(other): def _get_value(other):
# #
from .audio_signal import AudioSignal
if isinstance(other, AudioSignal): if isinstance(other, AudioSignal):
return other.audio_data return other.audio_data
@ -801,7 +801,6 @@ def collate(list_of_dicts: list, n_splits: int=None):
batch = {} batch = {}
for k, v in dict_of_lists.items(): for k, v in dict_of_lists.items():
if isinstance(v, list): if isinstance(v, list):
from .audio_signal import AudioSignal
if all(isinstance(s, AudioSignal) for s in v): if all(isinstance(s, AudioSignal) for s in v):
batch[k] = AudioSignal.batch(v, pad_signals=True) batch[k] = AudioSignal.batch(v, pad_signals=True)
else: else:
@ -873,7 +872,6 @@ def generate_chord_dataset(
""" """
import librosa import librosa
from .audio_signal import AudioSignal
from ..data.preprocess import create_csv from ..data.preprocess import create_csv
min_midi = librosa.note_to_midi(min_note) min_midi = librosa.note_to_midi(min_note)

@ -1554,7 +1554,7 @@ class SISDRLoss(nn.Layer):
noise = (e_res**2).sum(axis=1) noise = (e_res**2).sum(axis=1)
sdr = -10 * paddle.log10(signal / noise + eps) sdr = -10 * paddle.log10(signal / noise + eps)
if self.clip_min is not None: if self.clip_min != None:
sdr = paddle.clip(sdr, min=self.clip_min) sdr = paddle.clip(sdr, min=self.clip_min)
if self.reduction == "mean": if self.reduction == "mean":

@ -1,4 +1,3 @@
python -m pip install -r ../../../paddlespeech/audiotools/requirements.txt
wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/audio.tar.gz wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/audio.tar.gz
wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/regression.tar.gz wget https://paddlespeech.bj.bcebos.com/PaddleAudio/audio_tools/regression.tar.gz
tar -zxvf audio.tar.gz tar -zxvf audio.tar.gz

@ -1,7 +1,7 @@
function main(){ function main(){
set -ex set -ex
speech_ci_path=`pwd` speech_ci_path=`pwd`
pip install ffmpeg flatten_dict ffmpy python -m pip install -r ../../paddlespeech/audiotools/requirements.txt
echo "Start asr" echo "Start asr"
cd ${speech_ci_path}/asr cd ${speech_ci_path}/asr

@ -1,4 +1,4 @@
# Copyright (c) 2024 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
@ -30,14 +30,14 @@ def test_multi_scale_stft_loss():
x, y = get_input() x, y = get_input()
loss = MultiScaleSTFTLoss() loss = MultiScaleSTFTLoss()
pd_loss = loss(x, y) pd_loss = loss(x, y)
np.allclose(pd_loss.numpy(), 7.5622) np.allclose(pd_loss.numpy(), 7.562150, rtol=1e-06)
def test_sisdr_loss(): def test_sisdr_loss():
x, y = get_input() x, y = get_input()
loss = SISDRLoss() loss = SISDRLoss()
pd_loss = loss(x, y) pd_loss = loss(x, y)
np.allclose(pd_loss.numpy(), -145.3776) np.allclose(pd_loss.numpy(), -145.377640, rtol=1e-06)
def test_gan_loss(): def test_gan_loss():
@ -52,10 +52,10 @@ def test_gan_loss():
x, y = get_input() x, y = get_input()
loss = GANLoss(My_discriminator0()) loss = GANLoss(My_discriminator0())
pd_loss0, pd_loss1 = loss(x, y) pd_loss0, pd_loss1 = loss(x, y)
np.allclose(pd_loss0.numpy(), -0.1027) np.allclose(pd_loss0.numpy(), -0.102722, rtol=1e-06)
np.allclose(pd_loss1.numpy(), -0.0010) np.allclose(pd_loss1.numpy(), -0.001027, rtol=1e-06)
loss = GANLoss(My_discriminator1()) loss = GANLoss(My_discriminator1())
pd_loss0, _ = loss.generator_loss(x, y) pd_loss0, _ = loss.generator_loss(x, y)
np.allclose(pd_loss0.numpy(), 1.0002) np.allclose(pd_loss0.numpy(), 1.000199, rtol=1e-06)
pd_loss = loss.discriminator_loss(x, y) pd_loss = loss.discriminator_loss(x, y)
np.allclose(pd_loss.numpy(), 1.0002) np.allclose(pd_loss.numpy(), 1.000200, rtol=1e-06)

Loading…
Cancel
Save