pull/3900/head
drryanhuang 9 months ago
parent 47e81105f7
commit c93cdea39f

@ -1479,7 +1479,7 @@ class AudioSignal(
@magnitude.setter @magnitude.setter
def magnitude(self, value): def magnitude(self, value):
self.stft_data = value * paddle.exp(1j * self.phase) self.stft_data = value * util.exp_compat(1j * self.phase)
return return
def log_magnitude(self, def log_magnitude(self,
@ -1544,7 +1544,7 @@ class AudioSignal(
@phase.setter @phase.setter
def phase(self, value): def phase(self, value):
# #
self.stft_data = self.magnitude * paddle.exp(1j * value) self.stft_data = self.magnitude * util.exp_compat(1j * value)
return return
# Operator overloading # Operator overloading

@ -313,7 +313,7 @@ class DSPMixin:
mag = paddle.where(mask, paddle.full_like(mag, val), mag) mag = paddle.where(mask, paddle.full_like(mag, val), mag)
phase = paddle.where(mask, paddle.full_like(phase, val), phase) phase = paddle.where(mask, paddle.full_like(phase, val), phase)
self.stft_data = mag * paddle.exp(1j * phase) self.stft_data = mag * util.exp_compat(1j * phase)
return self return self
def mask_timesteps( def mask_timesteps(
@ -362,7 +362,7 @@ class DSPMixin:
mag = paddle.where(mask, paddle.full_like(mag, val), mag) mag = paddle.where(mask, paddle.full_like(mag, val), mag)
phase = paddle.where(mask, paddle.full_like(phase, val), phase) phase = paddle.where(mask, paddle.full_like(phase, val), phase)
self.stft_data = mag * paddle.exp(1j * phase) self.stft_data = mag * util.exp_compat(1j * phase)
return self return self
def mask_low_magnitudes( def mask_low_magnitudes(

@ -182,7 +182,7 @@ class EffectMixin:
# Use the input phase # Use the input phase
if use_original_phase: if use_original_phase:
self.stft() self.stft()
self.stft_data = self.magnitude * paddle.exp(1j * phase) self.stft_data = self.magnitude * util.exp_compat(1j * phase)
self.istft() self.istft()
# Rescale to the input's amplitude # Rescale to the input's amplitude
@ -230,7 +230,7 @@ class EffectMixin:
db = util.ensure_tensor(db) db = util.ensure_tensor(db)
ref_db = self.loudness() ref_db = self.loudness()
gain = db - ref_db gain = db - ref_db
gain = paddle.exp(gain * self.GAIN_FACTOR) gain = util.exp_compat(gain * self.GAIN_FACTOR)
self.audio_data = self.audio_data * gain[:, None, None] self.audio_data = self.audio_data * gain[:, None, None]
return self return self
@ -249,7 +249,7 @@ class EffectMixin:
Signal at new volume. Signal at new volume.
""" """
db = util.ensure_tensor(db, ndim=1) db = util.ensure_tensor(db, ndim=1)
gain = paddle.exp(db * self.GAIN_FACTOR) gain = util.exp_compat(db * self.GAIN_FACTOR)
self.audio_data = self.audio_data * gain[:, None, None] self.audio_data = self.audio_data * gain[:, None, None]
return self return self
@ -535,7 +535,7 @@ class EffectMixin:
# unquantize # unquantize
x = (x / mu) * 2 - 1.0 x = (x / mu) * 2 - 1.0
x = paddle.sign(x) * ( x = paddle.sign(x) * (
paddle.exp(paddle.abs(x) * paddle.log1p(mu)) - 1.0) / mu util.exp_compat(paddle.abs(x) * paddle.log1p(mu)) - 1.0) / mu
residual = (self.audio_data - x).detach() residual = (self.audio_data - x).detach()
self.audio_data = self.audio_data - residual self.audio_data = self.audio_data - residual

@ -28,10 +28,37 @@ from flatten_dict import flatten
from flatten_dict import unflatten from flatten_dict import unflatten
from .audio_signal import AudioSignal from .audio_signal import AudioSignal
from paddlespeech.utils import satisfy_paddle_version
# from ..data.preprocess import create_csv # from ..data.preprocess import create_csv
def exp_compat(x):
    """
    Compute the element-wise exponential of the input tensor `x`.

    PaddlePaddle versions below 2.6 do not support the `exp` operation for
    complex tensors. This wrapper hides that difference from callers.

    Args:
        x (paddle.Tensor): The input tensor for which to compute the exponential.

    Returns:
        paddle.Tensor: The result of the exponential operation, as a PaddlePaddle tensor.

    Notes:
        - If the PaddlePaddle version is 2.6 or above, or `x` is not complex,
          `paddle.exp` is used directly. Real tensors never needed a
          workaround, so they keep the native op on every version.
        - For complex tensors on versions below 2.6, the result is assembled
          with Euler's formula, exp(a + ib) = exp(a) * (cos(b) + i * sin(b)),
          using only Paddle ops on the real and imaginary parts. Unlike a
          NumPy round-trip, this needs no host copy and the output stays a
          product of Paddle ops on `x`.
    """
    # Fast path: the native op handles this input on the running Paddle version.
    if satisfy_paddle_version("2.6") or not paddle.is_complex(x):
        return paddle.exp(x)

    # Complex input on old Paddle: build exp(x) from real-valued ops only.
    real_part = paddle.real(x)
    imag_part = paddle.imag(x)
    scale = paddle.exp(real_part)
    return paddle.complex(scale * paddle.cos(imag_part),
                          scale * paddle.sin(imag_part))
@dataclass @dataclass
class Info: class Info:

@ -10,6 +10,7 @@ import rich
sys.path.append("../..") sys.path.append("../..")
import audiotools import audiotools
from audiotools import AudioSignal from audiotools import AudioSignal
from audiotools import util
def test_io(): def test_io():
@ -421,7 +422,7 @@ def test_stft(window_length, hop_length, window_type):
mag = signal.magnitude mag = signal.magnitude
phase = signal.phase phase = signal.phase
recon_stft = mag * paddle.exp(1j * phase) recon_stft = mag * util.exp_compat(1j * phase)
# assert paddle.allclose(recon_stft, signal.stft_data) # assert paddle.allclose(recon_stft, signal.stft_data)
assert np.allclose(recon_stft.cpu().numpy(), assert np.allclose(recon_stft.cpu().numpy(),
signal.stft_data.cpu().numpy()) signal.stft_data.cpu().numpy())
@ -431,7 +432,7 @@ def test_stft(window_length, hop_length, window_type):
signal.stft_data = None signal.stft_data = None
phase = signal.phase phase = signal.phase
recon_stft = mag * paddle.exp(1j * phase) recon_stft = mag * util.exp_compat(1j * phase)
# assert paddle.allclose(recon_stft, signal.stft_data) # assert paddle.allclose(recon_stft, signal.stft_data)
assert np.allclose(recon_stft.cpu().numpy(), assert np.allclose(recon_stft.cpu().numpy(),
signal.stft_data.cpu().numpy()) signal.stft_data.cpu().numpy())

@ -7,6 +7,7 @@ from visualdl import LogWriter
from audiotools.ml.decorators import timer from audiotools.ml.decorators import timer
from audiotools.ml.decorators import Tracker from audiotools.ml.decorators import Tracker
from audiotools.ml.decorators import when from audiotools.ml.decorators import when
from audiotools import util
def test_all_decorators(): def test_all_decorators():
@ -26,12 +27,16 @@ def test_all_decorators():
i = tracker.step i = tracker.step
time.sleep(0.01) time.sleep(0.01)
return { return {
"loss": paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), "loss":
"mel": paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"stft": paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), "mel":
util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"stft":
util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"waveform": "waveform":
paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"not_scalar": paddle.arange(start=0, end=10, step=1, dtype="int64"), "not_scalar":
paddle.arange(start=0, end=10, step=1, dtype="int64"),
} }
@tracker.track("val", len(val_data)) @tracker.track("val", len(val_data))
@ -40,13 +45,18 @@ def test_all_decorators():
i = tracker.step i = tracker.step
time.sleep(0.01) time.sleep(0.01)
return { return {
"loss": paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), "loss":
"mel": paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"stft": paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), "mel":
util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"stft":
util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"waveform": "waveform":
paddle.exp(paddle.to_tensor([-i / 100], dtype="float32")), util.exp_compat(paddle.to_tensor([-i / 100], dtype="float32")),
"not_scalar": paddle.arange(10, dtype="int64"), "not_scalar":
"string": "string", paddle.arange(10, dtype="int64"),
"string":
"string",
} }
@when(lambda: tracker.step % 1000 == 0 and rank == 0) @when(lambda: tracker.step % 1000 == 0 and rank == 0)

Loading…
Cancel
Save