|
|
@ -127,7 +127,8 @@ class BaseTransform:
|
|
|
|
# masked_batch = {k: v[mask] for k, v in flatten(batch).items()}
|
|
|
|
# masked_batch = {k: v[mask] for k, v in flatten(batch).items()}
|
|
|
|
masked_batch = {}
|
|
|
|
masked_batch = {}
|
|
|
|
for k, v in flatten(batch).items():
|
|
|
|
for k, v in flatten(batch).items():
|
|
|
|
if 0 == mask.dim() and 0 == v.dim():
|
|
|
|
# `v` may be `Tensor` or `AudioSignal`
|
|
|
|
|
|
|
|
if 0 == len(v.shape) and 0 == mask.dim():
|
|
|
|
if mask: # 0d 的 True
|
|
|
|
if mask: # 0d 的 True
|
|
|
|
masked_batch[k] = v[None]
|
|
|
|
masked_batch[k] = v[None]
|
|
|
|
else:
|
|
|
|
else:
|
|
|
@ -998,64 +999,63 @@ class VolumeNorm(BaseTransform):
|
|
|
|
return signal.normalize(db)
|
|
|
|
return signal.normalize(db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class GlobalVolumeNorm(BaseTransform):
    """Normalize a signal using the loudness of the whole source file.

    Similar to :py:func:`audiotools.data.transforms.VolumeNorm`, this
    transform also normalizes the volume of a signal, but it uses
    the volume of the entire audio file the loaded excerpt comes from,
    rather than the volume of just the excerpt. The volume of the
    entire audio file is expected in ``signal.metadata["loudness"]``.
    This is the case when loading audio from a CSV generated by
    :py:func:`audiotools.data.preprocess.create_csv` with
    ``loudness = True``, like the following:

    .. csv-table::
        :header: path,loudness

        daps/produced/f1_script1_produced.wav,-16.299999237060547
        daps/produced/f1_script2_produced.wav,-16.600000381469727
        daps/produced/f1_script3_produced.wav,-17.299999237060547
        daps/produced/f1_script4_produced.wav,-16.100000381469727
        daps/produced/f1_script5_produced.wav,-16.700000762939453
        daps/produced/f3_script1_produced.wav,-16.5

    The ``AudioLoader`` will automatically load the loudness column into
    the metadata of the signal.

    Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.

    Parameters
    ----------
    db : tuple, optional
        dB to normalize signal to, by default ("const", -24)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            db: tuple=("const", -24),
            name: str=None,
            prob: float=1.0, ):
        super().__init__(name=name, prob=prob)

        self.db = db

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        """Compute the gain change (in dB) to apply to ``signal``.

        Returns ``{"db": 0.0}`` when ``signal.metadata`` has no
        ``"loudness"`` entry or when that entry is ``-inf``; otherwise the
        target level sampled from ``self.db`` minus the stored loudness.
        """
        if "loudness" not in signal.metadata:
            # No file-level loudness available: leave the level untouched.
            db_change = 0.0
        elif float(signal.metadata["loudness"]) == float("-inf"):
            # A loudness of -inf would make the subtraction below infinite,
            # so no gain change is applied in that case.
            db_change = 0.0
        else:
            # The target level is only sampled on this branch, so the
            # random state is not consumed when no change is applied.
            db = util.sample_from_dist(self.db, state)
            db_change = db - float(signal.metadata["loudness"])

        return {"db": db_change}

    def _transform(self, signal, db):
        """Apply the gain change ``db`` via ``signal.volume_change``."""
        return signal.volume_change(db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Silence(BaseTransform):
|
|
|
|
class Silence(BaseTransform):
|
|
|
@ -1266,94 +1266,95 @@ class HighPass(BaseTransform):
|
|
|
|
# def _transform(self, signal, corruption):
|
|
|
|
# def _transform(self, signal, corruption):
|
|
|
|
# return signal.shift_phase(shift=corruption)
|
|
|
|
# return signal.shift_phase(shift=corruption)
|
|
|
|
|
|
|
|
|
|
|
|
class FrequencyMask(SpectralTransform):
    """Masks a band of frequencies at a center frequency
    from the audio.

    Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_frequencies`.

    Parameters
    ----------
    f_center : tuple, optional
        Center frequency between 0.0 and 1.0 (Nyquist), by default ("uniform", 0.0, 1.0)
    f_width : tuple, optional
        Width of zero'd out band, by default ("const", 0.1)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            f_center: tuple=("uniform", 0.0, 1.0),
            f_width: tuple=("const", 0.1),
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.f_center = f_center
        self.f_width = f_width

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        """Sample the band to mask and convert its edges to Hz.

        Returns a dict with ``fmin_hz`` and ``fmax_hz``.
        """
        f_center = util.sample_from_dist(self.f_center, state)
        f_width = util.sample_from_dist(self.f_width, state)

        # Clamp the band edges to the normalized range [0.0, 1.0].
        fmin = max(f_center - (f_width / 2), 0.0)
        fmax = min(f_center + (f_width / 2), 1.0)

        # Scale the normalized edges by half the sample rate to get Hz.
        fmin_hz = (signal.sample_rate / 2) * fmin
        fmax_hz = (signal.sample_rate / 2) * fmax

        return {"fmin_hz": fmin_hz, "fmax_hz": fmax_hz}

    def _transform(self, signal, fmin_hz: float, fmax_hz: float):
        """Mask the band ``[fmin_hz, fmax_hz]`` via ``signal.mask_frequencies``."""
        return signal.mask_frequencies(fmin_hz=fmin_hz, fmax_hz=fmax_hz)
|
|
|
|
|
|
|
|
|
|
|
|
class TimeMask(SpectralTransform):
    """Masks out contiguous time-steps from signal.

    Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_timesteps`.

    Parameters
    ----------
    t_center : tuple, optional
        Center time in terms of 0.0 and 1.0 (duration of signal),
        by default ("uniform", 0.0, 1.0)
    t_width : tuple, optional
        Width of dropped out portion, by default ("const", 0.025)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            t_center: tuple=("uniform", 0.0, 1.0),
            t_width: tuple=("const", 0.025),
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.t_center = t_center
        self.t_width = t_width

    def _instantiate(self, state: RandomState, signal: AudioSignal):
        """Sample the time span to mask and convert its edges to seconds.

        Returns a dict with ``tmin_s`` and ``tmax_s``.
        """
        t_center = util.sample_from_dist(self.t_center, state)
        t_width = util.sample_from_dist(self.t_width, state)

        # Clamp the span edges to the normalized range [0.0, 1.0].
        tmin = max(t_center - (t_width / 2), 0.0)
        tmax = min(t_center + (t_width / 2), 1.0)

        # Scale the normalized edges by the signal duration.
        tmin_s = signal.signal_duration * tmin
        tmax_s = signal.signal_duration * tmax
        return {"tmin_s": tmin_s, "tmax_s": tmax_s}

    def _transform(self, signal, tmin_s: float, tmax_s: float):
        """Mask the span ``[tmin_s, tmax_s]`` via ``signal.mask_timesteps``."""
        return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class MaskLowMagnitudes(SpectralTransform):
|
|
|
|
# class MaskLowMagnitudes(SpectralTransform):
|
|
|
|
# """Masks low magnitude regions out of signal.
|
|
|
|
# """Masks low magnitude regions out of signal.
|
|
|
@ -1387,55 +1388,55 @@ class HighPass(BaseTransform):
|
|
|
|
# def _transform(self, signal, db_cutoff: float):
|
|
|
|
# def _transform(self, signal, db_cutoff: float):
|
|
|
|
# return signal.mask_low_magnitudes(db_cutoff)
|
|
|
|
# return signal.mask_low_magnitudes(db_cutoff)
|
|
|
|
|
|
|
|
|
|
|
|
class Smoothing(BaseTransform):
    """Convolves the signal with a smoothing window.

    Uses :py:func:`audiotools.core.effects.EffectMixin.convolve`.

    Parameters
    ----------
    window_type : tuple, optional
        Type of window to use, by default ("const", "average")
    window_length : tuple, optional
        Length of smoothing window, by
        default ("choice", [8, 16, 32, 64, 128, 256, 512])
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            window_type: tuple=("const", "average"),
            window_length: tuple=("choice", [8, 16, 32, 64, 128, 256, 512]),
            name: str=None,
            prob: float=1, ):
        super().__init__(name=name, prob=prob)
        self.window_type = window_type
        self.window_length = window_length

    def _instantiate(self, state: RandomState, signal: AudioSignal=None):
        """Sample a window and wrap it in an ``AudioSignal``.

        NOTE: although ``signal`` defaults to ``None``, it is dereferenced
        below (``signal.get_window`` and ``signal.sample_rate``), so a
        signal must actually be supplied.
        """
        window_type = util.sample_from_dist(self.window_type, state)
        window_length = util.sample_from_dist(self.window_length, state)
        window = signal.get_window(
            window_type=window_type, window_length=window_length, device="cpu")
        return {"window": AudioSignal(window, signal.sample_rate)}

    def _transform(self, signal, window):
        """Convolve ``signal`` with ``window`` and rescale by the peak ratio.

        The output is multiplied by the ratio of the input's peak absolute
        amplitude to the convolved signal's peak absolute amplitude, both
        taken along the last axis.
        """
        # Peak absolute amplitude of the input along the last axis.
        sscale = signal.audio_data.abs().max(axis=-1, keepdim=True)
        # Zero peaks are replaced by 1.0 before forming the ratio below.
        sscale[sscale == 0.0] = 1.0

        out = signal.convolve(window)

        # Same measurement on the convolved signal; the zero replacement
        # keeps the division below from dividing by zero.
        oscale = out.audio_data.abs().max(axis=-1, keepdim=True)
        oscale[oscale == 0.0] = 1.0

        out = out * (sscale / oscale)
        return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class TimeNoise(TimeMask):
|
|
|
|
# class TimeNoise(TimeMask):
|
|
|
|
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
|
|
|
|
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
|
|
|
@ -1478,45 +1479,51 @@ class HighPass(BaseTransform):
|
|
|
|
# signal.phase = phase
|
|
|
|
# signal.phase = phase
|
|
|
|
# return signal
|
|
|
|
# return signal
|
|
|
|
|
|
|
|
|
|
|
|
class FrequencyNoise(FrequencyMask):
    """Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
    replaces with noise instead of zeros.

    Parameters
    ----------
    f_center : tuple, optional
        Center frequency between 0.0 and 1.0 (Nyquist), by default ("uniform", 0.0, 1.0)
    f_width : tuple, optional
        Width of zero'd out band, by default ("const", 0.1)
    name : str, optional
        Name of this transform, used to identify it in the dictionary
        produced by ``self.instantiate``, by default None
    prob : float, optional
        Probability of applying this transform, by default 1.0
    """

    def __init__(
            self,
            f_center: tuple=("uniform", 0.0, 1.0),
            f_width: tuple=("const", 0.1),
            name: str=None,
            prob: float=1, ):
        super().__init__(
            f_center=f_center, f_width=f_width, name=name, prob=prob)

    def _transform(self, signal, fmin_hz: float, fmax_hz: float):
        """Mask ``[fmin_hz, fmax_hz]`` and fill the zeroed bins with noise.

        After ``signal.mask_frequencies``, every bin whose magnitude and
        phase are both exactly 0.0 is replaced by samples from
        ``paddle.randn``.
        """
        signal = signal.mask_frequencies(fmin_hz=fmin_hz, fmax_hz=fmax_hz)
        mag, phase = signal.magnitude, signal.phase

        # Noise tensors matching the shape and dtype of magnitude and phase.
        mag_r = paddle.randn(shape=mag.shape, dtype=mag.dtype)
        phase_r = paddle.randn(shape=phase.shape, dtype=phase.dtype)

        # Select bins where both magnitude and phase are exactly zero.
        mask = (mag == 0.0) * (phase == 0.0)

        # Out-of-place selection in place of the masked assignment
        # ``mag[mask] = mag_r[mask]`` used previously.
        mag = paddle.where(mask, mag_r, mag)
        phase = paddle.where(mask, phase_r, phase)

        signal.magnitude = mag
        signal.phase = phase
        return signal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class SpectralDenoising(Equalizer):
|
|
|
|
# class SpectralDenoising(Equalizer):
|
|
|
|
# """Applies denoising algorithm detailed in
|
|
|
|
# """Applies denoising algorithm detailed in
|
|
|
|