|
|
@ -608,108 +608,6 @@ class Equalizer(BaseTransform):
|
|
|
|
return signal.equalizer(eq)
|
|
|
|
return signal.equalizer(eq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class Quantization(BaseTransform):
|
|
|
|
|
|
|
|
# """Applies quantization to the input waveform. Corresponds
|
|
|
|
|
|
|
|
# to :py:func:`audiotools.core.effects.EffectMixin.quantization`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# channels : tuple, optional
|
|
|
|
|
|
|
|
# Number of evenly spaced quantization channels to quantize
|
|
|
|
|
|
|
|
# to, by default ("choice", [8, 32, 128, 256, 1024])
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1.0,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# self.channels = channels
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState):
|
|
|
|
|
|
|
|
# return {"channels": util.sample_from_dist(self.channels, state)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, channels):
|
|
|
|
|
|
|
|
# return signal.quantization(channels)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class MuLawQuantization(BaseTransform):
|
|
|
|
|
|
|
|
# """Applies mu-law quantization to the input waveform. Corresponds
|
|
|
|
|
|
|
|
# to :py:func:`audiotools.core.effects.EffectMixin.mulaw_quantization`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# channels : tuple, optional
|
|
|
|
|
|
|
|
# Number of mu-law spaced quantization channels to quantize
|
|
|
|
|
|
|
|
# to, by default ("choice", [8, 32, 128, 256, 1024])
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1.0,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# self.channels = channels
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState):
|
|
|
|
|
|
|
|
# return {"channels": util.sample_from_dist(self.channels, state)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, channels):
|
|
|
|
|
|
|
|
# return signal.mulaw_quantization(channels)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class NoiseFloor(BaseTransform):
|
|
|
|
|
|
|
|
# """Adds a noise floor of Gaussian noise to the signal at a specified
|
|
|
|
|
|
|
|
# dB.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# db : tuple, optional
|
|
|
|
|
|
|
|
# Level of noise to add to signal, by default ("const", -50.0)
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# db: tuple = ("const", -50.0),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1.0,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# self.db = db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState, signal: AudioSignal):
|
|
|
|
|
|
|
|
# db = util.sample_from_dist(self.db, state)
|
|
|
|
|
|
|
|
# audio_data = state.randn(signal.num_channels, signal.signal_length)
|
|
|
|
|
|
|
|
# nz_signal = AudioSignal(audio_data, signal.sample_rate)
|
|
|
|
|
|
|
|
# nz_signal.normalize(db)
|
|
|
|
|
|
|
|
# return {"nz_signal": nz_signal}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, nz_signal):
|
|
|
|
|
|
|
|
# # Clone bg_signal so that transform can be repeatedly applied
|
|
|
|
|
|
|
|
# # to different signals with the same effect.
|
|
|
|
|
|
|
|
# return signal + nz_signal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class BackgroundNoise(BaseTransform):
|
|
|
|
class BackgroundNoise(BaseTransform):
|
|
|
|
"""Adds background noise from audio specified by a set of CSV files.
|
|
|
|
"""Adds background noise from audio specified by a set of CSV files.
|
|
|
|
A valid CSV file looks like, and is typically generated by
|
|
|
|
A valid CSV file looks like, and is typically generated by
|
|
|
@ -796,68 +694,6 @@ class BackgroundNoise(BaseTransform):
|
|
|
|
return signal.mix(bg_signal.clone(), snr, eq)
|
|
|
|
return signal.mix(bg_signal.clone(), snr, eq)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class CrossTalk(BaseTransform):
|
|
|
|
|
|
|
|
# """Adds crosstalk between speakers, whose audio is drawn from a CSV file
|
|
|
|
|
|
|
|
# that was produced via :py:func:`audiotools.data.preprocess.create_csv`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix`
|
|
|
|
|
|
|
|
# under the hood.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# snr : tuple, optional
|
|
|
|
|
|
|
|
# How loud cross-talk speaker is relative to original signal in dB,
|
|
|
|
|
|
|
|
# by default ("uniform", 0.0, 10.0)
|
|
|
|
|
|
|
|
# sources : List[str], optional
|
|
|
|
|
|
|
|
# Sources containing folders, or CSVs with paths to audio files,
|
|
|
|
|
|
|
|
# by default None
|
|
|
|
|
|
|
|
# weights : List[float], optional
|
|
|
|
|
|
|
|
# Weights to sample audio files from each source, by default None
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# loudness_cutoff : float, optional
|
|
|
|
|
|
|
|
# Loudness cutoff when loading from audio files, by default -40
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# snr: tuple = ("uniform", 0.0, 10.0),
|
|
|
|
|
|
|
|
# sources: List[str] = None,
|
|
|
|
|
|
|
|
# weights: List[float] = None,
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1.0,
|
|
|
|
|
|
|
|
# loudness_cutoff: float = -40,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# self.snr = snr
|
|
|
|
|
|
|
|
# self.loader = AudioLoader(sources, weights)
|
|
|
|
|
|
|
|
# self.loudness_cutoff = loudness_cutoff
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState, signal: AudioSignal):
|
|
|
|
|
|
|
|
# snr = util.sample_from_dist(self.snr, state)
|
|
|
|
|
|
|
|
# crosstalk_signal = self.loader(
|
|
|
|
|
|
|
|
# state,
|
|
|
|
|
|
|
|
# signal.sample_rate,
|
|
|
|
|
|
|
|
# duration=signal.signal_duration,
|
|
|
|
|
|
|
|
# loudness_cutoff=self.loudness_cutoff,
|
|
|
|
|
|
|
|
# num_channels=signal.num_channels,
|
|
|
|
|
|
|
|
# )["signal"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# return {"crosstalk_signal": crosstalk_signal, "snr": snr}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, crosstalk_signal, snr):
|
|
|
|
|
|
|
|
# # Clone crosstalk_signal so that transform can be repeatedly applied
|
|
|
|
|
|
|
|
# # to different signals with the same effect.
|
|
|
|
|
|
|
|
# loudness = signal.loudness()
|
|
|
|
|
|
|
|
# mix = signal.mix(crosstalk_signal.clone(), snr)
|
|
|
|
|
|
|
|
# mix.normalize(loudness)
|
|
|
|
|
|
|
|
# return mix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class RoomImpulseResponse(BaseTransform):
|
|
|
|
class RoomImpulseResponse(BaseTransform):
|
|
|
|
"""Convolves signal with a room impulse response, at a specified
|
|
|
|
"""Convolves signal with a room impulse response, at a specified
|
|
|
|
direct-to-reverberant ratio, with equalization applied. Room impulse
|
|
|
|
direct-to-reverberant ratio, with equalization applied. Room impulse
|
|
|
@ -942,38 +778,6 @@ class RoomImpulseResponse(BaseTransform):
|
|
|
|
use_original_phase=self.use_original_phase)
|
|
|
|
use_original_phase=self.use_original_phase)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class VolumeChange(BaseTransform):
|
|
|
|
|
|
|
|
# """Changes the volume of the input signal.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# db : tuple, optional
|
|
|
|
|
|
|
|
# Change in volume in decibels, by default ("uniform", -12.0, 0.0)
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# db: tuple = ("uniform", -12.0, 0.0),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1.0,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
# self.db = db
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState):
|
|
|
|
|
|
|
|
# return {"db": util.sample_from_dist(self.db, state)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, db):
|
|
|
|
|
|
|
|
# return signal.volume_change(db)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VolumeNorm(BaseTransform):
|
|
|
|
class VolumeNorm(BaseTransform):
|
|
|
|
"""Normalizes the volume of the excerpt to a specified decibel.
|
|
|
|
"""Normalizes the volume of the excerpt to a specified decibel.
|
|
|
|
|
|
|
|
|
|
|
@ -1169,111 +973,6 @@ class HighPass(BaseTransform):
|
|
|
|
return signal.high_pass(cutoff, zeros=self.zeros)
|
|
|
|
return signal.high_pass(cutoff, zeros=self.zeros)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class RescaleAudio(BaseTransform):
|
|
|
|
|
|
|
|
# """Rescales the audio so it is in between ``-val`` and ``val``
|
|
|
|
|
|
|
|
# only if the original audio exceeds those bounds. Useful if
|
|
|
|
|
|
|
|
# transforms have caused the audio to clip.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Uses :py:func:`audiotools.core.effects.EffectMixin.ensure_max_of_audio`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# val : float, optional
|
|
|
|
|
|
|
|
# Max absolute value of signal, by default 1.0
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(self, val: float = 1.0, name: str = None, prob: float = 1):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# self.val = val
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal):
|
|
|
|
|
|
|
|
# return signal.ensure_max_of_audio(self.val)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class ShiftPhase(SpectralTransform):
|
|
|
|
|
|
|
|
# """Shifts the phase of the audio.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# shift : tuple, optional
|
|
|
|
|
|
|
|
# How much to shift phase by, by default ("uniform", -np.pi, np.pi)
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# shift: tuple = ("uniform", -np.pi, np.pi),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
# self.shift = shift
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState):
|
|
|
|
|
|
|
|
# return {"shift": util.sample_from_dist(self.shift, state)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, shift):
|
|
|
|
|
|
|
|
# return signal.shift_phase(shift)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class InvertPhase(ShiftPhase):
|
|
|
|
|
|
|
|
# """Inverts the phase of the audio.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(self, name: str = None, prob: float = 1):
|
|
|
|
|
|
|
|
# super().__init__(shift=("const", np.pi), name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class CorruptPhase(SpectralTransform):
|
|
|
|
|
|
|
|
# """Corrupts the phase of the audio.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Uses :py:func:`audiotools.core.dsp.DSPMixin.corrupt_phase`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# scale : tuple, optional
|
|
|
|
|
|
|
|
# How much to corrupt phase by, by default ("uniform", 0, np.pi)
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self, scale: tuple = ("uniform", 0, np.pi), name: str = None, prob: float = 1
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
# self.scale = scale
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
|
|
|
|
|
|
|
|
# scale = util.sample_from_dist(self.scale, state)
|
|
|
|
|
|
|
|
# corruption = state.normal(scale=scale, size=signal.phase.shape[1:])
|
|
|
|
|
|
|
|
# return {"corruption": corruption.astype("float32")}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, corruption):
|
|
|
|
|
|
|
|
# return signal.shift_phase(shift=corruption)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FrequencyMask(SpectralTransform):
|
|
|
|
class FrequencyMask(SpectralTransform):
|
|
|
|
"""Masks a band of frequencies at a center frequency
|
|
|
|
"""Masks a band of frequencies at a center frequency
|
|
|
|
from the audio.
|
|
|
|
from the audio.
|
|
|
@ -1363,39 +1062,6 @@ class TimeMask(SpectralTransform):
|
|
|
|
return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
|
|
|
|
return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class MaskLowMagnitudes(SpectralTransform):
|
|
|
|
|
|
|
|
# """Masks low magnitude regions out of signal.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_low_magnitudes`.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# db_cutoff : tuple, optional
|
|
|
|
|
|
|
|
# Decibel value for which things below it will be masked away,
|
|
|
|
|
|
|
|
# by default ("uniform", -10, 10)
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# db_cutoff: tuple = ("uniform", -10, 10),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(name=name, prob=prob)
|
|
|
|
|
|
|
|
# self.db_cutoff = db_cutoff
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
|
|
|
|
|
|
|
|
# return {"db_cutoff": util.sample_from_dist(self.db_cutoff, state)}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, db_cutoff: float):
|
|
|
|
|
|
|
|
# return signal.mask_low_magnitudes(db_cutoff)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Smoothing(BaseTransform):
|
|
|
|
class Smoothing(BaseTransform):
|
|
|
|
"""Convolves the signal with a smoothing window.
|
|
|
|
"""Convolves the signal with a smoothing window.
|
|
|
|
|
|
|
|
|
|
|
@ -1445,48 +1111,6 @@ class Smoothing(BaseTransform):
|
|
|
|
return out
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class TimeNoise(TimeMask):
|
|
|
|
|
|
|
|
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
|
|
|
|
|
|
|
|
# replaces with noise instead of zeros.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# t_center : tuple, optional
|
|
|
|
|
|
|
|
# Center time in terms of 0.0 and 1.0 (duration of signal),
|
|
|
|
|
|
|
|
# by default ("uniform", 0.0, 1.0)
|
|
|
|
|
|
|
|
# t_width : tuple, optional
|
|
|
|
|
|
|
|
# Width of dropped out portion, by default ("const", 0.025)
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# t_center: tuple = ("uniform", 0.0, 1.0),
|
|
|
|
|
|
|
|
# t_width: tuple = ("const", 0.025),
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(t_center=t_center, t_width=t_width, name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, tmin_s: float, tmax_s: float):
|
|
|
|
|
|
|
|
# signal = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s, val=0.0)
|
|
|
|
|
|
|
|
# mag, phase = signal.magnitude, signal.phase
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# mag_r, phase_r = torch.randn_like(mag), torch.randn_like(phase)
|
|
|
|
|
|
|
|
# mask = (mag == 0.0) * (phase == 0.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# mag[mask] = mag_r[mask]
|
|
|
|
|
|
|
|
# phase[mask] = phase_r[mask]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# signal.magnitude = mag
|
|
|
|
|
|
|
|
# signal.phase = phase
|
|
|
|
|
|
|
|
# return signal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FrequencyNoise(FrequencyMask):
|
|
|
|
class FrequencyNoise(FrequencyMask):
|
|
|
|
"""Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
|
|
|
|
"""Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
|
|
|
|
replaces with noise instead of zeros.
|
|
|
|
replaces with noise instead of zeros.
|
|
|
@ -1530,59 +1154,3 @@ class FrequencyNoise(FrequencyMask):
|
|
|
|
signal.magnitude = mag
|
|
|
|
signal.magnitude = mag
|
|
|
|
signal.phase = phase
|
|
|
|
signal.phase = phase
|
|
|
|
return signal
|
|
|
|
return signal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# class SpectralDenoising(Equalizer):
|
|
|
|
|
|
|
|
# """Applies denoising algorithm detailed in
|
|
|
|
|
|
|
|
# :py:func:`audiotools.ml.layers.spectral_gate.SpectralGate`,
|
|
|
|
|
|
|
|
# using a randomly generated noise signal for denoising.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parameters
|
|
|
|
|
|
|
|
# ----------
|
|
|
|
|
|
|
|
# eq_amount : tuple, optional
|
|
|
|
|
|
|
|
# Amount of eq to apply to noise signal, by default ("const", 1.0)
|
|
|
|
|
|
|
|
# denoise_amount : tuple, optional
|
|
|
|
|
|
|
|
# Amount to denoise by, by default ("uniform", 0.8, 1.0)
|
|
|
|
|
|
|
|
# nz_volume : float, optional
|
|
|
|
|
|
|
|
# Volume of noise to denoise with, by default -40
|
|
|
|
|
|
|
|
# n_bands : int, optional
|
|
|
|
|
|
|
|
# Number of bands in equalizer, by default 6
|
|
|
|
|
|
|
|
# n_freq : int, optional
|
|
|
|
|
|
|
|
# Number of frequency bins to smooth by, by default 3
|
|
|
|
|
|
|
|
# n_time : int, optional
|
|
|
|
|
|
|
|
# Number of time bins to smooth by, by default 5
|
|
|
|
|
|
|
|
# name : str, optional
|
|
|
|
|
|
|
|
# Name of this transform, used to identify it in the dictionary
|
|
|
|
|
|
|
|
# produced by ``self.instantiate``, by default None
|
|
|
|
|
|
|
|
# prob : float, optional
|
|
|
|
|
|
|
|
# Probability of applying this transform, by default 1.0
|
|
|
|
|
|
|
|
# """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def __init__(
|
|
|
|
|
|
|
|
# self,
|
|
|
|
|
|
|
|
# eq_amount: tuple = ("const", 1.0),
|
|
|
|
|
|
|
|
# denoise_amount: tuple = ("uniform", 0.8, 1.0),
|
|
|
|
|
|
|
|
# nz_volume: float = -40,
|
|
|
|
|
|
|
|
# n_bands: int = 6,
|
|
|
|
|
|
|
|
# n_freq: int = 3,
|
|
|
|
|
|
|
|
# n_time: int = 5,
|
|
|
|
|
|
|
|
# name: str = None,
|
|
|
|
|
|
|
|
# prob: float = 1,
|
|
|
|
|
|
|
|
# ):
|
|
|
|
|
|
|
|
# super().__init__(eq_amount=eq_amount, n_bands=n_bands, name=name, prob=prob)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# self.nz_volume = nz_volume
|
|
|
|
|
|
|
|
# self.denoise_amount = denoise_amount
|
|
|
|
|
|
|
|
# self.spectral_gate = ml.layers.SpectralGate(n_freq, n_time)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _transform(self, signal, nz, eq, denoise_amount):
|
|
|
|
|
|
|
|
# nz = nz.normalize(self.nz_volume).equalizer(eq)
|
|
|
|
|
|
|
|
# self.spectral_gate = self.spectral_gate.to(signal.device)
|
|
|
|
|
|
|
|
# signal = self.spectral_gate(signal, nz, denoise_amount)
|
|
|
|
|
|
|
|
# return signal
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# def _instantiate(self, state: RandomState):
|
|
|
|
|
|
|
|
# kwargs = super()._instantiate(state)
|
|
|
|
|
|
|
|
# kwargs["denoise_amount"] = util.sample_from_dist(self.denoise_amount, state)
|
|
|
|
|
|
|
|
# kwargs["nz"] = AudioSignal(state.randn(22050), 44100)
|
|
|
|
|
|
|
|
# return kwargs
|
|
|
|
|
|
|
|