rm some useless comment

pull/3900/head
drryanhuang 9 months ago
parent 1bfc9bc2b2
commit 3599089040

@@ -245,177 +245,6 @@ def pure_tone(freq: float, sr: float=128, dur: float=4, device=None):
return paddle.cos(2 * math.pi * freq * time)
# def _new_rfft(x: paddle.Tensor):
# z = paddle.fft.rfft(x, axis=-1)
# z_real = paddle.real(z)
# z_imag = paddle.imag(z)
# z_view_as_real = paddle.stack([z_real, z_imag], axis=-1)
# return z_view_as_real
# def _new_irfft(x: paddle.Tensor, length: int):
# x_real = x[..., 0]
# x_imag = x[..., 1]
# x_view_as_complex = paddle.complex(x_real, x_imag)
# return paddle.fft.irfft(x_view_as_complex, n=length, axis=-1)
# def _compl_mul_conjugate(a: paddle.Tensor, b: paddle.Tensor):
# """
# Given a and b two tensors of dimension 4
# with the last dimension being the real and imaginary part,
# returns a multiplied by the conjugate of b, the multiplication
# being with respect to the second dimension.
# PaddlePaddle does not have direct support for complex number operations
# using einsum in the same manner as PyTorch, but we can manually compute
# the equivalent result.
# """
# # Extract the real and imaginary parts of a and b
# real_a = a[..., 0]
# imag_a = a[..., 1]
# real_b = b[..., 0]
# imag_b = b[..., 1]
# # Compute the multiplication with respect to the second dimension manually
# real_part = paddle.einsum("bcft,dct->bdft", real_a, real_b) + paddle.einsum(
# "bcft,dct->bdft", imag_a, imag_b)
# imag_part = paddle.einsum("bcft,dct->bdft", imag_a, real_b) - paddle.einsum(
# "bcft,dct->bdft", real_a, imag_b)
# # Stack the real and imaginary parts together
# result = paddle.stack([real_part, imag_part], axis=-1)
# return result
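The real/imag einsum above is just the expansion of a * conj(b); a small NumPy check of that identity (illustrative only, not part of this commit):
import numpy as np
# shapes follow the docstring: a is [B, C, F, T], b is [D, C, T]
a = np.random.randn(2, 3, 5, 7) + 1j * np.random.randn(2, 3, 5, 7)
b = np.random.randn(4, 3, 7) + 1j * np.random.randn(4, 3, 7)
ref = np.einsum("bcft,dct->bdft", a, np.conj(b))
real = np.einsum("bcft,dct->bdft", a.real, b.real) + np.einsum("bcft,dct->bdft", a.imag, b.imag)
imag = np.einsum("bcft,dct->bdft", a.imag, b.real) - np.einsum("bcft,dct->bdft", a.real, b.imag)
assert np.allclose(ref, real + 1j * imag)  # same result as multiplying by the conjugate directly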
# def fft_conv1d(
# _input: paddle.Tensor,
# weight: paddle.Tensor,
# bias: Optional[paddle.Tensor]=None,
# stride: int=1,
# padding: int=0,
# block_ratio: float=5, ):
# """
# Same as `paddle.nn.functional.conv1d` but using FFT for the convolution.
# Please check PaddlePaddle documentation for more information.
# Args:
# _input (Tensor): _input signal of shape `[B, C, T]`.
# weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number
# of output channels.
# bias (Tensor or None): if not None, bias term for the convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# block_ratio (float): can be tuned for speed. The _input is split into chunks
# with a size of `int(block_ratio * kernel_size)`.
# Shape:
# - Inputs: `_input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`.
# - Output: `(*, T)`
# ..note::
# This function is faster than `paddle.nn.functional.conv1d` only in specific cases.
# Typically, the kernel size should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..Warning::
# Dilation and groups are not supported at the moment. This function might use
# more memory than the default Conv1d implementation.
# """
# _input = F.pad(_input, (padding, padding), data_format="NCL")
# batch, channels, length = _input.shape
# out_channels, _, kernel_size = weight.shape
# if length < kernel_size:
# raise RuntimeError(
# f"Input should be at least as large as the kernel size {kernel_size}, "
# f"but it is only {length} samples long.")
# if block_ratio < 1:
# raise RuntimeError("Block ratio must be greater than 1.")
# block_size: int = min(int(kernel_size * block_ratio), length)
# fold_stride = block_size - kernel_size + 1
# weight = pad_to(weight, block_size)
# weight_z = _new_rfft(weight)
# # We pad the _input and get the different frames, on which
# frames = unfold(_input, block_size, fold_stride)
# frames_z = _new_rfft(frames)
# out_z = _compl_mul_conjugate(frames_z, weight_z)
# out = _new_irfft(out_z, block_size)
# # The last bit is invalid, because FFT will do a circular convolution.
# out = out[..., :-kernel_size + 1]
# out = out.reshape([batch, out_channels, -1])
# out = out[..., ::stride]
# target_length = (length - kernel_size) // stride + 1
# out = out[..., :target_length]
# if bias is not None:
# out += bias[:, None]
# return out
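The reason the last `kernel_size - 1` samples of each block are dropped is that the FFT computes a circular correlation; only the first `block_size - kernel_size + 1` outputs match a linear ("valid") correlation. A standalone NumPy sketch of that idea (my own illustration, not code from this repo):
import numpy as np
x = np.random.randn(1024)   # one block of the input
k = np.random.randn(256)    # kernel, zero-padded to the block length by rfft below
n = len(x)
spec = np.fft.rfft(x, n) * np.conj(np.fft.rfft(k, n))
out = np.fft.irfft(spec, n)[: len(x) - len(k) + 1]   # drop the wrapped-around tail
assert np.allclose(out, np.correlate(x, k, mode="valid"))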
# class FFTConv1D(paddle.nn.Layer):
# """
# Same as `paddle.nn.Conv1D` but based on a custom FFT-based convolution.
# Please check PaddlePaddle documentation for more information on `paddle.nn.Conv1D`.
# Args:
# in_channels (int): number of _input channels.
# out_channels (int): number of output channels.
# kernel_size (int): kernel size of convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# bias (bool): if True, use a bias term.
# ..note::
# This module is faster than `paddle.nn.Conv1D` only in specific cases.
# Typically, `kernel_size` should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..warning::
# Dilation and groups are not supported at the moment. This module might use
# more memory than the default Conv1D implementation.
# >>> fftconv = FFTConv1D(12, 24, 128, 4)
# >>> x = paddle.randn([4, 12, 1024])
# >>> print(list(fftconv(x).shape))
# [4, 24, 225]
# """
# def __init__(
# self,
# in_channels: int,
# out_channels: int,
# kernel_size: int,
# stride: int=1,
# padding: int=0,
# bias: bool=True, ):
# super(FFTConv1D, self).__init__()
# self.in_channels = in_channels
# self.out_channels = out_channels
# self.kernel_size = kernel_size
# self.stride = stride
# self.padding = padding
# # Create a Conv1D layer to initialize weights and bias
# conv = paddle.nn.Conv1D(
# in_channels,
# out_channels,
# kernel_size,
# stride=stride,
# padding=padding,
# bias_attr=bias)
# self.weight = conv.weight
# if bias:
# self.bias = conv.bias
# else:
# self.bias = None
# def forward(self, _input: paddle.Tensor):
# return fft_conv1d(_input, self.weight, self.bias, self.stride,
# self.padding)
class LowPassFilters(nn.Layer):
"""
Bank of low pass filters.

@@ -94,13 +94,10 @@ STFTParams.__new__.__defaults__ = (None, None, None, None, None)
class AudioSignal(
EffectMixin,
LoudnessMixin,
# PlayMixin,
ImpulseResponseMixin,
DSPMixin,
DisplayMixin,
FFMPEGMixin,
# WhisperMixin,
):
"""This is the core object of this library. Audio is always
loaded into an AudioSignal, which then enables all the features
of this library, including audio augmentations, I/O, playback,

@@ -6,8 +6,6 @@ import paddle
from . import util
from ._julius import SplitBands
# from . import _julius
class EffectMixin:
GAIN_FACTOR = np.log(10) / 20
@@ -253,152 +251,6 @@ class EffectMixin:
self.audio_data = self.audio_data * gain[:, None, None]
return self
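For reference, GAIN_FACTOR = ln(10) / 20 is the standard decibel-to-amplitude conversion, so exp(db * GAIN_FACTOR) equals 10 ** (db / 20); a quick sanity check, purely illustrative:
import numpy as np
GAIN_FACTOR = np.log(10) / 20
db = -6.0
# about 0.501, i.e. a -6 dB change roughly halves the amplitude
assert np.isclose(np.exp(db * GAIN_FACTOR), 10 ** (db / 20))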
# def _to_2d(self):
# waveform = self.audio_data.reshape(-1, self.signal_length)
# return waveform
# def _to_3d(self, waveform):
# return waveform.reshape(self.batch_size, self.num_channels, -1)
# def pitch_shift(self, n_semitones: int, quick: bool = True):
# """Pitch shift the signal. All items in the batch
# get the same pitch shift.
# Parameters
# ----------
# n_semitones : int
# How many semitones to shift the signal by.
# quick : bool, optional
# Using quick pitch shifting, by default True
# Returns
# -------
# AudioSignal
# Pitch shifted audio signal.
# """
# device = self.device
# effects = [
# ["pitch", str(n_semitones * 100)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def time_stretch(self, factor: float, quick: bool = True):
# """Time stretch the audio signal.
# Parameters
# ----------
# factor : float
# Factor by which to stretch the AudioSignal. Typically
# between 0.8 and 1.2.
# quick : bool, optional
# Whether to use quick time stretching, by default True
# Returns
# -------
# AudioSignal
# Time-stretched AudioSignal.
# """
# device = self.device
# effects = [
# ["tempo", str(factor)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def apply_codec(
# self,
# preset: str = None,
# format: str = "wav",
# encoding: str = None,
# bits_per_sample: int = None,
# compression: int = None,
# ): # pragma: no cover
# """Applies an audio codec to the signal.
# Parameters
# ----------
# preset : str, optional
# One of the keys in ``self.CODEC_PRESETS``, by default None
# format : str, optional
# Format for audio codec, by default "wav"
# encoding : str, optional
# Encoding to use, by default None
# bits_per_sample : int, optional
# How many bits per sample, by default None
# compression : int, optional
# Compression amount of codec, by default None
# Returns
# -------
# AudioSignal
# AudioSignal with codec applied.
# Raises
# ------
# ValueError
# If preset is not in ``self.CODEC_PRESETS``, an error
# is thrown.
# """
# torchaudio_version_070 = "0.7" in torchaudio.__version__
# if torchaudio_version_070:
# return self
# kwargs = {
# "format": format,
# "encoding": encoding,
# "bits_per_sample": bits_per_sample,
# "compression": compression,
# }
# if preset is not None:
# if preset in self.CODEC_PRESETS:
# kwargs = self.CODEC_PRESETS[preset]
# else:
# raise ValueError(
# f"Unknown preset: {preset}. "
# f"Known presets: {list(self.CODEC_PRESETS.keys())}"
# )
# waveform = self._to_2d()
# if kwargs["format"] in ["vorbis", "mp3", "ogg", "amr-nb"]:
# # Apply it in a for loop
# augmented = torch.cat(
# [
# torchaudio.functional.apply_codec(
# waveform[i][None, :], self.sample_rate, **kwargs
# )
# for i in range(waveform.shape[0])
# ],
# dim=0,
# )
# else:
# augmented = torchaudio.functional.apply_codec(
# waveform, self.sample_rate, **kwargs
# )
# augmented = self._to_3d(augmented)
# self.audio_data = augmented
# return self
def mel_filterbank(self, n_bands: int):
"""Breaks signal into mel bands.

@@ -478,21 +478,6 @@ class ConcatDataset(AudioDataset):
return dataset[idx // len(self.datasets)]
# class ResumableDistributedSampler(DistributedSampler): # pragma: no cover
# """Distributed sampler that can be resumed from a given start index."""
# def __init__(self, dataset, start_idx: int = None, **kwargs):
# super().__init__(dataset, **kwargs)
# # Start index, allows to resume an experiment at the index it was
# self.start_idx = start_idx // self.num_replicas if start_idx is not None else 0
# def __iter__(self):
# for i, idx in enumerate(super().__iter__()):
# if i >= self.start_idx:
# yield idx
# self.start_idx = 0 # set the index back to 0 so for the next epoch
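The removed sampler resumes by skipping already-seen indices on the first pass and then resetting; a toy, framework-free illustration of that pattern (hypothetical `ResumableRange`, for intuition only):
class ResumableRange:
    """Yield 0..n-1, but skip everything before start_idx on the first pass (toy class, not the repo's sampler)."""
    def __init__(self, n, start_idx=0):
        self.n, self.start_idx = n, start_idx
    def __iter__(self):
        for i in range(self.n):
            if i >= self.start_idx:
                yield i
        self.start_idx = 0  # subsequent epochs start from the beginning again
sampler = ResumableRange(5, start_idx=3)
assert list(sampler) == [3, 4]            # resumed epoch
assert list(sampler) == [0, 1, 2, 3, 4]   # next epoch runs in full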
class ResumableDistributedSampler(DistributedBatchSampler): # pragma: no cover
"""Distributed sampler that can be resumed from a given start index."""

@@ -608,108 +608,6 @@ class Equalizer(BaseTransform):
return signal.equalizer(eq)
# class Quantization(BaseTransform):
# """Applies quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of evenly spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.quantization(channels)
# class MuLawQuantization(BaseTransform):
# """Applies mu-law quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.mulaw_quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of mu-law spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.mulaw_quantization(channels)
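For context, mu-law quantization compands the waveform before rounding so that small amplitudes get finer resolution; a generic sketch of the companding step (standard formula, not necessarily the exact mulaw_quantization implementation):
import numpy as np
def mulaw_compand(x, channels=256):
    # standard mu-law curve; hypothetical helper, not from this repo
    mu = channels - 1
    return np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)   # maps [-1, 1] -> [-1, 1]
x = np.linspace(-1, 1, 5)
print(np.round(mulaw_compand(x), 3))   # e.g. [-1. -0.876  0.  0.876  1.]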
# class NoiseFloor(BaseTransform):
# """Adds a noise floor of Gaussian noise to the signal at a specified
# dB.
# Parameters
# ----------
# db : tuple, optional
# Level of noise to add to signal, by default ("const", -50.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("const", -50.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# db = util.sample_from_dist(self.db, state)
# audio_data = state.randn(signal.num_channels, signal.signal_length)
# nz_signal = AudioSignal(audio_data, signal.sample_rate)
# nz_signal.normalize(db)
# return {"nz_signal": nz_signal}
# def _transform(self, signal, nz_signal):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# return signal + nz_signal
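The `nz_signal.normalize(db)` step above scales the random noise to a target level; in plain RMS-dB terms the idea looks roughly like this (rough sketch only; the library's normalize() uses a loudness measure, which is more involved):
import numpy as np
db_target = -50.0
nz = np.random.randn(44100)
rms_db = 20 * np.log10(np.sqrt(np.mean(nz ** 2)))   # current level in dB RMS
nz = nz * 10 ** ((db_target - rms_db) / 20)          # noise now sits at roughly -50 dB RMS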
class BackgroundNoise(BaseTransform):
"""Adds background noise from audio specified by a set of CSV files.
A valid CSV file looks like, and is typically generated by
@@ -796,68 +694,6 @@ class BackgroundNoise(BaseTransform):
return signal.mix(bg_signal.clone(), snr, eq)
# class CrossTalk(BaseTransform):
# """Adds crosstalk between speakers, whose audio is drawn from a CSV file
# that was produced via :py:func:`audiotools.data.preprocess.create_csv`.
# This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix`
# under the hood.
# Parameters
# ----------
# snr : tuple, optional
# How loud cross-talk speaker is relative to original signal in dB,
# by default ("uniform", 0.0, 10.0)
# sources : List[str], optional
# Sources containing folders, or CSVs with paths to audio files,
# by default None
# weights : List[float], optional
# Weights to sample audio files from each source, by default None
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# loudness_cutoff : float, optional
# Loudness cutoff when loading from audio files, by default -40
# """
# def __init__(
# self,
# snr: tuple = ("uniform", 0.0, 10.0),
# sources: List[str] = None,
# weights: List[float] = None,
# name: str = None,
# prob: float = 1.0,
# loudness_cutoff: float = -40,
# ):
# super().__init__(name=name, prob=prob)
# self.snr = snr
# self.loader = AudioLoader(sources, weights)
# self.loudness_cutoff = loudness_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# snr = util.sample_from_dist(self.snr, state)
# crosstalk_signal = self.loader(
# state,
# signal.sample_rate,
# duration=signal.signal_duration,
# loudness_cutoff=self.loudness_cutoff,
# num_channels=signal.num_channels,
# )["signal"]
# return {"crosstalk_signal": crosstalk_signal, "snr": snr}
# def _transform(self, signal, crosstalk_signal, snr):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# loudness = signal.loudness()
# mix = signal.mix(crosstalk_signal.clone(), snr)
# mix.normalize(loudness)
# return mix
class RoomImpulseResponse(BaseTransform):
"""Convolves signal with a room impulse response, at a specified
direct-to-reverberant ratio, with equalization applied. Room impulse
@@ -942,38 +778,6 @@ class RoomImpulseResponse(BaseTransform):
use_original_phase=self.use_original_phase)
# class VolumeChange(BaseTransform):
# """Changes the volume of the input signal.
# Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.
# Parameters
# ----------
# db : tuple, optional
# Change in volume in decibels, by default ("uniform", -12.0, 0.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("uniform", -12.0, 0.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState):
# return {"db": util.sample_from_dist(self.db, state)}
# def _transform(self, signal, db):
# return signal.volume_change(db)
class VolumeNorm(BaseTransform):
"""Normalizes the volume of the excerpt to a specified decibel.
@@ -1169,111 +973,6 @@ class HighPass(BaseTransform):
return signal.high_pass(cutoff, zeros=self.zeros)
# class RescaleAudio(BaseTransform):
# """Rescales the audio so it is in between ``-val`` and ``val``
# only if the original audio exceeds those bounds. Useful if
# transforms have caused the audio to clip.
# Uses :py:func:`audiotools.core.effects.EffectMixin.ensure_max_of_audio`.
# Parameters
# ----------
# val : float, optional
# Max absolute value of signal, by default 1.0
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, val: float = 1.0, name: str = None, prob: float = 1):
# super().__init__(name=name, prob=prob)
# self.val = val
# def _transform(self, signal):
# return signal.ensure_max_of_audio(self.val)
# class ShiftPhase(SpectralTransform):
# """Shifts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# shift : tuple, optional
# How much to shift phase by, by default ("uniform", -np.pi, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# shift: tuple = ("uniform", -np.pi, np.pi),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.shift = shift
# def _instantiate(self, state: RandomState):
# return {"shift": util.sample_from_dist(self.shift, state)}
# def _transform(self, signal, shift):
# return signal.shift_phase(shift)
# class InvertPhase(ShiftPhase):
# """Inverts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, name: str = None, prob: float = 1):
# super().__init__(shift=("const", np.pi), name=name, prob=prob)
# class CorruptPhase(SpectralTransform):
# """Corrupts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.corrupt_phase`.
# Parameters
# ----------
# scale : tuple, optional
# How much to corrupt phase by, by default ("uniform", 0, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self, scale: tuple = ("uniform", 0, np.pi), name: str = None, prob: float = 1
# ):
# super().__init__(name=name, prob=prob)
# self.scale = scale
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# scale = util.sample_from_dist(self.scale, state)
# corruption = state.normal(scale=scale, size=signal.phase.shape[1:])
# return {"corruption": corruption.astype("float32")}
# def _transform(self, signal, corruption):
# return signal.shift_phase(shift=corruption)
class FrequencyMask(SpectralTransform):
"""Masks a band of frequencies at a center frequency
from the audio.
@@ -1363,39 +1062,6 @@ class TimeMask(SpectralTransform):
return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
# class MaskLowMagnitudes(SpectralTransform):
# """Masks low magnitude regions out of signal.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_low_magnitudes`.
# Parameters
# ----------
# db_cutoff : tuple, optional
# Decibel value for which things below it will be masked away,
# by default ("uniform", -10, 10)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db_cutoff: tuple = ("uniform", -10, 10),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.db_cutoff = db_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# return {"db_cutoff": util.sample_from_dist(self.db_cutoff, state)}
# def _transform(self, signal, db_cutoff: float):
# return signal.mask_low_magnitudes(db_cutoff)
class Smoothing(BaseTransform):
"""Convolves the signal with a smoothing window.
@@ -1445,48 +1111,6 @@ class Smoothing(BaseTransform):
return out
# class TimeNoise(TimeMask):
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
# replaces with noise instead of zeros.
# Parameters
# ----------
# t_center : tuple, optional
# Center time in terms of 0.0 and 1.0 (duration of signal),
# by default ("uniform", 0.0, 1.0)
# t_width : tuple, optional
# Width of dropped out portion, by default ("const", 0.025)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# t_center: tuple = ("uniform", 0.0, 1.0),
# t_width: tuple = ("const", 0.025),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(t_center=t_center, t_width=t_width, name=name, prob=prob)
# def _transform(self, signal, tmin_s: float, tmax_s: float):
# signal = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s, val=0.0)
# mag, phase = signal.magnitude, signal.phase
# mag_r, phase_r = torch.randn_like(mag), torch.randn_like(phase)
# mask = (mag == 0.0) * (phase == 0.0)
# mag[mask] = mag_r[mask]
# phase[mask] = phase_r[mask]
# signal.magnitude = mag
# signal.phase = phase
# return signal
class FrequencyNoise(FrequencyMask):
"""Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
replaces with noise instead of zeros.
@@ -1530,59 +1154,3 @@ class FrequencyNoise(FrequencyMask):
signal.magnitude = mag
signal.phase = phase
return signal
# class SpectralDenoising(Equalizer):
# """Applies denoising algorithm detailed in
# :py:func:`audiotools.ml.layers.spectral_gate.SpectralGate`,
# using a randomly generated noise signal for denoising.
# Parameters
# ----------
# eq_amount : tuple, optional
# Amount of eq to apply to noise signal, by default ("const", 1.0)
# denoise_amount : tuple, optional
# Amount to denoise by, by default ("uniform", 0.8, 1.0)
# nz_volume : float, optional
# Volume of noise to denoise with, by default -40
# n_bands : int, optional
# Number of bands in equalizer, by default 6
# n_freq : int, optional
# Number of frequency bins to smooth by, by default 3
# n_time : int, optional
# Number of time bins to smooth by, by default 5
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# eq_amount: tuple = ("const", 1.0),
# denoise_amount: tuple = ("uniform", 0.8, 1.0),
# nz_volume: float = -40,
# n_bands: int = 6,
# n_freq: int = 3,
# n_time: int = 5,
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(eq_amount=eq_amount, n_bands=n_bands, name=name, prob=prob)
# self.nz_volume = nz_volume
# self.denoise_amount = denoise_amount
# self.spectral_gate = ml.layers.SpectralGate(n_freq, n_time)
# def _transform(self, signal, nz, eq, denoise_amount):
# nz = nz.normalize(self.nz_volume).equalizer(eq)
# self.spectral_gate = self.spectral_gate.to(signal.device)
# signal = self.spectral_gate(signal, nz, denoise_amount)
# return signal
# def _instantiate(self, state: RandomState):
# kwargs = super()._instantiate(state)
# kwargs["denoise_amount"] = util.sample_from_dist(self.denoise_amount, state)
# kwargs["nz"] = AudioSignal(state.randn(22050), 44100)
# return kwargs

@@ -1,6 +1,4 @@
"""
Functions for comparing AudioSignal objects to one another.
"""
# from . import distance
from . import quality
# from . import spectral

@@ -1,5 +1,3 @@
from . import decorators
from .accelerator import Accelerator
from .basemodel import BaseModel
# from . import layers
# from .experiment import Experiment
