rm some useless comment

pull/3900/head
drryanhuang 9 months ago
parent 1bfc9bc2b2
commit 3599089040

@@ -245,177 +245,6 @@ def pure_tone(freq: float, sr: float=128, dur: float=4, device=None):
return paddle.cos(2 * math.pi * freq * time)
# def _new_rfft(x: paddle.Tensor):
# z = paddle.fft.rfft(x, axis=-1)
# z_real = paddle.real(z)
# z_imag = paddle.imag(z)
# z_view_as_real = paddle.stack([z_real, z_imag], axis=-1)
# return z_view_as_real
# def _new_irfft(x: paddle.Tensor, length: int):
# x_real = x[..., 0]
# x_imag = x[..., 1]
# x_view_as_complex = paddle.complex(x_real, x_imag)
# return paddle.fft.irfft(x_view_as_complex, n=length, axis=-1)
# def _compl_mul_conjugate(a: paddle.Tensor, b: paddle.Tensor):
# """
# Given a and b two tensors of dimension 4
# with the last dimension being the real and imaginary part,
# returns a multiplied by the conjugate of b, the multiplication
# being with respect to the second dimension.
# PaddlePaddle does not have direct support for complex number operations
# using einsum in the same manner as PyTorch, but we can manually compute
# the equivalent result.
# """
# # Extract the real and imaginary parts of a and b
# real_a = a[..., 0]
# imag_a = a[..., 1]
# real_b = b[..., 0]
# imag_b = b[..., 1]
# # Compute the multiplication with respect to the second dimension manually
# real_part = paddle.einsum("bcft,dct->bdft", real_a, real_b) + paddle.einsum(
# "bcft,dct->bdft", imag_a, imag_b)
# imag_part = paddle.einsum("bcft,dct->bdft", imag_a, real_b) - paddle.einsum(
# "bcft,dct->bdft", real_a, imag_b)
# # Stack the real and imaginary parts together
# result = paddle.stack([real_part, imag_part], axis=-1)
# return result
# def fft_conv1d(
# _input: paddle.Tensor,
# weight: paddle.Tensor,
# bias: Optional[paddle.Tensor]=None,
# stride: int=1,
# padding: int=0,
# block_ratio: float=5, ):
# """
# Same as `paddle.nn.functional.conv1d` but using FFT for the convolution.
# Please check PaddlePaddle documentation for more information.
# Args:
# _input (Tensor): _input signal of shape `[B, C, T]`.
# weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number
# of output channels.
# bias (Tensor or None): if not None, bias term for the convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# block_ratio (float): can be tuned for speed. The _input is split into chunks
# with a size of `int(block_ratio * kernel_size)`.
# Shape:
# - Inputs: `_input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`.
# - Output: `(*, T)`
# ..note::
# This function is faster than `paddle.nn.functional.conv1d` only in specific cases.
# Typically, the kernel size should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..Warning::
# Dilation and groups are not supported at the moment. This function might use
# more memory than the default Conv1d implementation.
# """
# _input = F.pad(_input, (padding, padding), data_format="NCL")
# batch, channels, length = _input.shape
# out_channels, _, kernel_size = weight.shape
# if length < kernel_size:
# raise RuntimeError(
# f"Input should be at least as large as the kernel size {kernel_size}, "
# f"but it is only {length} samples long.")
# if block_ratio < 1:
# raise RuntimeError("Block ratio must be greater than 1.")
# block_size: int = min(int(kernel_size * block_ratio), length)
# fold_stride = block_size - kernel_size + 1
# weight = pad_to(weight, block_size)
# weight_z = _new_rfft(weight)
# # We pad the _input and get the different frames, on which
# frames = unfold(_input, block_size, fold_stride)
# frames_z = _new_rfft(frames)
# out_z = _compl_mul_conjugate(frames_z, weight_z)
# out = _new_irfft(out_z, block_size)
# # The last bit is invalid, because FFT will do a circular convolution.
# out = out[..., :-kernel_size + 1]
# out = out.reshape([batch, out_channels, -1])
# out = out[..., ::stride]
# target_length = (length - kernel_size) // stride + 1
# out = out[..., :target_length]
# if bias is not None:
# out += bias[:, None]
# return out
# class FFTConv1D(paddle.nn.Layer):
# """
# Same as `paddle.nn.Conv1D` but based on a custom FFT-based convolution.
# Please check PaddlePaddle documentation for more information on `paddle.nn.Conv1D`.
# Args:
# in_channels (int): number of _input channels.
# out_channels (int): number of output channels.
# kernel_size (int): kernel size of convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# bias (bool): if True, use a bias term.
# ..note::
# This module is faster than `paddle.nn.Conv1D` only in specific cases.
# Typically, `kernel_size` should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..warning::
# Dilation and groups are not supported at the moment. This module might use
# more memory than the default Conv1D implementation.
# >>> fftconv = FFTConv1D(12, 24, 128, 4)
# >>> x = paddle.randn([4, 12, 1024])
# >>> print(list(fftconv(x).shape))
# [4, 24, 225]
# """
# def __init__(
# self,
# in_channels: int,
# out_channels: int,
# kernel_size: int,
# stride: int=1,
# padding: int=0,
# bias: bool=True, ):
# super(FFTConv1D, self).__init__()
# self.in_channels = in_channels
# self.out_channels = out_channels
# self.kernel_size = kernel_size
# self.stride = stride
# self.padding = padding
# # Create a Conv1D layer to initialize weights and bias
# conv = paddle.nn.Conv1D(
# in_channels,
# out_channels,
# kernel_size,
# stride=stride,
# padding=padding,
# bias_attr=bias)
# self.weight = conv.weight
# if bias:
# self.bias = conv.bias
# else:
# self.bias = None
# def forward(self, _input: paddle.Tensor):
# return fft_conv1d(_input, self.weight, self.bias, self.stride,
# self.padding)
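The block removed above computed Conv1D as a frequency-domain cross-correlation: rfft the input frames, multiply by the conjugate of the kernel spectrum, irfft, and drop the circularly wrapped tail. A minimal numpy sketch of that identity, for reference only (the helper name is illustrative, not part of the codebase):

import numpy as np

def fft_cross_correlate_valid(x, w):
    # "valid"-mode cross-correlation (what Conv1D computes) via the FFT:
    # rfft(x) * conj(rfft(w)), inverted, with the circular wrap-around dropped.
    n = len(x)
    out = np.fft.irfft(np.fft.rfft(x, n) * np.conj(np.fft.rfft(w, n)), n)
    return out[: n - len(w) + 1]

x, w = np.random.randn(1024), np.random.randn(128)
assert np.allclose(fft_cross_correlate_valid(x, w),
                   np.correlate(x, w, mode="valid"), atol=1e-6)

The removed fft_conv1d additionally splits the input into overlapping blocks of roughly block_ratio * kernel_size samples (overlap-save), which is why it only pays off for kernels on the order of 256 taps.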
class LowPassFilters(nn.Layer):
"""
Bank of low pass filters.

@@ -94,13 +94,10 @@ STFTParams.__new__.__defaults__ = (None, None, None, None, None)
class AudioSignal(
EffectMixin,
LoudnessMixin,
# PlayMixin,
ImpulseResponseMixin,
DSPMixin,
DisplayMixin,
FFMPEGMixin,
# WhisperMixin,
):
FFMPEGMixin, ):
"""This is the core object of this library. Audio is always
loaded into an AudioSignal, which then enables all the features
of this library, including audio augmentations, I/O, playback,

@@ -6,8 +6,6 @@ import paddle
from . import util
from ._julius import SplitBands
# from . import _julius
class EffectMixin:
GAIN_FACTOR = np.log(10) / 20
@@ -253,152 +251,6 @@ class EffectMixin:
self.audio_data = self.audio_data * gain[:, None, None]
return self
# def _to_2d(self):
# waveform = self.audio_data.reshape(-1, self.signal_length)
# return waveform
# def _to_3d(self, waveform):
# return waveform.reshape(self.batch_size, self.num_channels, -1)
# def pitch_shift(self, n_semitones: int, quick: bool = True):
# """Pitch shift the signal. All items in the batch
# get the same pitch shift.
# Parameters
# ----------
# n_semitones : int
# How many semitones to shift the signal by.
# quick : bool, optional
# Using quick pitch shifting, by default True
# Returns
# -------
# AudioSignal
# Pitch shifted audio signal.
# """
# device = self.device
# effects = [
# ["pitch", str(n_semitones * 100)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def time_stretch(self, factor: float, quick: bool = True):
# """Time stretch the audio signal.
# Parameters
# ----------
# factor : float
# Factor by which to stretch the AudioSignal. Typically
# between 0.8 and 1.2.
# quick : bool, optional
# Whether to use quick time stretching, by default True
# Returns
# -------
# AudioSignal
# Time-stretched AudioSignal.
# """
# device = self.device
# effects = [
# ["tempo", str(factor)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def apply_codec(
# self,
# preset: str = None,
# format: str = "wav",
# encoding: str = None,
# bits_per_sample: int = None,
# compression: int = None,
# ): # pragma: no cover
# """Applies an audio codec to the signal.
# Parameters
# ----------
# preset : str, optional
# One of the keys in ``self.CODEC_PRESETS``, by default None
# format : str, optional
# Format for audio codec, by default "wav"
# encoding : str, optional
# Encoding to use, by default None
# bits_per_sample : int, optional
# How many bits per sample, by default None
# compression : int, optional
# Compression amount of codec, by default None
# Returns
# -------
# AudioSignal
# AudioSignal with codec applied.
# Raises
# ------
# ValueError
# If preset is not in ``self.CODEC_PRESETS``, an error
# is thrown.
# """
# torchaudio_version_070 = "0.7" in torchaudio.__version__
# if torchaudio_version_070:
# return self
# kwargs = {
# "format": format,
# "encoding": encoding,
# "bits_per_sample": bits_per_sample,
# "compression": compression,
# }
# if preset is not None:
# if preset in self.CODEC_PRESETS:
# kwargs = self.CODEC_PRESETS[preset]
# else:
# raise ValueError(
# f"Unknown preset: {preset}. "
# f"Known presets: {list(self.CODEC_PRESETS.keys())}"
# )
# waveform = self._to_2d()
# if kwargs["format"] in ["vorbis", "mp3", "ogg", "amr-nb"]:
# # Apply it in a for loop
# augmented = torch.cat(
# [
# torchaudio.functional.apply_codec(
# waveform[i][None, :], self.sample_rate, **kwargs
# )
# for i in range(waveform.shape[0])
# ],
# dim=0,
# )
# else:
# augmented = torchaudio.functional.apply_codec(
# waveform, self.sample_rate, **kwargs
# )
# augmented = self._to_3d(augmented)
# self.audio_data = augmented
# return self
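The pitch_shift, time_stretch and apply_codec methods removed above depended on torchaudio.sox_effects, which has no Paddle counterpart. Roughly the same effects can be obtained by invoking the sox command line directly; a sketch, assuming sox is installed and with helper names that are ours:

import subprocess

def sox_pitch_shift(in_path, out_path, n_semitones, sample_rate):
    # sox's `pitch` effect takes cents (100 per semitone); `rate` resamples
    # back to the original sample rate, mirroring the removed effect chain.
    subprocess.run(["sox", in_path, out_path,
                    "pitch", str(n_semitones * 100),
                    "rate", str(sample_rate)], check=True)

def sox_time_stretch(in_path, out_path, factor, sample_rate):
    # `tempo` stretches duration by `factor` without changing pitch.
    subprocess.run(["sox", in_path, out_path,
                    "tempo", str(factor),
                    "rate", str(sample_rate)], check=True)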
def mel_filterbank(self, n_bands: int):
"""Breaks signal into mel bands.

@@ -478,21 +478,6 @@ class ConcatDataset(AudioDataset):
return dataset[idx // len(self.datasets)]
# class ResumableDistributedSampler(DistributedSampler): # pragma: no cover
# """Distributed sampler that can be resumed from a given start index."""
# def __init__(self, dataset, start_idx: int = None, **kwargs):
# super().__init__(dataset, **kwargs)
# # Start index, allows to resume an experiment at the index it was
# self.start_idx = start_idx // self.num_replicas if start_idx is not None else 0
# def __iter__(self):
# for i, idx in enumerate(super().__iter__()):
# if i >= self.start_idx:
# yield idx
# self.start_idx = 0 # set the index back to 0 so for the next epoch
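Both this removed sampler and the DistributedBatchSampler-based replacement below rely on the same resume trick: skip the first start_idx items of the resumed epoch, then reset so later epochs run in full. A framework-free sketch of that pattern (illustrative class name):

class ResumableIndices:
    def __init__(self, indices, start_idx=0):
        self.indices = list(indices)
        self.start_idx = start_idx

    def __iter__(self):
        # Skip the first `start_idx` indices on the first pass only.
        for i, idx in enumerate(self.indices):
            if i >= self.start_idx:
                yield idx
        self.start_idx = 0  # subsequent epochs start from the beginning

assert list(ResumableIndices(range(5), start_idx=2)) == [2, 3, 4]
assert list(ResumableIndices(range(5))) == [0, 1, 2, 3, 4]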
class ResumableDistributedSampler(DistributedBatchSampler): # pragma: no cover
"""Distributed sampler that can be resumed from a given start index."""

@@ -608,108 +608,6 @@ class Equalizer(BaseTransform):
return signal.equalizer(eq)
# class Quantization(BaseTransform):
# """Applies quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of evenly spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.quantization(channels)
# class MuLawQuantization(BaseTransform):
# """Applies mu-law quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.mulaw_quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of mu-law spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.mulaw_quantization(channels)
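Both removed transforms only sample a channel count and delegate to the signal's quantization / mulaw_quantization methods. As a reminder of what mu-law quantization does, a rough numpy sketch (compand, round to `channels` levels, expand back; not the library's exact implementation):

import numpy as np

def mulaw_quantize(x, channels=256):
    mu = channels - 1.0
    companded = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    quantized = np.round((companded + 1) / 2 * mu) / mu * 2 - 1  # `channels` levels
    return np.sign(quantized) * np.expm1(np.abs(quantized) * np.log1p(mu)) / mu

x = np.linspace(-1.0, 1.0, 9)
print(np.round(mulaw_quantize(x, channels=8), 3))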
# class NoiseFloor(BaseTransform):
# """Adds a noise floor of Gaussian noise to the signal at a specified
# dB.
# Parameters
# ----------
# db : tuple, optional
# Level of noise to add to signal, by default ("const", -50.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("const", -50.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# db = util.sample_from_dist(self.db, state)
# audio_data = state.randn(signal.num_channels, signal.signal_length)
# nz_signal = AudioSignal(audio_data, signal.sample_rate)
# nz_signal.normalize(db)
# return {"nz_signal": nz_signal}
# def _transform(self, signal, nz_signal):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# return signal + nz_signal
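NoiseFloor, removed above, draws Gaussian noise, normalizes it to the target level and adds it to the signal. A rough stand-in that uses RMS level where the library uses its loudness measure (sketch only):

import numpy as np

def add_noise_floor(x, db=-50.0, seed=0):
    nz = np.random.default_rng(seed).standard_normal(x.shape)
    nz *= 10 ** (db / 20) / np.sqrt(np.mean(nz ** 2))  # set RMS to `db` dBFS
    return x + nz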
class BackgroundNoise(BaseTransform):
"""Adds background noise from audio specified by a set of CSV files.
A valid CSV file looks like, and is typically generated by
@@ -796,68 +694,6 @@ class BackgroundNoise(BaseTransform):
return signal.mix(bg_signal.clone(), snr, eq)
# class CrossTalk(BaseTransform):
# """Adds crosstalk between speakers, whose audio is drawn from a CSV file
# that was produced via :py:func:`audiotools.data.preprocess.create_csv`.
# This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix`
# under the hood.
# Parameters
# ----------
# snr : tuple, optional
# How loud cross-talk speaker is relative to original signal in dB,
# by default ("uniform", 0.0, 10.0)
# sources : List[str], optional
# Sources containing folders, or CSVs with paths to audio files,
# by default None
# weights : List[float], optional
# Weights to sample audio files from each source, by default None
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# loudness_cutoff : float, optional
# Loudness cutoff when loading from audio files, by default -40
# """
# def __init__(
# self,
# snr: tuple = ("uniform", 0.0, 10.0),
# sources: List[str] = None,
# weights: List[float] = None,
# name: str = None,
# prob: float = 1.0,
# loudness_cutoff: float = -40,
# ):
# super().__init__(name=name, prob=prob)
# self.snr = snr
# self.loader = AudioLoader(sources, weights)
# self.loudness_cutoff = loudness_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# snr = util.sample_from_dist(self.snr, state)
# crosstalk_signal = self.loader(
# state,
# signal.sample_rate,
# duration=signal.signal_duration,
# loudness_cutoff=self.loudness_cutoff,
# num_channels=signal.num_channels,
# )["signal"]
# return {"crosstalk_signal": crosstalk_signal, "snr": snr}
# def _transform(self, signal, crosstalk_signal, snr):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# loudness = signal.loudness()
# mix = signal.mix(crosstalk_signal.clone(), snr)
# mix.normalize(loudness)
# return mix
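The removed CrossTalk transform mixes a second speaker at a sampled SNR and then re-normalizes to the original loudness. Its core operation, sketched with RMS in place of the perceptual loudness the library actually uses:

import numpy as np

def mix_at_snr(signal, other, snr_db):
    # Scale `other` so that 20*log10(rms(signal)/rms(scaled)) == snr_db.
    rms = lambda a: np.sqrt(np.mean(a ** 2) + 1e-12)
    gain = rms(signal) / (rms(other) * 10 ** (snr_db / 20))
    return signal + gain * other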
class RoomImpulseResponse(BaseTransform):
"""Convolves signal with a room impulse response, at a specified
direct-to-reverberant ratio, with equalization applied. Room impulse
@@ -942,38 +778,6 @@ class RoomImpulseResponse(BaseTransform):
use_original_phase=self.use_original_phase)
# class VolumeChange(BaseTransform):
# """Changes the volume of the input signal.
# Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.
# Parameters
# ----------
# db : tuple, optional
# Change in volume in decibels, by default ("uniform", -12.0, 0.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("uniform", -12.0, 0.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState):
# return {"db": util.sample_from_dist(self.db, state)}
# def _transform(self, signal, db):
# return signal.volume_change(db)
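VolumeChange, removed above, simply applies a gain sampled in decibels; a change of db decibels corresponds to the linear gain below (sketch):

import numpy as np

def volume_change(x, db):
    return x * 10 ** (db / 20)  # e.g. -6 dB is roughly half amplitude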
class VolumeNorm(BaseTransform):
"""Normalizes the volume of the excerpt to a specified decibel.
@@ -1169,111 +973,6 @@ class HighPass(BaseTransform):
return signal.high_pass(cutoff, zeros=self.zeros)
# class RescaleAudio(BaseTransform):
# """Rescales the audio so it is in between ``-val`` and ``val``
# only if the original audio exceeds those bounds. Useful if
# transforms have caused the audio to clip.
# Uses :py:func:`audiotools.core.effects.EffectMixin.ensure_max_of_audio`.
# Parameters
# ----------
# val : float, optional
# Max absolute value of signal, by default 1.0
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, val: float = 1.0, name: str = None, prob: float = 1):
# super().__init__(name=name, prob=prob)
# self.val = val
# def _transform(self, signal):
# return signal.ensure_max_of_audio(self.val)
# class ShiftPhase(SpectralTransform):
# """Shifts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# shift : tuple, optional
# How much to shift phase by, by default ("uniform", -np.pi, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# shift: tuple = ("uniform", -np.pi, np.pi),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.shift = shift
# def _instantiate(self, state: RandomState):
# return {"shift": util.sample_from_dist(self.shift, state)}
# def _transform(self, signal, shift):
# return signal.shift_phase(shift)
# class InvertPhase(ShiftPhase):
# """Inverts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, name: str = None, prob: float = 1):
# super().__init__(shift=("const", np.pi), name=name, prob=prob)
# class CorruptPhase(SpectralTransform):
# """Corrupts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.corrupt_phase`.
# Parameters
# ----------
# scale : tuple, optional
# How much to corrupt phase by, by default ("uniform", 0, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self, scale: tuple = ("uniform", 0, np.pi), name: str = None, prob: float = 1
# ):
# super().__init__(name=name, prob=prob)
# self.scale = scale
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# scale = util.sample_from_dist(self.scale, state)
# corruption = state.normal(scale=scale, size=signal.phase.shape[1:])
# return {"corruption": corruption.astype("float32")}
# def _transform(self, signal, corruption):
# return signal.shift_phase(shift=corruption)
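ShiftPhase, InvertPhase and CorruptPhase, removed above, all rotate the STFT phase and resynthesize. The underlying operation on a complex STFT matrix, sketched in numpy:

import numpy as np

def shift_phase(stft, shift):
    # Rotate every bin's phase by `shift` radians while keeping the magnitude.
    return np.abs(stft) * np.exp(1j * (np.angle(stft) + shift))

InvertPhase is the special case shift = pi, i.e. multiplying the STFT by -1; CorruptPhase draws a per-bin shift from a normal distribution instead of using a constant.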
class FrequencyMask(SpectralTransform):
"""Masks a band of frequencies at a center frequency
from the audio.
@@ -1363,39 +1062,6 @@ class TimeMask(SpectralTransform):
return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
# class MaskLowMagnitudes(SpectralTransform):
# """Masks low magnitude regions out of signal.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_low_magnitudes`.
# Parameters
# ----------
# db_cutoff : tuple, optional
# Decibel value for which things below it will be masked away,
# by default ("uniform", -10, 10)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db_cutoff: tuple = ("uniform", -10, 10),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.db_cutoff = db_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# return {"db_cutoff": util.sample_from_dist(self.db_cutoff, state)}
# def _transform(self, signal, db_cutoff: float):
# return signal.mask_low_magnitudes(db_cutoff)
class Smoothing(BaseTransform):
"""Convolves the signal with a smoothing window.
@@ -1445,48 +1111,6 @@ class Smoothing(BaseTransform):
return out
# class TimeNoise(TimeMask):
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
# replaces with noise instead of zeros.
# Parameters
# ----------
# t_center : tuple, optional
# Center time in terms of 0.0 and 1.0 (duration of signal),
# by default ("uniform", 0.0, 1.0)
# t_width : tuple, optional
# Width of dropped out portion, by default ("const", 0.025)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# t_center: tuple = ("uniform", 0.0, 1.0),
# t_width: tuple = ("const", 0.025),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(t_center=t_center, t_width=t_width, name=name, prob=prob)
# def _transform(self, signal, tmin_s: float, tmax_s: float):
# signal = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s, val=0.0)
# mag, phase = signal.magnitude, signal.phase
# mag_r, phase_r = torch.randn_like(mag), torch.randn_like(phase)
# mask = (mag == 0.0) * (phase == 0.0)
# mag[mask] = mag_r[mask]
# phase[mask] = phase_r[mask]
# signal.magnitude = mag
# signal.phase = phase
# return signal
class FrequencyNoise(FrequencyMask):
"""Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
replaces with noise instead of zeros.
@@ -1530,59 +1154,3 @@ class FrequencyNoise(FrequencyMask):
signal.magnitude = mag
signal.phase = phase
return signal
# class SpectralDenoising(Equalizer):
# """Applies denoising algorithm detailed in
# :py:func:`audiotools.ml.layers.spectral_gate.SpectralGate`,
# using a randomly generated noise signal for denoising.
# Parameters
# ----------
# eq_amount : tuple, optional
# Amount of eq to apply to noise signal, by default ("const", 1.0)
# denoise_amount : tuple, optional
# Amount to denoise by, by default ("uniform", 0.8, 1.0)
# nz_volume : float, optional
# Volume of noise to denoise with, by default -40
# n_bands : int, optional
# Number of bands in equalizer, by default 6
# n_freq : int, optional
# Number of frequency bins to smooth by, by default 3
# n_time : int, optional
# Number of time bins to smooth by, by default 5
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# eq_amount: tuple = ("const", 1.0),
# denoise_amount: tuple = ("uniform", 0.8, 1.0),
# nz_volume: float = -40,
# n_bands: int = 6,
# n_freq: int = 3,
# n_time: int = 5,
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(eq_amount=eq_amount, n_bands=n_bands, name=name, prob=prob)
# self.nz_volume = nz_volume
# self.denoise_amount = denoise_amount
# self.spectral_gate = ml.layers.SpectralGate(n_freq, n_time)
# def _transform(self, signal, nz, eq, denoise_amount):
# nz = nz.normalize(self.nz_volume).equalizer(eq)
# self.spectral_gate = self.spectral_gate.to(signal.device)
# signal = self.spectral_gate(signal, nz, denoise_amount)
# return signal
# def _instantiate(self, state: RandomState):
# kwargs = super()._instantiate(state)
# kwargs["denoise_amount"] = util.sample_from_dist(self.denoise_amount, state)
# kwargs["nz"] = AudioSignal(state.randn(22050), 44100)
# return kwargs

@@ -1,6 +1,4 @@
"""
Functions for comparing AudioSignal objects to one another.
"""
# from . import distance
from . import quality
# from . import spectral

@@ -1,5 +1,3 @@
from . import decorators
from .accelerator import Accelerator
from .basemodel import BaseModel
# from . import layers
# from .experiment import Experiment
