diff --git a/audio/audiotools/core/_julius.py b/audio/audiotools/core/_julius.py index 459c7255a..af70c3878 100644 --- a/audio/audiotools/core/_julius.py +++ b/audio/audiotools/core/_julius.py @@ -245,177 +245,6 @@ def pure_tone(freq: float, sr: float=128, dur: float=4, device=None): return paddle.cos(2 * math.pi * freq * time) -# def _new_rfft(x: paddle.Tensor): -# z = paddle.fft.rfft(x, axis=-1) - -# z_real = paddle.real(z) -# z_imag = paddle.imag(z) - -# z_view_as_real = paddle.stack([z_real, z_imag], axis=-1) -# return z_view_as_real - -# def _new_irfft(x: paddle.Tensor, length: int): -# x_real = x[..., 0] -# x_imag = x[..., 1] -# x_view_as_complex = paddle.complex(x_real, x_imag) -# return paddle.fft.irfft(x_view_as_complex, n=length, axis=-1) - -# def _compl_mul_conjugate(a: paddle.Tensor, b: paddle.Tensor): -# """ -# Given a and b two tensors of dimension 4 -# with the last dimension being the real and imaginary part, -# returns a multiplied by the conjugate of b, the multiplication -# being with respect to the second dimension. - -# PaddlePaddle does not have direct support for complex number operations -# using einsum in the same manner as PyTorch, but we can manually compute -# the equivalent result. -# """ -# # Extract the real and imaginary parts of a and b -# real_a = a[..., 0] -# imag_a = a[..., 1] -# real_b = b[..., 0] -# imag_b = b[..., 1] - -# # Compute the multiplication with respect to the second dimension manually -# real_part = paddle.einsum("bcft,dct->bdft", real_a, real_b) + paddle.einsum( -# "bcft,dct->bdft", imag_a, imag_b) -# imag_part = paddle.einsum("bcft,dct->bdft", imag_a, real_b) - paddle.einsum( -# "bcft,dct->bdft", real_a, imag_b) - -# # Stack the real and imaginary parts together -# result = paddle.stack([real_part, imag_part], axis=-1) -# return result - -# def fft_conv1d( -# _input: paddle.Tensor, -# weight: paddle.Tensor, -# bias: Optional[paddle.Tensor]=None, -# stride: int=1, -# padding: int=0, -# block_ratio: float=5, ): -# """ -# Same as `paddle.nn.functional.conv1d` but using FFT for the convolution. -# Please check PaddlePaddle documentation for more information. - -# Args: -# _input (Tensor): _input signal of shape `[B, C, T]`. -# weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number -# of output channels. -# bias (Tensor or None): if not None, bias term for the convolution. -# stride (int): stride of convolution. -# padding (int): padding to apply to the _input. -# block_ratio (float): can be tuned for speed. The _input is splitted in chunks -# with a size of `int(block_ratio * kernel_size)`. - -# Shape: - -# - Inputs: `_input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`. -# - Output: `(*, T)` - -# ..note:: -# This function is faster than `paddle.nn.functional.conv1d` only in specific cases. -# Typically, the kernel size should be of the order of 256 to see any real gain, -# for a stride of 1. - -# ..Warning:: -# Dilation and groups are not supported at the moment. This function might use -# more memory than the default Conv1d implementation. -# """ -# _input = F.pad(_input, (padding, padding), data_format="NCL") -# batch, channels, length = _input.shape -# out_channels, _, kernel_size = weight.shape - -# if length < kernel_size: -# raise RuntimeError( -# f"Input should be at least as large as the kernel size {kernel_size}, " -# f"but it is only {length} samples long.") -# if block_ratio < 1: -# raise RuntimeError("Block ratio must be greater than 1.") - -# block_size: int = min(int(kernel_size * block_ratio), length) -# fold_stride = block_size - kernel_size + 1 -# weight = pad_to(weight, block_size) -# weight_z = _new_rfft(weight) - -# # We pad the _input and get the different frames, on which -# frames = unfold(_input, block_size, fold_stride) - -# frames_z = _new_rfft(frames) -# out_z = _compl_mul_conjugate(frames_z, weight_z) -# out = _new_irfft(out_z, block_size) -# # The last bit is invalid, because FFT will do a circular convolution. -# out = out[..., :-kernel_size + 1] -# out = out.reshape([batch, out_channels, -1]) -# out = out[..., ::stride] -# target_length = (length - kernel_size) // stride + 1 -# out = out[..., :target_length] -# if bias is not None: -# out += bias[:, None] -# return out - -# class FFTConv1D(paddle.nn.Layer): -# """ -# Same as `paddle.nn.Conv1D` but based on a custom FFT-based convolution. -# Please check PaddlePaddle documentation for more information on `paddle.nn.Conv1D`. - -# Args: -# in_channels (int): number of _input channels. -# out_channels (int): number of output channels. -# kernel_size (int): kernel size of convolution. -# stride (int): stride of convolution. -# padding (int): padding to apply to the _input. -# bias (bool): if True, use a bias term. - -# ..note:: -# This module is faster than `paddle.nn.Conv1D` only in specific cases. -# Typically, `kernel_size` should be of the order of 256 to see any real gain, -# for a stride of 1. - -# ..warning:: -# Dilation and groups are not supported at the moment. This module might use -# more memory than the default Conv1D implementation. - -# >>> fftconv = FFTConv1D(12, 24, 128, 4) -# >>> x = paddle.randn([4, 12, 1024]) -# >>> print(list(fftconv(x).shape)) -# [4, 24, 225] -# """ - -# def __init__( -# self, -# in_channels: int, -# out_channels: int, -# kernel_size: int, -# stride: int=1, -# padding: int=0, -# bias: bool=True, ): -# super(FFTConv1D, self).__init__() -# self.in_channels = in_channels -# self.out_channels = out_channels -# self.kernel_size = kernel_size -# self.stride = stride -# self.padding = padding - -# # Create a Conv1D layer to initialize weights and bias -# conv = paddle.nn.Conv1D( -# in_channels, -# out_channels, -# kernel_size, -# stride=stride, -# padding=padding, -# bias_attr=bias) -# self.weight = conv.weight -# if bias: -# self.bias = conv.bias -# else: -# self.bias = None - -# def forward(self, _input: paddle.Tensor): -# return fft_conv1d(_input, self.weight, self.bias, self.stride, -# self.padding) - - class LowPassFilters(nn.Layer): """ Bank of low pass filters. diff --git a/audio/audiotools/core/audio_signal.py b/audio/audiotools/core/audio_signal.py index 92cb88353..9d1faca20 100644 --- a/audio/audiotools/core/audio_signal.py +++ b/audio/audiotools/core/audio_signal.py @@ -94,13 +94,10 @@ STFTParams.__new__.__defaults__ = (None, None, None, None, None) class AudioSignal( EffectMixin, LoudnessMixin, - # PlayMixin, ImpulseResponseMixin, DSPMixin, DisplayMixin, - FFMPEGMixin, - # WhisperMixin, -): + FFMPEGMixin, ): """This is the core object of this library. Audio is always loaded into an AudioSignal, which then enables all the features of this library, including audio augmentations, I/O, playback, diff --git a/audio/audiotools/core/effects.py b/audio/audiotools/core/effects.py index edaf35969..cda0b4f2e 100644 --- a/audio/audiotools/core/effects.py +++ b/audio/audiotools/core/effects.py @@ -6,8 +6,6 @@ import paddle from . import util from ._julius import SplitBands -# from . import _julius - class EffectMixin: GAIN_FACTOR = np.log(10) / 20 @@ -253,152 +251,6 @@ class EffectMixin: self.audio_data = self.audio_data * gain[:, None, None] return self - # def _to_2d(self): - # waveform = self.audio_data.reshape(-1, self.signal_length) - # return waveform - - # def _to_3d(self, waveform): - # return waveform.reshape(self.batch_size, self.num_channels, -1) - - # def pitch_shift(self, n_semitones: int, quick: bool = True): - # """Pitch shift the signal. All items in the batch - # get the same pitch shift. - - # Parameters - # ---------- - # n_semitones : int - # How many semitones to shift the signal by. - # quick : bool, optional - # Using quick pitch shifting, by default True - - # Returns - # ------- - # AudioSignal - # Pitch shifted audio signal. - # """ - # device = self.device - # effects = [ - # ["pitch", str(n_semitones * 100)], - # ["rate", str(self.sample_rate)], - # ] - # if quick: - # effects[0].insert(1, "-q") - - # waveform = self._to_2d().cpu() - # waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor( - # waveform, self.sample_rate, effects, channels_first=True - # ) - # self.sample_rate = sample_rate - # self.audio_data = self._to_3d(waveform) - # return self.to(device) - - # def time_stretch(self, factor: float, quick: bool = True): - # """Time stretch the audio signal. - - # Parameters - # ---------- - # factor : float - # Factor by which to stretch the AudioSignal. Typically - # between 0.8 and 1.2. - # quick : bool, optional - # Whether to use quick time stretching, by default True - - # Returns - # ------- - # AudioSignal - # Time-stretched AudioSignal. - # """ - # device = self.device - # effects = [ - # ["tempo", str(factor)], - # ["rate", str(self.sample_rate)], - # ] - # if quick: - # effects[0].insert(1, "-q") - - # waveform = self._to_2d().cpu() - # waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor( - # waveform, self.sample_rate, effects, channels_first=True - # ) - # self.sample_rate = sample_rate - # self.audio_data = self._to_3d(waveform) - # return self.to(device) - - # def apply_codec( - # self, - # preset: str = None, - # format: str = "wav", - # encoding: str = None, - # bits_per_sample: int = None, - # compression: int = None, - # ): # pragma: no cover - # """Applies an audio codec to the signal. - - # Parameters - # ---------- - # preset : str, optional - # One of the keys in ``self.CODEC_PRESETS``, by default None - # format : str, optional - # Format for audio codec, by default "wav" - # encoding : str, optional - # Encoding to use, by default None - # bits_per_sample : int, optional - # How many bits per sample, by default None - # compression : int, optional - # Compression amount of codec, by default None - - # Returns - # ------- - # AudioSignal - # AudioSignal with codec applied. - - # Raises - # ------ - # ValueError - # If preset is not in ``self.CODEC_PRESETS``, an error - # is thrown. - # """ - # torchaudio_version_070 = "0.7" in torchaudio.__version__ - # if torchaudio_version_070: - # return self - - # kwargs = { - # "format": format, - # "encoding": encoding, - # "bits_per_sample": bits_per_sample, - # "compression": compression, - # } - - # if preset is not None: - # if preset in self.CODEC_PRESETS: - # kwargs = self.CODEC_PRESETS[preset] - # else: - # raise ValueError( - # f"Unknown preset: {preset}. " - # f"Known presets: {list(self.CODEC_PRESETS.keys())}" - # ) - - # waveform = self._to_2d() - # if kwargs["format"] in ["vorbis", "mp3", "ogg", "amr-nb"]: - # # Apply it in a for loop - # augmented = torch.cat( - # [ - # torchaudio.functional.apply_codec( - # waveform[i][None, :], self.sample_rate, **kwargs - # ) - # for i in range(waveform.shape[0]) - # ], - # dim=0, - # ) - # else: - # augmented = torchaudio.functional.apply_codec( - # waveform, self.sample_rate, **kwargs - # ) - # augmented = self._to_3d(augmented) - - # self.audio_data = augmented - # return self - def mel_filterbank(self, n_bands: int): """Breaks signal into mel bands. diff --git a/audio/audiotools/data/datasets.py b/audio/audiotools/data/datasets.py index e5f6ddf19..61764c0bf 100644 --- a/audio/audiotools/data/datasets.py +++ b/audio/audiotools/data/datasets.py @@ -478,21 +478,6 @@ class ConcatDataset(AudioDataset): return dataset[idx // len(self.datasets)] -# class ResumableDistributedSampler(DistributedSampler): # pragma: no cover -# """Distributed sampler that can be resumed from a given start index.""" - -# def __init__(self, dataset, start_idx: int = None, **kwargs): -# super().__init__(dataset, **kwargs) -# # Start index, allows to resume an experiment at the index it was -# self.start_idx = start_idx // self.num_replicas if start_idx is not None else 0 - -# def __iter__(self): -# for i, idx in enumerate(super().__iter__()): -# if i >= self.start_idx: -# yield idx -# self.start_idx = 0 # set the index back to 0 so for the next epoch - - class ResumableDistributedSampler(DistributedBatchSampler): # pragma: no cover """Distributed sampler that can be resumed from a given start index.""" diff --git a/audio/audiotools/data/transforms.py b/audio/audiotools/data/transforms.py index 868fb724b..102b46c79 100644 --- a/audio/audiotools/data/transforms.py +++ b/audio/audiotools/data/transforms.py @@ -608,108 +608,6 @@ class Equalizer(BaseTransform): return signal.equalizer(eq) -# class Quantization(BaseTransform): -# """Applies quantization to the input waveform. Corresponds -# to :py:func:`audiotools.core.effects.EffectMixin.quantization`. - -# Parameters -# ---------- -# channels : tuple, optional -# Number of evenly spaced quantization channels to quantize -# to, by default ("choice", [8, 32, 128, 256, 1024]) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# channels: tuple = ("choice", [8, 32, 128, 256, 1024]), -# name: str = None, -# prob: float = 1.0, -# ): -# super().__init__(name=name, prob=prob) - -# self.channels = channels - -# def _instantiate(self, state: RandomState): -# return {"channels": util.sample_from_dist(self.channels, state)} - -# def _transform(self, signal, channels): -# return signal.quantization(channels) - -# class MuLawQuantization(BaseTransform): -# """Applies mu-law quantization to the input waveform. Corresponds -# to :py:func:`audiotools.core.effects.EffectMixin.mulaw_quantization`. - -# Parameters -# ---------- -# channels : tuple, optional -# Number of mu-law spaced quantization channels to quantize -# to, by default ("choice", [8, 32, 128, 256, 1024]) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# channels: tuple = ("choice", [8, 32, 128, 256, 1024]), -# name: str = None, -# prob: float = 1.0, -# ): -# super().__init__(name=name, prob=prob) - -# self.channels = channels - -# def _instantiate(self, state: RandomState): -# return {"channels": util.sample_from_dist(self.channels, state)} - -# def _transform(self, signal, channels): -# return signal.mulaw_quantization(channels) - -# class NoiseFloor(BaseTransform): -# """Adds a noise floor of Gaussian noise to the signal at a specified -# dB. - -# Parameters -# ---------- -# db : tuple, optional -# Level of noise to add to signal, by default ("const", -50.0) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# db: tuple = ("const", -50.0), -# name: str = None, -# prob: float = 1.0, -# ): -# super().__init__(name=name, prob=prob) - -# self.db = db - -# def _instantiate(self, state: RandomState, signal: AudioSignal): -# db = util.sample_from_dist(self.db, state) -# audio_data = state.randn(signal.num_channels, signal.signal_length) -# nz_signal = AudioSignal(audio_data, signal.sample_rate) -# nz_signal.normalize(db) -# return {"nz_signal": nz_signal} - -# def _transform(self, signal, nz_signal): -# # Clone bg_signal so that transform can be repeatedly applied -# # to different signals with the same effect. -# return signal + nz_signal - - class BackgroundNoise(BaseTransform): """Adds background noise from audio specified by a set of CSV files. A valid CSV file looks like, and is typically generated by @@ -796,68 +694,6 @@ class BackgroundNoise(BaseTransform): return signal.mix(bg_signal.clone(), snr, eq) -# class CrossTalk(BaseTransform): -# """Adds crosstalk between speakers, whose audio is drawn from a CSV file -# that was produced via :py:func:`audiotools.data.preprocess.create_csv`. - -# This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix` -# under the hood. - -# Parameters -# ---------- -# snr : tuple, optional -# How loud cross-talk speaker is relative to original signal in dB, -# by default ("uniform", 0.0, 10.0) -# sources : List[str], optional -# Sources containing folders, or CSVs with paths to audio files, -# by default None -# weights : List[float], optional -# Weights to sample audio files from each source, by default None -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# loudness_cutoff : float, optional -# Loudness cutoff when loading from audio files, by default -40 -# """ - -# def __init__( -# self, -# snr: tuple = ("uniform", 0.0, 10.0), -# sources: List[str] = None, -# weights: List[float] = None, -# name: str = None, -# prob: float = 1.0, -# loudness_cutoff: float = -40, -# ): -# super().__init__(name=name, prob=prob) - -# self.snr = snr -# self.loader = AudioLoader(sources, weights) -# self.loudness_cutoff = loudness_cutoff - -# def _instantiate(self, state: RandomState, signal: AudioSignal): -# snr = util.sample_from_dist(self.snr, state) -# crosstalk_signal = self.loader( -# state, -# signal.sample_rate, -# duration=signal.signal_duration, -# loudness_cutoff=self.loudness_cutoff, -# num_channels=signal.num_channels, -# )["signal"] - -# return {"crosstalk_signal": crosstalk_signal, "snr": snr} - -# def _transform(self, signal, crosstalk_signal, snr): -# # Clone bg_signal so that transform can be repeatedly applied -# # to different signals with the same effect. -# loudness = signal.loudness() -# mix = signal.mix(crosstalk_signal.clone(), snr) -# mix.normalize(loudness) -# return mix - - class RoomImpulseResponse(BaseTransform): """Convolves signal with a room impulse response, at a specified direct-to-reverberant ratio, with equalization applied. Room impulse @@ -942,38 +778,6 @@ class RoomImpulseResponse(BaseTransform): use_original_phase=self.use_original_phase) -# class VolumeChange(BaseTransform): -# """Changes the volume of the input signal. - -# Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`. - -# Parameters -# ---------- -# db : tuple, optional -# Change in volume in decibels, by default ("uniform", -12.0, 0.0) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# db: tuple = ("uniform", -12.0, 0.0), -# name: str = None, -# prob: float = 1.0, -# ): -# super().__init__(name=name, prob=prob) -# self.db = db - -# def _instantiate(self, state: RandomState): -# return {"db": util.sample_from_dist(self.db, state)} - -# def _transform(self, signal, db): -# return signal.volume_change(db) - - class VolumeNorm(BaseTransform): """Normalizes the volume of the excerpt to a specified decibel. @@ -1169,111 +973,6 @@ class HighPass(BaseTransform): return signal.high_pass(cutoff, zeros=self.zeros) -# class RescaleAudio(BaseTransform): -# """Rescales the audio so it is in between ``-val`` and ``val`` -# only if the original audio exceeds those bounds. Useful if -# transforms have caused the audio to clip. - -# Uses :py:func:`audiotools.core.effects.EffectMixin.ensure_max_of_audio`. - -# Parameters -# ---------- -# val : float, optional -# Max absolute value of signal, by default 1.0 -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__(self, val: float = 1.0, name: str = None, prob: float = 1): -# super().__init__(name=name, prob=prob) - -# self.val = val - -# def _transform(self, signal): -# return signal.ensure_max_of_audio(self.val) - -# class ShiftPhase(SpectralTransform): -# """Shifts the phase of the audio. - -# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift)phase`. - -# Parameters -# ---------- -# shift : tuple, optional -# How much to shift phase by, by default ("uniform", -np.pi, np.pi) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# shift: tuple = ("uniform", -np.pi, np.pi), -# name: str = None, -# prob: float = 1, -# ): -# super().__init__(name=name, prob=prob) -# self.shift = shift - -# def _instantiate(self, state: RandomState): -# return {"shift": util.sample_from_dist(self.shift, state)} - -# def _transform(self, signal, shift): -# return signal.shift_phase(shift) - -# class InvertPhase(ShiftPhase): -# """Inverts the phase of the audio. - -# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`. - -# Parameters -# ---------- -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__(self, name: str = None, prob: float = 1): -# super().__init__(shift=("const", np.pi), name=name, prob=prob) - -# class CorruptPhase(SpectralTransform): -# """Corrupts the phase of the audio. - -# Uses :py:func:`audiotools.core.dsp.DSPMixin.corrupt_phase`. - -# Parameters -# ---------- -# scale : tuple, optional -# How much to corrupt phase by, by default ("uniform", 0, np.pi) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, scale: tuple = ("uniform", 0, np.pi), name: str = None, prob: float = 1 -# ): -# super().__init__(name=name, prob=prob) -# self.scale = scale - -# def _instantiate(self, state: RandomState, signal: AudioSignal = None): -# scale = util.sample_from_dist(self.scale, state) -# corruption = state.normal(scale=scale, size=signal.phase.shape[1:]) -# return {"corruption": corruption.astype("float32")} - -# def _transform(self, signal, corruption): -# return signal.shift_phase(shift=corruption) - - class FrequencyMask(SpectralTransform): """Masks a band of frequencies at a center frequency from the audio. @@ -1363,39 +1062,6 @@ class TimeMask(SpectralTransform): return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s) -# class MaskLowMagnitudes(SpectralTransform): -# """Masks low magnitude regions out of signal. - -# Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_low_magnitudes`. - -# Parameters -# ---------- -# db_cutoff : tuple, optional -# Decibel value for which things below it will be masked away, -# by default ("uniform", -10, 10) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# db_cutoff: tuple = ("uniform", -10, 10), -# name: str = None, -# prob: float = 1, -# ): -# super().__init__(name=name, prob=prob) -# self.db_cutoff = db_cutoff - -# def _instantiate(self, state: RandomState, signal: AudioSignal = None): -# return {"db_cutoff": util.sample_from_dist(self.db_cutoff, state)} - -# def _transform(self, signal, db_cutoff: float): -# return signal.mask_low_magnitudes(db_cutoff) - - class Smoothing(BaseTransform): """Convolves the signal with a smoothing window. @@ -1445,48 +1111,6 @@ class Smoothing(BaseTransform): return out -# class TimeNoise(TimeMask): -# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but -# replaces with noise instead of zeros. - -# Parameters -# ---------- -# t_center : tuple, optional -# Center time in terms of 0.0 and 1.0 (duration of signal), -# by default ("uniform", 0.0, 1.0) -# t_width : tuple, optional -# Width of dropped out portion, by default ("const", 0.025) -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# t_center: tuple = ("uniform", 0.0, 1.0), -# t_width: tuple = ("const", 0.025), -# name: str = None, -# prob: float = 1, -# ): -# super().__init__(t_center=t_center, t_width=t_width, name=name, prob=prob) - -# def _transform(self, signal, tmin_s: float, tmax_s: float): -# signal = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s, val=0.0) -# mag, phase = signal.magnitude, signal.phase - -# mag_r, phase_r = torch.randn_like(mag), torch.randn_like(phase) -# mask = (mag == 0.0) * (phase == 0.0) - -# mag[mask] = mag_r[mask] -# phase[mask] = phase_r[mask] - -# signal.magnitude = mag -# signal.phase = phase -# return signal - - class FrequencyNoise(FrequencyMask): """Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but replaces with noise instead of zeros. @@ -1530,59 +1154,3 @@ class FrequencyNoise(FrequencyMask): signal.magnitude = mag signal.phase = phase return signal - - -# class SpectralDenoising(Equalizer): -# """Applies denoising algorithm detailed in -# :py:func:`audiotools.ml.layers.spectral_gate.SpectralGate`, -# using a randomly generated noise signal for denoising. - -# Parameters -# ---------- -# eq_amount : tuple, optional -# Amount of eq to apply to noise signal, by default ("const", 1.0) -# denoise_amount : tuple, optional -# Amount to denoise by, by default ("uniform", 0.8, 1.0) -# nz_volume : float, optional -# Volume of noise to denoise with, by default -40 -# n_bands : int, optional -# Number of bands in equalizer, by default 6 -# n_freq : int, optional -# Number of frequency bins to smooth by, by default 3 -# n_time : int, optional -# Number of time bins to smooth by, by default 5 -# name : str, optional -# Name of this transform, used to identify it in the dictionary -# produced by ``self.instantiate``, by default None -# prob : float, optional -# Probability of applying this transform, by default 1.0 -# """ - -# def __init__( -# self, -# eq_amount: tuple = ("const", 1.0), -# denoise_amount: tuple = ("uniform", 0.8, 1.0), -# nz_volume: float = -40, -# n_bands: int = 6, -# n_freq: int = 3, -# n_time: int = 5, -# name: str = None, -# prob: float = 1, -# ): -# super().__init__(eq_amount=eq_amount, n_bands=n_bands, name=name, prob=prob) - -# self.nz_volume = nz_volume -# self.denoise_amount = denoise_amount -# self.spectral_gate = ml.layers.SpectralGate(n_freq, n_time) - -# def _transform(self, signal, nz, eq, denoise_amount): -# nz = nz.normalize(self.nz_volume).equalizer(eq) -# self.spectral_gate = self.spectral_gate.to(signal.device) -# signal = self.spectral_gate(signal, nz, denoise_amount) -# return signal - -# def _instantiate(self, state: RandomState): -# kwargs = super()._instantiate(state) -# kwargs["denoise_amount"] = util.sample_from_dist(self.denoise_amount, state) -# kwargs["nz"] = AudioSignal(state.randn(22050), 44100) -# return kwargs diff --git a/audio/audiotools/metrics/__init__.py b/audio/audiotools/metrics/__init__.py index 3e014ff4f..4c492ab99 100644 --- a/audio/audiotools/metrics/__init__.py +++ b/audio/audiotools/metrics/__init__.py @@ -1,6 +1,4 @@ """ Functions for comparing AudioSignal objects to one another. """ -# from . import distance from . import quality -# from . import spectral diff --git a/audio/audiotools/ml/__init__.py b/audio/audiotools/ml/__init__.py index 176231d44..04d529789 100644 --- a/audio/audiotools/ml/__init__.py +++ b/audio/audiotools/ml/__init__.py @@ -1,5 +1,3 @@ from . import decorators from .accelerator import Accelerator from .basemodel import BaseModel -# from . import layers -# from .experiment import Experiment