rm some useless comment

pull/3900/head
drryanhuang 9 months ago
parent 1bfc9bc2b2
commit 3599089040

@@ -245,177 +245,6 @@ def pure_tone(freq: float, sr: float=128, dur: float=4, device=None):
return paddle.cos(2 * math.pi * freq * time)
# def _new_rfft(x: paddle.Tensor):
# z = paddle.fft.rfft(x, axis=-1)
# z_real = paddle.real(z)
# z_imag = paddle.imag(z)
# z_view_as_real = paddle.stack([z_real, z_imag], axis=-1)
# return z_view_as_real
# def _new_irfft(x: paddle.Tensor, length: int):
# x_real = x[..., 0]
# x_imag = x[..., 1]
# x_view_as_complex = paddle.complex(x_real, x_imag)
# return paddle.fft.irfft(x_view_as_complex, n=length, axis=-1)
# def _compl_mul_conjugate(a: paddle.Tensor, b: paddle.Tensor):
# """
# Given a and b two tensors of dimension 4
# with the last dimension being the real and imaginary part,
# returns a multiplied by the conjugate of b, the multiplication
# being with respect to the second dimension.
# PaddlePaddle does not have direct support for complex number operations
# using einsum in the same manner as PyTorch, but we can manually compute
# the equivalent result.
# """
# # Extract the real and imaginary parts of a and b
# real_a = a[..., 0]
# imag_a = a[..., 1]
# real_b = b[..., 0]
# imag_b = b[..., 1]
# # Compute the multiplication with respect to the second dimension manually
# real_part = paddle.einsum("bcft,dct->bdft", real_a, real_b) + paddle.einsum(
# "bcft,dct->bdft", imag_a, imag_b)
# imag_part = paddle.einsum("bcft,dct->bdft", imag_a, real_b) - paddle.einsum(
# "bcft,dct->bdft", real_a, imag_b)
# # Stack the real and imaginary parts together
# result = paddle.stack([real_part, imag_part], axis=-1)
# return result
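The real/imag einsum above is just the expansion of a * conj(b); a small NumPy check of that identity (illustrative only, not part of this commit):
import numpy as np
# shapes follow the docstring: a is [B, C, F, T], b is [D, C, T]
a = np.random.randn(2, 3, 5, 7) + 1j * np.random.randn(2, 3, 5, 7)
b = np.random.randn(4, 3, 7) + 1j * np.random.randn(4, 3, 7)
ref = np.einsum("bcft,dct->bdft", a, np.conj(b))
real = np.einsum("bcft,dct->bdft", a.real, b.real) + np.einsum("bcft,dct->bdft", a.imag, b.imag)
imag = np.einsum("bcft,dct->bdft", a.imag, b.real) - np.einsum("bcft,dct->bdft", a.real, b.imag)
assert np.allclose(ref, real + 1j * imag)  # same result as multiplying by the conjugate directly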
# def fft_conv1d(
# _input: paddle.Tensor,
# weight: paddle.Tensor,
# bias: Optional[paddle.Tensor]=None,
# stride: int=1,
# padding: int=0,
# block_ratio: float=5, ):
# """
# Same as `paddle.nn.functional.conv1d` but using FFT for the convolution.
# Please check PaddlePaddle documentation for more information.
# Args:
# _input (Tensor): _input signal of shape `[B, C, T]`.
# weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number
# of output channels.
# bias (Tensor or None): if not None, bias term for the convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# block_ratio (float): can be tuned for speed. The _input is split into chunks
# with a size of `int(block_ratio * kernel_size)`.
# Shape:
# - Inputs: `_input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`.
# - Output: `(*, T)`
# ..note::
# This function is faster than `paddle.nn.functional.conv1d` only in specific cases.
# Typically, the kernel size should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..Warning::
# Dilation and groups are not supported at the moment. This function might use
# more memory than the default Conv1d implementation.
# """
# _input = F.pad(_input, (padding, padding), data_format="NCL")
# batch, channels, length = _input.shape
# out_channels, _, kernel_size = weight.shape
# if length < kernel_size:
# raise RuntimeError(
# f"Input should be at least as large as the kernel size {kernel_size}, "
# f"but it is only {length} samples long.")
# if block_ratio < 1:
# raise RuntimeError("Block ratio must be greater than 1.")
# block_size: int = min(int(kernel_size * block_ratio), length)
# fold_stride = block_size - kernel_size + 1
# weight = pad_to(weight, block_size)
# weight_z = _new_rfft(weight)
# # We pad the _input and get the different frames, on which
# frames = unfold(_input, block_size, fold_stride)
# frames_z = _new_rfft(frames)
# out_z = _compl_mul_conjugate(frames_z, weight_z)
# out = _new_irfft(out_z, block_size)
# # The last bit is invalid, because FFT will do a circular convolution.
# out = out[..., :-kernel_size + 1]
# out = out.reshape([batch, out_channels, -1])
# out = out[..., ::stride]
# target_length = (length - kernel_size) // stride + 1
# out = out[..., :target_length]
# if bias is not None:
# out += bias[:, None]
# return out
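The reason the last `kernel_size - 1` samples of each block are dropped is that the FFT computes a circular correlation; only the first `block_size - kernel_size + 1` outputs match a linear ("valid") correlation. A standalone NumPy sketch of that idea (my own illustration, not code from this repo):
import numpy as np
x = np.random.randn(1024)   # one block of the input
k = np.random.randn(256)    # kernel, zero-padded to the block length by rfft below
n = len(x)
spec = np.fft.rfft(x, n) * np.conj(np.fft.rfft(k, n))
out = np.fft.irfft(spec, n)[: len(x) - len(k) + 1]   # drop the wrapped-around tail
assert np.allclose(out, np.correlate(x, k, mode="valid"))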
# class FFTConv1D(paddle.nn.Layer):
# """
# Same as `paddle.nn.Conv1D` but based on a custom FFT-based convolution.
# Please check PaddlePaddle documentation for more information on `paddle.nn.Conv1D`.
# Args:
# in_channels (int): number of _input channels.
# out_channels (int): number of output channels.
# kernel_size (int): kernel size of convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# bias (bool): if True, use a bias term.
# ..note::
# This module is faster than `paddle.nn.Conv1D` only in specific cases.
# Typically, `kernel_size` should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..warning::
# Dilation and groups are not supported at the moment. This module might use
# more memory than the default Conv1D implementation.
# >>> fftconv = FFTConv1D(12, 24, 128, 4)
# >>> x = paddle.randn([4, 12, 1024])
# >>> print(list(fftconv(x).shape))
# [4, 24, 225]
# """
# def __init__(
# self,
# in_channels: int,
# out_channels: int,
# kernel_size: int,
# stride: int=1,
# padding: int=0,
# bias: bool=True, ):
# super(FFTConv1D, self).__init__()
# self.in_channels = in_channels
# self.out_channels = out_channels
# self.kernel_size = kernel_size
# self.stride = stride
# self.padding = padding
# # Create a Conv1D layer to initialize weights and bias
# conv = paddle.nn.Conv1D(
# in_channels,
# out_channels,
# kernel_size,
# stride=stride,
# padding=padding,
# bias_attr=bias)
# self.weight = conv.weight
# if bias:
# self.bias = conv.bias
# else:
# self.bias = None
# def forward(self, _input: paddle.Tensor):
# return fft_conv1d(_input, self.weight, self.bias, self.stride,
# self.padding)
class LowPassFilters(nn.Layer):
"""
Bank of low pass filters.

@@ -94,13 +94,10 @@ STFTParams.__new__.__defaults__ = (None, None, None, None, None)
class AudioSignal(
EffectMixin,
LoudnessMixin,
# PlayMixin,
ImpulseResponseMixin,
DSPMixin,
DisplayMixin,
FFMPEGMixin,
# WhisperMixin,
):
"""This is the core object of this library. Audio is always
loaded into an AudioSignal, which then enables all the features
of this library, including audio augmentations, I/O, playback,

@@ -6,8 +6,6 @@ import paddle
from . import util
from ._julius import SplitBands
# from . import _julius
class EffectMixin:
GAIN_FACTOR = np.log(10) / 20
@@ -253,152 +251,6 @@ class EffectMixin:
self.audio_data = self.audio_data * gain[:, None, None]
return self
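For reference, GAIN_FACTOR = ln(10) / 20 is the standard decibel-to-amplitude conversion, so exp(db * GAIN_FACTOR) equals 10 ** (db / 20); a quick sanity check, purely illustrative:
import numpy as np
GAIN_FACTOR = np.log(10) / 20
db = -6.0
# about 0.501, i.e. a -6 dB change roughly halves the amplitude
assert np.isclose(np.exp(db * GAIN_FACTOR), 10 ** (db / 20))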
# def _to_2d(self):
# waveform = self.audio_data.reshape(-1, self.signal_length)
# return waveform
# def _to_3d(self, waveform):
# return waveform.reshape(self.batch_size, self.num_channels, -1)
# def pitch_shift(self, n_semitones: int, quick: bool = True):
# """Pitch shift the signal. All items in the batch
# get the same pitch shift.
# Parameters
# ----------
# n_semitones : int
# How many semitones to shift the signal by.
# quick : bool, optional
# Using quick pitch shifting, by default True
# Returns
# -------
# AudioSignal
# Pitch shifted audio signal.
# """
# device = self.device
# effects = [
# ["pitch", str(n_semitones * 100)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def time_stretch(self, factor: float, quick: bool = True):
# """Time stretch the audio signal.
# Parameters
# ----------
# factor : float
# Factor by which to stretch the AudioSignal. Typically
# between 0.8 and 1.2.
# quick : bool, optional
# Whether to use quick time stretching, by default True
# Returns
# -------
# AudioSignal
# Time-stretched AudioSignal.
# """
# device = self.device
# effects = [
# ["tempo", str(factor)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def apply_codec(
# self,
# preset: str = None,
# format: str = "wav",
# encoding: str = None,
# bits_per_sample: int = None,
# compression: int = None,
# ): # pragma: no cover
# """Applies an audio codec to the signal.
# Parameters
# ----------
# preset : str, optional
# One of the keys in ``self.CODEC_PRESETS``, by default None
# format : str, optional
# Format for audio codec, by default "wav"
# encoding : str, optional
# Encoding to use, by default None
# bits_per_sample : int, optional
# How many bits per sample, by default None
# compression : int, optional
# Compression amount of codec, by default None
# Returns
# -------
# AudioSignal
# AudioSignal with codec applied.
# Raises
# ------
# ValueError
# If preset is not in ``self.CODEC_PRESETS``, an error
# is thrown.
# """
# torchaudio_version_070 = "0.7" in torchaudio.__version__
# if torchaudio_version_070:
# return self
# kwargs = {
# "format": format,
# "encoding": encoding,
# "bits_per_sample": bits_per_sample,
# "compression": compression,
# }
# if preset is not None:
# if preset in self.CODEC_PRESETS:
# kwargs = self.CODEC_PRESETS[preset]
# else:
# raise ValueError(
# f"Unknown preset: {preset}. "
# f"Known presets: {list(self.CODEC_PRESETS.keys())}"
# )
# waveform = self._to_2d()
# if kwargs["format"] in ["vorbis", "mp3", "ogg", "amr-nb"]:
# # Apply it in a for loop
# augmented = torch.cat(
# [
# torchaudio.functional.apply_codec(
# waveform[i][None, :], self.sample_rate, **kwargs
# )
# for i in range(waveform.shape[0])
# ],
# dim=0,
# )
# else:
# augmented = torchaudio.functional.apply_codec(
# waveform, self.sample_rate, **kwargs
# )
# augmented = self._to_3d(augmented)
# self.audio_data = augmented
# return self
def mel_filterbank(self, n_bands: int):
"""Breaks signal into mel bands.

@@ -478,21 +478,6 @@ class ConcatDataset(AudioDataset):
return dataset[idx // len(self.datasets)]
# class ResumableDistributedSampler(DistributedSampler): # pragma: no cover
# """Distributed sampler that can be resumed from a given start index."""
# def __init__(self, dataset, start_idx: int = None, **kwargs):
# super().__init__(dataset, **kwargs)
# # Start index, allows to resume an experiment at the index it was
# self.start_idx = start_idx // self.num_replicas if start_idx is not None else 0
# def __iter__(self):
# for i, idx in enumerate(super().__iter__()):
# if i >= self.start_idx:
# yield idx
# self.start_idx = 0 # set the index back to 0 so for the next epoch
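The removed sampler resumes by skipping already-seen indices on the first pass and then resetting; a toy, framework-free illustration of that pattern (hypothetical `ResumableRange`, for intuition only):
class ResumableRange:
    """Yield 0..n-1, but skip everything before start_idx on the first pass (toy class, not the repo's sampler)."""
    def __init__(self, n, start_idx=0):
        self.n, self.start_idx = n, start_idx
    def __iter__(self):
        for i in range(self.n):
            if i >= self.start_idx:
                yield i
        self.start_idx = 0  # subsequent epochs start from the beginning again
sampler = ResumableRange(5, start_idx=3)
assert list(sampler) == [3, 4]            # resumed epoch
assert list(sampler) == [0, 1, 2, 3, 4]   # next epoch runs in full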
class ResumableDistributedSampler(DistributedBatchSampler): # pragma: no cover
"""Distributed sampler that can be resumed from a given start index."""

@@ -608,108 +608,6 @@ class Equalizer(BaseTransform):
return signal.equalizer(eq)
# class Quantization(BaseTransform):
# """Applies quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of evenly spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.quantization(channels)
# class MuLawQuantization(BaseTransform):
# """Applies mu-law quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.mulaw_quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of mu-law spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.mulaw_quantization(channels)
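For context, mu-law quantization compands the waveform before rounding so that small amplitudes get finer resolution; a generic sketch of the companding step (standard formula, not necessarily the exact mulaw_quantization implementation):
import numpy as np
def mulaw_compand(x, channels=256):
    # standard mu-law curve; hypothetical helper, not from this repo
    mu = channels - 1
    return np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)   # maps [-1, 1] -> [-1, 1]
x = np.linspace(-1, 1, 5)
print(np.round(mulaw_compand(x), 3))   # e.g. [-1. -0.876  0.  0.876  1.]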
# class NoiseFloor(BaseTransform):
# """Adds a noise floor of Gaussian noise to the signal at a specified
# dB.
# Parameters
# ----------
# db : tuple, optional
# Level of noise to add to signal, by default ("const", -50.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("const", -50.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# db = util.sample_from_dist(self.db, state)
# audio_data = state.randn(signal.num_channels, signal.signal_length)
# nz_signal = AudioSignal(audio_data, signal.sample_rate)
# nz_signal.normalize(db)
# return {"nz_signal": nz_signal}
# def _transform(self, signal, nz_signal):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# return signal + nz_signal
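The `nz_signal.normalize(db)` step above scales the random noise to a target level; in plain RMS-dB terms the idea looks roughly like this (rough sketch only; the library's normalize() uses a loudness measure, which is more involved):
import numpy as np
db_target = -50.0
nz = np.random.randn(44100)
rms_db = 20 * np.log10(np.sqrt(np.mean(nz ** 2)))   # current level in dB RMS
nz = nz * 10 ** ((db_target - rms_db) / 20)          # noise now sits at roughly -50 dB RMS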
class BackgroundNoise(BaseTransform):
"""Adds background noise from audio specified by a set of CSV files.
A valid CSV file looks like, and is typically generated by
@@ -796,68 +694,6 @@ class BackgroundNoise(BaseTransform):
return signal.mix(bg_signal.clone(), snr, eq)
# class CrossTalk(BaseTransform):
# """Adds crosstalk between speakers, whose audio is drawn from a CSV file
# that was produced via :py:func:`audiotools.data.preprocess.create_csv`.
# This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix`
# under the hood.
# Parameters
# ----------
# snr : tuple, optional
# How loud cross-talk speaker is relative to original signal in dB,
# by default ("uniform", 0.0, 10.0)
# sources : List[str], optional
# Sources containing folders, or CSVs with paths to audio files,
# by default None
# weights : List[float], optional
# Weights to sample audio files from each source, by default None
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# loudness_cutoff : float, optional
# Loudness cutoff when loading from audio files, by default -40
# """
# def __init__(
# self,
# snr: tuple = ("uniform", 0.0, 10.0),
# sources: List[str] = None,
# weights: List[float] = None,
# name: str = None,
# prob: float = 1.0,
# loudness_cutoff: float = -40,
# ):
# super().__init__(name=name, prob=prob)
# self.snr = snr
# self.loader = AudioLoader(sources, weights)
# self.loudness_cutoff = loudness_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# snr = util.sample_from_dist(self.snr, state)
# crosstalk_signal = self.loader(
# state,
# signal.sample_rate,
# duration=signal.signal_duration,
# loudness_cutoff=self.loudness_cutoff,
# num_channels=signal.num_channels,
# )["signal"]
# return {"crosstalk_signal": crosstalk_signal, "snr": snr}
# def _transform(self, signal, crosstalk_signal, snr):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# loudness = signal.loudness()
# mix = signal.mix(crosstalk_signal.clone(), snr)
# mix.normalize(loudness)
# return mix
class RoomImpulseResponse(BaseTransform):
"""Convolves signal with a room impulse response, at a specified
direct-to-reverberant ratio, with equalization applied. Room impulse
@@ -942,38 +778,6 @@ class RoomImpulseResponse(BaseTransform):
use_original_phase=self.use_original_phase)
# class VolumeChange(BaseTransform):
# """Changes the volume of the input signal.
# Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.
# Parameters
# ----------
# db : tuple, optional
# Change in volume in decibels, by default ("uniform", -12.0, 0.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("uniform", -12.0, 0.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState):
# return {"db": util.sample_from_dist(self.db, state)}
# def _transform(self, signal, db):
# return signal.volume_change(db)
class VolumeNorm(BaseTransform):
"""Normalizes the volume of the excerpt to a specified decibel.
@@ -1169,111 +973,6 @@ class HighPass(BaseTransform):
return signal.high_pass(cutoff, zeros=self.zeros)
# class RescaleAudio(BaseTransform):
# """Rescales the audio so it is in between ``-val`` and ``val``
# only if the original audio exceeds those bounds. Useful if
# transforms have caused the audio to clip.
# Uses :py:func:`audiotools.core.effects.EffectMixin.ensure_max_of_audio`.
# Parameters
# ----------
# val : float, optional
# Max absolute value of signal, by default 1.0
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, val: float = 1.0, name: str = None, prob: float = 1):
# super().__init__(name=name, prob=prob)
# self.val = val
# def _transform(self, signal):
# return signal.ensure_max_of_audio(self.val)
# class ShiftPhase(SpectralTransform):
# """Shifts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# shift : tuple, optional
# How much to shift phase by, by default ("uniform", -np.pi, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# shift: tuple = ("uniform", -np.pi, np.pi),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.shift = shift
# def _instantiate(self, state: RandomState):
# return {"shift": util.sample_from_dist(self.shift, state)}
# def _transform(self, signal, shift):
# return signal.shift_phase(shift)
# class InvertPhase(ShiftPhase):
# """Inverts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, name: str = None, prob: float = 1):
# super().__init__(shift=("const", np.pi), name=name, prob=prob)
# class CorruptPhase(SpectralTransform):
# """Corrupts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.corrupt_phase`.
# Parameters
# ----------
# scale : tuple, optional
# How much to corrupt phase by, by default ("uniform", 0, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self, scale: tuple = ("uniform", 0, np.pi), name: str = None, prob: float = 1
# ):
# super().__init__(name=name, prob=prob)
# self.scale = scale
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# scale = util.sample_from_dist(self.scale, state)
# corruption = state.normal(scale=scale, size=signal.phase.shape[1:])
# return {"corruption": corruption.astype("float32")}
# def _transform(self, signal, corruption):
# return signal.shift_phase(shift=corruption)
class FrequencyMask(SpectralTransform):
"""Masks a band of frequencies at a center frequency
from the audio.
@@ -1363,39 +1062,6 @@ class TimeMask(SpectralTransform):
return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
# class MaskLowMagnitudes(SpectralTransform):
# """Masks low magnitude regions out of signal.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_low_magnitudes`.
# Parameters
# ----------
# db_cutoff : tuple, optional
# Decibel value for which things below it will be masked away,
# by default ("uniform", -10, 10)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db_cutoff: tuple = ("uniform", -10, 10),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.db_cutoff = db_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# return {"db_cutoff": util.sample_from_dist(self.db_cutoff, state)}
# def _transform(self, signal, db_cutoff: float):
# return signal.mask_low_magnitudes(db_cutoff)
class Smoothing(BaseTransform):
"""Convolves the signal with a smoothing window.
@@ -1445,48 +1111,6 @@ class Smoothing(BaseTransform):
return out
# class TimeNoise(TimeMask):
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
# replaces with noise instead of zeros.
# Parameters
# ----------
# t_center : tuple, optional
# Center time in terms of 0.0 and 1.0 (duration of signal),
# by default ("uniform", 0.0, 1.0)
# t_width : tuple, optional
# Width of dropped out portion, by default ("const", 0.025)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# t_center: tuple = ("uniform", 0.0, 1.0),
# t_width: tuple = ("const", 0.025),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(t_center=t_center, t_width=t_width, name=name, prob=prob)
# def _transform(self, signal, tmin_s: float, tmax_s: float):
# signal = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s, val=0.0)
# mag, phase = signal.magnitude, signal.phase
# mag_r, phase_r = torch.randn_like(mag), torch.randn_like(phase)
# mask = (mag == 0.0) * (phase == 0.0)
# mag[mask] = mag_r[mask]
# phase[mask] = phase_r[mask]
# signal.magnitude = mag
# signal.phase = phase
# return signal
class FrequencyNoise(FrequencyMask):
"""Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
replaces with noise instead of zeros.
@@ -1530,59 +1154,3 @@ class FrequencyNoise(FrequencyMask):
signal.magnitude = mag
signal.phase = phase
return signal
# class SpectralDenoising(Equalizer):
# """Applies denoising algorithm detailed in
# :py:func:`audiotools.ml.layers.spectral_gate.SpectralGate`,
# using a randomly generated noise signal for denoising.
# Parameters
# ----------
# eq_amount : tuple, optional
# Amount of eq to apply to noise signal, by default ("const", 1.0)
# denoise_amount : tuple, optional
# Amount to denoise by, by default ("uniform", 0.8, 1.0)
# nz_volume : float, optional
# Volume of noise to denoise with, by default -40
# n_bands : int, optional
# Number of bands in equalizer, by default 6
# n_freq : int, optional
# Number of frequency bins to smooth by, by default 3
# n_time : int, optional
# Number of time bins to smooth by, by default 5
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# eq_amount: tuple = ("const", 1.0),
# denoise_amount: tuple = ("uniform", 0.8, 1.0),
# nz_volume: float = -40,
# n_bands: int = 6,
# n_freq: int = 3,
# n_time: int = 5,
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(eq_amount=eq_amount, n_bands=n_bands, name=name, prob=prob)
# self.nz_volume = nz_volume
# self.denoise_amount = denoise_amount
# self.spectral_gate = ml.layers.SpectralGate(n_freq, n_time)
# def _transform(self, signal, nz, eq, denoise_amount):
# nz = nz.normalize(self.nz_volume).equalizer(eq)
# self.spectral_gate = self.spectral_gate.to(signal.device)
# signal = self.spectral_gate(signal, nz, denoise_amount)
# return signal
# def _instantiate(self, state: RandomState):
# kwargs = super()._instantiate(state)
# kwargs["denoise_amount"] = util.sample_from_dist(self.denoise_amount, state)
# kwargs["nz"] = AudioSignal(state.randn(22050), 44100)
# return kwargs

@@ -1,6 +1,4 @@
"""
Functions for comparing AudioSignal objects to one another.
"""
# from . import distance
from . import quality
# from . import spectral

@@ -1,5 +1,3 @@
from . import decorators
from .accelerator import Accelerator
from .basemodel import BaseModel
# from . import layers
# from .experiment import Experiment
