rm some useless comment

pull/3900/head
drryanhuang 9 months ago
parent 1bfc9bc2b2
commit 3599089040

@@ -245,177 +245,6 @@ def pure_tone(freq: float, sr: float=128, dur: float=4, device=None):
return paddle.cos(2 * math.pi * freq * time)
# def _new_rfft(x: paddle.Tensor):
# z = paddle.fft.rfft(x, axis=-1)
# z_real = paddle.real(z)
# z_imag = paddle.imag(z)
# z_view_as_real = paddle.stack([z_real, z_imag], axis=-1)
# return z_view_as_real
# def _new_irfft(x: paddle.Tensor, length: int):
# x_real = x[..., 0]
# x_imag = x[..., 1]
# x_view_as_complex = paddle.complex(x_real, x_imag)
# return paddle.fft.irfft(x_view_as_complex, n=length, axis=-1)
# def _compl_mul_conjugate(a: paddle.Tensor, b: paddle.Tensor):
# """
# Given a and b two tensors of dimension 4
# with the last dimension being the real and imaginary part,
# returns a multiplied by the conjugate of b, the multiplication
# being with respect to the second dimension.
# PaddlePaddle does not have direct support for complex number operations
# using einsum in the same manner as PyTorch, but we can manually compute
# the equivalent result.
# """
# # Extract the real and imaginary parts of a and b
# real_a = a[..., 0]
# imag_a = a[..., 1]
# real_b = b[..., 0]
# imag_b = b[..., 1]
# # Compute the multiplication with respect to the second dimension manually
# real_part = paddle.einsum("bcft,dct->bdft", real_a, real_b) + paddle.einsum(
# "bcft,dct->bdft", imag_a, imag_b)
# imag_part = paddle.einsum("bcft,dct->bdft", imag_a, real_b) - paddle.einsum(
# "bcft,dct->bdft", real_a, imag_b)
# # Stack the real and imaginary parts together
# result = paddle.stack([real_part, imag_part], axis=-1)
# return result
# def fft_conv1d(
# _input: paddle.Tensor,
# weight: paddle.Tensor,
# bias: Optional[paddle.Tensor]=None,
# stride: int=1,
# padding: int=0,
# block_ratio: float=5, ):
# """
# Same as `paddle.nn.functional.conv1d` but using FFT for the convolution.
# Please check PaddlePaddle documentation for more information.
# Args:
# _input (Tensor): _input signal of shape `[B, C, T]`.
# weight (Tensor): weight of the convolution `[D, C, K]` with `D` the number
# of output channels.
# bias (Tensor or None): if not None, bias term for the convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# block_ratio (float): can be tuned for speed. The _input is split into chunks
# with a size of `int(block_ratio * kernel_size)`.
# Shape:
# - Inputs: `_input` is `[B, C, T]`, `weight` is `[D, C, K]` and bias is `[D]`.
# - Output: `(*, T)`
# ..note::
# This function is faster than `paddle.nn.functional.conv1d` only in specific cases.
# Typically, the kernel size should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..Warning::
# Dilation and groups are not supported at the moment. This function might use
# more memory than the default Conv1d implementation.
# """
# _input = F.pad(_input, (padding, padding), data_format="NCL")
# batch, channels, length = _input.shape
# out_channels, _, kernel_size = weight.shape
# if length < kernel_size:
# raise RuntimeError(
# f"Input should be at least as large as the kernel size {kernel_size}, "
# f"but it is only {length} samples long.")
# if block_ratio < 1:
# raise RuntimeError("Block ratio must be greater than 1.")
# block_size: int = min(int(kernel_size * block_ratio), length)
# fold_stride = block_size - kernel_size + 1
# weight = pad_to(weight, block_size)
# weight_z = _new_rfft(weight)
# # We pad the _input and get the different frames, on which
# frames = unfold(_input, block_size, fold_stride)
# frames_z = _new_rfft(frames)
# out_z = _compl_mul_conjugate(frames_z, weight_z)
# out = _new_irfft(out_z, block_size)
# # The last bit is invalid, because FFT will do a circular convolution.
# out = out[..., :-kernel_size + 1]
# out = out.reshape([batch, out_channels, -1])
# out = out[..., ::stride]
# target_length = (length - kernel_size) // stride + 1
# out = out[..., :target_length]
# if bias is not None:
# out += bias[:, None]
# return out
# class FFTConv1D(paddle.nn.Layer):
# """
# Same as `paddle.nn.Conv1D` but based on a custom FFT-based convolution.
# Please check PaddlePaddle documentation for more information on `paddle.nn.Conv1D`.
# Args:
# in_channels (int): number of _input channels.
# out_channels (int): number of output channels.
# kernel_size (int): kernel size of convolution.
# stride (int): stride of convolution.
# padding (int): padding to apply to the _input.
# bias (bool): if True, use a bias term.
# ..note::
# This module is faster than `paddle.nn.Conv1D` only in specific cases.
# Typically, `kernel_size` should be of the order of 256 to see any real gain,
# for a stride of 1.
# ..warning::
# Dilation and groups are not supported at the moment. This module might use
# more memory than the default Conv1D implementation.
# >>> fftconv = FFTConv1D(12, 24, 128, 4)
# >>> x = paddle.randn([4, 12, 1024])
# >>> print(list(fftconv(x).shape))
# [4, 24, 225]
# """
# def __init__(
# self,
# in_channels: int,
# out_channels: int,
# kernel_size: int,
# stride: int=1,
# padding: int=0,
# bias: bool=True, ):
# super(FFTConv1D, self).__init__()
# self.in_channels = in_channels
# self.out_channels = out_channels
# self.kernel_size = kernel_size
# self.stride = stride
# self.padding = padding
# # Create a Conv1D layer to initialize weights and bias
# conv = paddle.nn.Conv1D(
# in_channels,
# out_channels,
# kernel_size,
# stride=stride,
# padding=padding,
# bias_attr=bias)
# self.weight = conv.weight
# if bias:
# self.bias = conv.bias
# else:
# self.bias = None
# def forward(self, _input: paddle.Tensor):
# return fft_conv1d(_input, self.weight, self.bias, self.stride,
# self.padding)
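The block removed above computed Conv1D as a frequency-domain cross-correlation: rfft the input frames, multiply by the conjugate of the kernel spectrum, irfft, and drop the circularly wrapped tail. A minimal numpy sketch of that identity, for reference only (the helper name is illustrative, not part of the codebase):

import numpy as np

def fft_cross_correlate_valid(x, w):
    # "valid"-mode cross-correlation (what Conv1D computes) via the FFT:
    # rfft(x) * conj(rfft(w)), inverted, with the circular wrap-around dropped.
    n = len(x)
    out = np.fft.irfft(np.fft.rfft(x, n) * np.conj(np.fft.rfft(w, n)), n)
    return out[: n - len(w) + 1]

x, w = np.random.randn(1024), np.random.randn(128)
assert np.allclose(fft_cross_correlate_valid(x, w),
                   np.correlate(x, w, mode="valid"), atol=1e-6)

The removed fft_conv1d additionally splits the input into overlapping blocks of roughly block_ratio * kernel_size samples (overlap-save), which is why it only pays off for kernels on the order of 256 taps.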
class LowPassFilters(nn.Layer):
"""
Bank of low pass filters.

@@ -94,13 +94,10 @@ STFTParams.__new__.__defaults__ = (None, None, None, None, None)
class AudioSignal(
EffectMixin,
LoudnessMixin,
# PlayMixin,
ImpulseResponseMixin,
DSPMixin,
DisplayMixin,
FFMPEGMixin,
# WhisperMixin,
):
FFMPEGMixin, ):
"""This is the core object of this library. Audio is always
loaded into an AudioSignal, which then enables all the features
of this library, including audio augmentations, I/O, playback,

@@ -6,8 +6,6 @@ import paddle
from . import util
from ._julius import SplitBands
# from . import _julius
class EffectMixin:
GAIN_FACTOR = np.log(10) / 20
@@ -253,152 +251,6 @@ class EffectMixin:
self.audio_data = self.audio_data * gain[:, None, None]
return self
# def _to_2d(self):
# waveform = self.audio_data.reshape(-1, self.signal_length)
# return waveform
# def _to_3d(self, waveform):
# return waveform.reshape(self.batch_size, self.num_channels, -1)
# def pitch_shift(self, n_semitones: int, quick: bool = True):
# """Pitch shift the signal. All items in the batch
# get the same pitch shift.
# Parameters
# ----------
# n_semitones : int
# How many semitones to shift the signal by.
# quick : bool, optional
# Using quick pitch shifting, by default True
# Returns
# -------
# AudioSignal
# Pitch shifted audio signal.
# """
# device = self.device
# effects = [
# ["pitch", str(n_semitones * 100)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def time_stretch(self, factor: float, quick: bool = True):
# """Time stretch the audio signal.
# Parameters
# ----------
# factor : float
# Factor by which to stretch the AudioSignal. Typically
# between 0.8 and 1.2.
# quick : bool, optional
# Whether to use quick time stretching, by default True
# Returns
# -------
# AudioSignal
# Time-stretched AudioSignal.
# """
# device = self.device
# effects = [
# ["tempo", str(factor)],
# ["rate", str(self.sample_rate)],
# ]
# if quick:
# effects[0].insert(1, "-q")
# waveform = self._to_2d().cpu()
# waveform, sample_rate = torchaudio.sox_effects.apply_effects_tensor(
# waveform, self.sample_rate, effects, channels_first=True
# )
# self.sample_rate = sample_rate
# self.audio_data = self._to_3d(waveform)
# return self.to(device)
# def apply_codec(
# self,
# preset: str = None,
# format: str = "wav",
# encoding: str = None,
# bits_per_sample: int = None,
# compression: int = None,
# ): # pragma: no cover
# """Applies an audio codec to the signal.
# Parameters
# ----------
# preset : str, optional
# One of the keys in ``self.CODEC_PRESETS``, by default None
# format : str, optional
# Format for audio codec, by default "wav"
# encoding : str, optional
# Encoding to use, by default None
# bits_per_sample : int, optional
# How many bits per sample, by default None
# compression : int, optional
# Compression amount of codec, by default None
# Returns
# -------
# AudioSignal
# AudioSignal with codec applied.
# Raises
# ------
# ValueError
# If preset is not in ``self.CODEC_PRESETS``, an error
# is thrown.
# """
# torchaudio_version_070 = "0.7" in torchaudio.__version__
# if torchaudio_version_070:
# return self
# kwargs = {
# "format": format,
# "encoding": encoding,
# "bits_per_sample": bits_per_sample,
# "compression": compression,
# }
# if preset is not None:
# if preset in self.CODEC_PRESETS:
# kwargs = self.CODEC_PRESETS[preset]
# else:
# raise ValueError(
# f"Unknown preset: {preset}. "
# f"Known presets: {list(self.CODEC_PRESETS.keys())}"
# )
# waveform = self._to_2d()
# if kwargs["format"] in ["vorbis", "mp3", "ogg", "amr-nb"]:
# # Apply it in a for loop
# augmented = torch.cat(
# [
# torchaudio.functional.apply_codec(
# waveform[i][None, :], self.sample_rate, **kwargs
# )
# for i in range(waveform.shape[0])
# ],
# dim=0,
# )
# else:
# augmented = torchaudio.functional.apply_codec(
# waveform, self.sample_rate, **kwargs
# )
# augmented = self._to_3d(augmented)
# self.audio_data = augmented
# return self
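The pitch_shift, time_stretch and apply_codec methods removed above depended on torchaudio.sox_effects, which has no Paddle counterpart. Roughly the same effects can be obtained by invoking the sox command line directly; a sketch, assuming sox is installed and with helper names that are ours:

import subprocess

def sox_pitch_shift(in_path, out_path, n_semitones, sample_rate):
    # sox's `pitch` effect takes cents (100 per semitone); `rate` resamples
    # back to the original sample rate, mirroring the removed effect chain.
    subprocess.run(["sox", in_path, out_path,
                    "pitch", str(n_semitones * 100),
                    "rate", str(sample_rate)], check=True)

def sox_time_stretch(in_path, out_path, factor, sample_rate):
    # `tempo` stretches duration by `factor` without changing pitch.
    subprocess.run(["sox", in_path, out_path,
                    "tempo", str(factor),
                    "rate", str(sample_rate)], check=True)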
def mel_filterbank(self, n_bands: int):
"""Breaks signal into mel bands.

@@ -478,21 +478,6 @@ class ConcatDataset(AudioDataset):
return dataset[idx // len(self.datasets)]
# class ResumableDistributedSampler(DistributedSampler): # pragma: no cover
# """Distributed sampler that can be resumed from a given start index."""
# def __init__(self, dataset, start_idx: int = None, **kwargs):
# super().__init__(dataset, **kwargs)
# # Start index, allows to resume an experiment at the index it was
# self.start_idx = start_idx // self.num_replicas if start_idx is not None else 0
# def __iter__(self):
# for i, idx in enumerate(super().__iter__()):
# if i >= self.start_idx:
# yield idx
# self.start_idx = 0 # set the index back to 0 so for the next epoch
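Both this removed sampler and the DistributedBatchSampler-based replacement below rely on the same resume trick: skip the first start_idx items of the resumed epoch, then reset so later epochs run in full. A framework-free sketch of that pattern (illustrative class name):

class ResumableIndices:
    def __init__(self, indices, start_idx=0):
        self.indices = list(indices)
        self.start_idx = start_idx

    def __iter__(self):
        # Skip the first `start_idx` indices on the first pass only.
        for i, idx in enumerate(self.indices):
            if i >= self.start_idx:
                yield idx
        self.start_idx = 0  # subsequent epochs start from the beginning

assert list(ResumableIndices(range(5), start_idx=2)) == [2, 3, 4]
assert list(ResumableIndices(range(5))) == [0, 1, 2, 3, 4]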
class ResumableDistributedSampler(DistributedBatchSampler): # pragma: no cover
"""Distributed sampler that can be resumed from a given start index."""

@@ -608,108 +608,6 @@ class Equalizer(BaseTransform):
return signal.equalizer(eq)
# class Quantization(BaseTransform):
# """Applies quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of evenly spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.quantization(channels)
# class MuLawQuantization(BaseTransform):
# """Applies mu-law quantization to the input waveform. Corresponds
# to :py:func:`audiotools.core.effects.EffectMixin.mulaw_quantization`.
# Parameters
# ----------
# channels : tuple, optional
# Number of mu-law spaced quantization channels to quantize
# to, by default ("choice", [8, 32, 128, 256, 1024])
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# channels: tuple = ("choice", [8, 32, 128, 256, 1024]),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.channels = channels
# def _instantiate(self, state: RandomState):
# return {"channels": util.sample_from_dist(self.channels, state)}
# def _transform(self, signal, channels):
# return signal.mulaw_quantization(channels)
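Both removed transforms only sample a channel count and delegate to the signal's quantization / mulaw_quantization methods. As a reminder of what mu-law quantization does, a rough numpy sketch (compand, round to `channels` levels, expand back; not the library's exact implementation):

import numpy as np

def mulaw_quantize(x, channels=256):
    mu = channels - 1.0
    companded = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)
    quantized = np.round((companded + 1) / 2 * mu) / mu * 2 - 1  # `channels` levels
    return np.sign(quantized) * np.expm1(np.abs(quantized) * np.log1p(mu)) / mu

x = np.linspace(-1.0, 1.0, 9)
print(np.round(mulaw_quantize(x, channels=8), 3))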
# class NoiseFloor(BaseTransform):
# """Adds a noise floor of Gaussian noise to the signal at a specified
# dB.
# Parameters
# ----------
# db : tuple, optional
# Level of noise to add to signal, by default ("const", -50.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("const", -50.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# db = util.sample_from_dist(self.db, state)
# audio_data = state.randn(signal.num_channels, signal.signal_length)
# nz_signal = AudioSignal(audio_data, signal.sample_rate)
# nz_signal.normalize(db)
# return {"nz_signal": nz_signal}
# def _transform(self, signal, nz_signal):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# return signal + nz_signal
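NoiseFloor, removed above, draws Gaussian noise, normalizes it to the target level and adds it to the signal. A rough stand-in that uses RMS level where the library uses its loudness measure (sketch only):

import numpy as np

def add_noise_floor(x, db=-50.0, seed=0):
    nz = np.random.default_rng(seed).standard_normal(x.shape)
    nz *= 10 ** (db / 20) / np.sqrt(np.mean(nz ** 2))  # set RMS to `db` dBFS
    return x + nz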
class BackgroundNoise(BaseTransform):
"""Adds background noise from audio specified by a set of CSV files.
A valid CSV file looks like, and is typically generated by
@@ -796,68 +694,6 @@ class BackgroundNoise(BaseTransform):
return signal.mix(bg_signal.clone(), snr, eq)
# class CrossTalk(BaseTransform):
# """Adds crosstalk between speakers, whose audio is drawn from a CSV file
# that was produced via :py:func:`audiotools.data.preprocess.create_csv`.
# This transform calls :py:func:`audiotools.core.effects.EffectMixin.mix`
# under the hood.
# Parameters
# ----------
# snr : tuple, optional
# How loud cross-talk speaker is relative to original signal in dB,
# by default ("uniform", 0.0, 10.0)
# sources : List[str], optional
# Sources containing folders, or CSVs with paths to audio files,
# by default None
# weights : List[float], optional
# Weights to sample audio files from each source, by default None
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# loudness_cutoff : float, optional
# Loudness cutoff when loading from audio files, by default -40
# """
# def __init__(
# self,
# snr: tuple = ("uniform", 0.0, 10.0),
# sources: List[str] = None,
# weights: List[float] = None,
# name: str = None,
# prob: float = 1.0,
# loudness_cutoff: float = -40,
# ):
# super().__init__(name=name, prob=prob)
# self.snr = snr
# self.loader = AudioLoader(sources, weights)
# self.loudness_cutoff = loudness_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal):
# snr = util.sample_from_dist(self.snr, state)
# crosstalk_signal = self.loader(
# state,
# signal.sample_rate,
# duration=signal.signal_duration,
# loudness_cutoff=self.loudness_cutoff,
# num_channels=signal.num_channels,
# )["signal"]
# return {"crosstalk_signal": crosstalk_signal, "snr": snr}
# def _transform(self, signal, crosstalk_signal, snr):
# # Clone bg_signal so that transform can be repeatedly applied
# # to different signals with the same effect.
# loudness = signal.loudness()
# mix = signal.mix(crosstalk_signal.clone(), snr)
# mix.normalize(loudness)
# return mix
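The removed CrossTalk transform mixes a second speaker at a sampled SNR and then re-normalizes to the original loudness. Its core operation, sketched with RMS in place of the perceptual loudness the library actually uses:

import numpy as np

def mix_at_snr(signal, other, snr_db):
    # Scale `other` so that 20*log10(rms(signal)/rms(scaled)) == snr_db.
    rms = lambda a: np.sqrt(np.mean(a ** 2) + 1e-12)
    gain = rms(signal) / (rms(other) * 10 ** (snr_db / 20))
    return signal + gain * other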
class RoomImpulseResponse(BaseTransform):
"""Convolves signal with a room impulse response, at a specified
direct-to-reverberant ratio, with equalization applied. Room impulse
@@ -942,38 +778,6 @@ class RoomImpulseResponse(BaseTransform):
use_original_phase=self.use_original_phase)
# class VolumeChange(BaseTransform):
# """Changes the volume of the input signal.
# Uses :py:func:`audiotools.core.effects.EffectMixin.volume_change`.
# Parameters
# ----------
# db : tuple, optional
# Change in volume in decibels, by default ("uniform", -12.0, 0.0)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db: tuple = ("uniform", -12.0, 0.0),
# name: str = None,
# prob: float = 1.0,
# ):
# super().__init__(name=name, prob=prob)
# self.db = db
# def _instantiate(self, state: RandomState):
# return {"db": util.sample_from_dist(self.db, state)}
# def _transform(self, signal, db):
# return signal.volume_change(db)
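VolumeChange, removed above, simply applies a gain sampled in decibels; a change of db decibels corresponds to the linear gain below (sketch):

import numpy as np

def volume_change(x, db):
    return x * 10 ** (db / 20)  # e.g. -6 dB is roughly half amplitude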
class VolumeNorm(BaseTransform):
"""Normalizes the volume of the excerpt to a specified decibel.
@@ -1169,111 +973,6 @@ class HighPass(BaseTransform):
return signal.high_pass(cutoff, zeros=self.zeros)
# class RescaleAudio(BaseTransform):
# """Rescales the audio so it is in between ``-val`` and ``val``
# only if the original audio exceeds those bounds. Useful if
# transforms have caused the audio to clip.
# Uses :py:func:`audiotools.core.effects.EffectMixin.ensure_max_of_audio`.
# Parameters
# ----------
# val : float, optional
# Max absolute value of signal, by default 1.0
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, val: float = 1.0, name: str = None, prob: float = 1):
# super().__init__(name=name, prob=prob)
# self.val = val
# def _transform(self, signal):
# return signal.ensure_max_of_audio(self.val)
# class ShiftPhase(SpectralTransform):
# """Shifts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# shift : tuple, optional
# How much to shift phase by, by default ("uniform", -np.pi, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# shift: tuple = ("uniform", -np.pi, np.pi),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.shift = shift
# def _instantiate(self, state: RandomState):
# return {"shift": util.sample_from_dist(self.shift, state)}
# def _transform(self, signal, shift):
# return signal.shift_phase(shift)
# class InvertPhase(ShiftPhase):
# """Inverts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.shift_phase`.
# Parameters
# ----------
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(self, name: str = None, prob: float = 1):
# super().__init__(shift=("const", np.pi), name=name, prob=prob)
# class CorruptPhase(SpectralTransform):
# """Corrupts the phase of the audio.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.corrupt_phase`.
# Parameters
# ----------
# scale : tuple, optional
# How much to corrupt phase by, by default ("uniform", 0, np.pi)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self, scale: tuple = ("uniform", 0, np.pi), name: str = None, prob: float = 1
# ):
# super().__init__(name=name, prob=prob)
# self.scale = scale
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# scale = util.sample_from_dist(self.scale, state)
# corruption = state.normal(scale=scale, size=signal.phase.shape[1:])
# return {"corruption": corruption.astype("float32")}
# def _transform(self, signal, corruption):
# return signal.shift_phase(shift=corruption)
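ShiftPhase, InvertPhase and CorruptPhase, removed above, all rotate the STFT phase and resynthesize. The underlying operation on a complex STFT matrix, sketched in numpy:

import numpy as np

def shift_phase(stft, shift):
    # Rotate every bin's phase by `shift` radians while keeping the magnitude.
    return np.abs(stft) * np.exp(1j * (np.angle(stft) + shift))

InvertPhase is the special case shift = pi, i.e. multiplying the STFT by -1; CorruptPhase draws a per-bin shift from a normal distribution instead of using a constant.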
class FrequencyMask(SpectralTransform):
"""Masks a band of frequencies at a center frequency
from the audio.
@@ -1363,39 +1062,6 @@ class TimeMask(SpectralTransform):
return signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s)
# class MaskLowMagnitudes(SpectralTransform):
# """Masks low magnitude regions out of signal.
# Uses :py:func:`audiotools.core.dsp.DSPMixin.mask_low_magnitudes`.
# Parameters
# ----------
# db_cutoff : tuple, optional
# Decibel value for which things below it will be masked away,
# by default ("uniform", -10, 10)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# db_cutoff: tuple = ("uniform", -10, 10),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(name=name, prob=prob)
# self.db_cutoff = db_cutoff
# def _instantiate(self, state: RandomState, signal: AudioSignal = None):
# return {"db_cutoff": util.sample_from_dist(self.db_cutoff, state)}
# def _transform(self, signal, db_cutoff: float):
# return signal.mask_low_magnitudes(db_cutoff)
class Smoothing(BaseTransform):
"""Convolves the signal with a smoothing window.
@@ -1445,48 +1111,6 @@ class Smoothing(BaseTransform):
return out
# class TimeNoise(TimeMask):
# """Similar to :py:func:`audiotools.data.transforms.TimeMask`, but
# replaces with noise instead of zeros.
# Parameters
# ----------
# t_center : tuple, optional
# Center time in terms of 0.0 and 1.0 (duration of signal),
# by default ("uniform", 0.0, 1.0)
# t_width : tuple, optional
# Width of dropped out portion, by default ("const", 0.025)
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# t_center: tuple = ("uniform", 0.0, 1.0),
# t_width: tuple = ("const", 0.025),
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(t_center=t_center, t_width=t_width, name=name, prob=prob)
# def _transform(self, signal, tmin_s: float, tmax_s: float):
# signal = signal.mask_timesteps(tmin_s=tmin_s, tmax_s=tmax_s, val=0.0)
# mag, phase = signal.magnitude, signal.phase
# mag_r, phase_r = torch.randn_like(mag), torch.randn_like(phase)
# mask = (mag == 0.0) * (phase == 0.0)
# mag[mask] = mag_r[mask]
# phase[mask] = phase_r[mask]
# signal.magnitude = mag
# signal.phase = phase
# return signal
class FrequencyNoise(FrequencyMask):
"""Similar to :py:func:`audiotools.data.transforms.FrequencyMask`, but
replaces with noise instead of zeros.
@@ -1530,59 +1154,3 @@ class FrequencyNoise(FrequencyMask):
signal.magnitude = mag
signal.phase = phase
return signal
# class SpectralDenoising(Equalizer):
# """Applies denoising algorithm detailed in
# :py:func:`audiotools.ml.layers.spectral_gate.SpectralGate`,
# using a randomly generated noise signal for denoising.
# Parameters
# ----------
# eq_amount : tuple, optional
# Amount of eq to apply to noise signal, by default ("const", 1.0)
# denoise_amount : tuple, optional
# Amount to denoise by, by default ("uniform", 0.8, 1.0)
# nz_volume : float, optional
# Volume of noise to denoise with, by default -40
# n_bands : int, optional
# Number of bands in equalizer, by default 6
# n_freq : int, optional
# Number of frequency bins to smooth by, by default 3
# n_time : int, optional
# Number of time bins to smooth by, by default 5
# name : str, optional
# Name of this transform, used to identify it in the dictionary
# produced by ``self.instantiate``, by default None
# prob : float, optional
# Probability of applying this transform, by default 1.0
# """
# def __init__(
# self,
# eq_amount: tuple = ("const", 1.0),
# denoise_amount: tuple = ("uniform", 0.8, 1.0),
# nz_volume: float = -40,
# n_bands: int = 6,
# n_freq: int = 3,
# n_time: int = 5,
# name: str = None,
# prob: float = 1,
# ):
# super().__init__(eq_amount=eq_amount, n_bands=n_bands, name=name, prob=prob)
# self.nz_volume = nz_volume
# self.denoise_amount = denoise_amount
# self.spectral_gate = ml.layers.SpectralGate(n_freq, n_time)
# def _transform(self, signal, nz, eq, denoise_amount):
# nz = nz.normalize(self.nz_volume).equalizer(eq)
# self.spectral_gate = self.spectral_gate.to(signal.device)
# signal = self.spectral_gate(signal, nz, denoise_amount)
# return signal
# def _instantiate(self, state: RandomState):
# kwargs = super()._instantiate(state)
# kwargs["denoise_amount"] = util.sample_from_dist(self.denoise_amount, state)
# kwargs["nz"] = AudioSignal(state.randn(22050), 44100)
# return kwargs

@@ -1,6 +1,4 @@
"""
Functions for comparing AudioSignal objects to one another.
"""
# from . import distance
from . import quality
# from . import spectral

@@ -1,5 +1,3 @@
from . import decorators
from .accelerator import Accelerator
from .basemodel import BaseModel
# from . import layers
# from .experiment import Experiment
