Adapt to paddle3.0 && update readme

pull/3900/head
drryanhuang 8 months ago
parent ff8e4bb4fe
commit e16999b3c3

@ -2,12 +2,67 @@ Audiotools is a comprehensive toolkit designed for audio processing and analysis
### Directory Structure ### Directory Structure
- **core directory**: Contains the core class AudioSignal, which is responsible for the fundamental representation and manipulation of audio signals. ```
.
├── audiotools
│ ├── README.md
│ ├── __init__.py
│ ├── core
│ │ ├── __init__.py
│ │ ├── _julius.py
│ │ ├── audio_signal.py
│ │ ├── display.py
│ │ ├── dsp.py
│ │ ├── effects.py
│ │ ├── ffmpeg.py
│ │ ├── loudness.py
│ │ └── util.py
│ ├── data
│ │ ├── __init__.py
│ │ ├── datasets.py
│ │ ├── preprocess.py
│ │ └── transforms.py
│ ├── metrics
│ │ ├── __init__.py
│ │ └── quality.py
│ ├── ml
│ │ ├── __init__.py
│ │ ├── accelerator.py
│ │ ├── basemodel.py
│ │ └── decorators.py
│ ├── requirements.txt
│ └── post.py
├── tests
│ └── audiotools
│ ├── core
│ │ ├── test_audio_signal.py
│ │ ├── test_bands.py
│ │ ├── test_display.py
│ │ ├── test_dsp.py
│ │ ├── test_effects.py
│ │ ├── test_fftconv.py
│ │ ├── test_grad.py
│ │ ├── test_highpass.py
│ │ ├── test_loudness.py
│ │ ├── test_lowpass.py
│ │ └── test_util.py
│ ├── data
│ │ ├── test_datasets.py
│ │ ├── test_preprocess.py
│ │ └── test_transforms.py
│ ├── ml
│ │ ├── test_decorators.py
│ │ └── test_model.py
│ └── test_post.py
- **data directory**: Primarily dedicated to storing and processing datasets, including classes and functions for data preprocessing, ensuring efficient loading and transformation of audio data. ```
- **metrics directory**: Implements functions for various audio evaluation metrics, enabling precise assessment of the performance of audio models and processing algorithms. - **core**: Contains the core class AudioSignal, which is responsible for the fundamental representation and manipulation of audio signals.
- **ml directory**: Comprises classes and methods related to model training, supporting the construction, training, and optimization of machine learning models in the context of audio. - **data**: Primarily dedicated to storing and processing datasets, including classes and functions for data preprocessing, ensuring efficient loading and transformation of audio data.
- **metrics**: Implements functions for various audio evaluation metrics, enabling precise assessment of the performance of audio models and processing algorithms.
- **ml**: Comprises classes and methods related to model training, supporting the construction, training, and optimization of machine learning models in the context of audio.
This project aims to provide developers and researchers with an efficient and flexible framework to foster innovation and exploration across various domains of audio technology. This project aims to provide developers and researchers with an efficient and flexible framework to foster innovation and exploration across various domains of audio technology.

@ -349,6 +349,9 @@ class DSPMixin:
nbins, ) nbins, )
bins_hz = bins_hz[None, None, :, None].tile( bins_hz = bins_hz[None, None, :, None].tile(
[self.batch_size, 1, 1, mag.shape[-1]]) [self.batch_size, 1, 1, mag.shape[-1]])
fmin_hz, fmax_hz = fmin_hz.astype(bins_hz.dtype), fmax_hz.astype(
bins_hz.dtype)
mask = (fmin_hz <= bins_hz) & (bins_hz < fmax_hz) mask = (fmin_hz <= bins_hz) & (bins_hz < fmax_hz)
mag = paddle.where(mask, paddle.full_like(mag, val), mag) mag = paddle.where(mask, paddle.full_like(mag, val), mag)
@ -429,6 +432,7 @@ class DSPMixin:
log_mag = self.log_magnitude() log_mag = self.log_magnitude()
db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim) db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim)
db_cutoff = db_cutoff.astype(log_mag.dtype)
mask = log_mag < db_cutoff mask = log_mag < db_cutoff
# mag = mag.masked_fill(mask, val) # mag = mag.masked_fill(mask, val)
mag = paddle.where(mask, mag, val * paddle.ones_like(mag)) mag = paddle.where(mask, mag, val * paddle.ones_like(mag))
@ -452,6 +456,7 @@ class DSPMixin:
masked audio data. masked audio data.
""" """
shift = util.ensure_tensor(shift, ndim=self.phase.ndim) shift = util.ensure_tensor(shift, ndim=self.phase.ndim)
shift = shift.astype(self.phase.dtype)
self.phase = self.phase + shift self.phase = self.phase + shift
return self return self

@ -266,7 +266,7 @@ class EffectMixin:
""" """
db = util.ensure_tensor(db) db = util.ensure_tensor(db)
ref_db = self.loudness() ref_db = self.loudness()
gain = db - ref_db gain = db.astype(ref_db.dtype) - ref_db
gain = util.exp_compat(gain * self.GAIN_FACTOR) gain = util.exp_compat(gain * self.GAIN_FACTOR)
self.audio_data = self.audio_data * gain[:, None, None] self.audio_data = self.audio_data * gain[:, None, None]
@ -388,6 +388,7 @@ class EffectMixin:
quantization_channels, ndim=3) quantization_channels, ndim=3)
x = self.audio_data x = self.audio_data
quantization_channels = quantization_channels.astype(x.dtype)
x = (x + 1) / 2 x = (x + 1) / 2
x = x * quantization_channels x = x * quantization_channels
x = x.floor() x = x.floor()
@ -424,7 +425,7 @@ class EffectMixin:
x = ((x + 1) / 2 * mu + 0.5).astype("int64") x = ((x + 1) / 2 * mu + 0.5).astype("int64")
# unquantize # unquantize
x = (x / mu) * 2 - 1.0 x = (x.astype(mu.dtype) / mu) * 2 - 1.0
x = paddle.sign(x) * ( x = paddle.sign(x) * (
util.exp_compat(paddle.abs(x) * paddle.log1p(mu)) - 1.0) / mu util.exp_compat(paddle.abs(x) * paddle.log1p(mu)) - 1.0) / mu

@ -317,7 +317,7 @@ class Meter(paddle.nn.Layer):
z_avg_gated = z z_avg_gated = z
z_avg_gated[l <= Gamma_a] = 0 z_avg_gated[l <= Gamma_a] = 0
masked = l > Gamma_a masked = l > Gamma_a
z_avg_gated = z_avg_gated.sum(2) / masked.sum(2) z_avg_gated = z_avg_gated.sum(2) / masked.sum(2).astype("float32")
# calculate the relative threshold value (see eq. 6) # calculate the relative threshold value (see eq. 6)
Gamma_r = -0.691 + 10.0 * paddle.log10( Gamma_r = -0.691 + 10.0 * paddle.log10(

@ -338,7 +338,7 @@ def _close_temp_files(tmpfiles: list):
_close() _close()
AUDIO_EXTENSIONS = [".wav", ".flac", ".mp3", ".mp4"] AUDIO_EXTENSIONS = [".wav", ".flac", ".mp3"]
def find_audio(folder: str, ext: List[str]=AUDIO_EXTENSIONS): def find_audio(folder: str, ext: List[str]=AUDIO_EXTENSIONS):
@ -869,7 +869,7 @@ def hz_to_bin(hz: paddle.Tensor, n_fft: int, sample_rate: int):
shape = hz.shape shape = hz.shape
hz = hz.reshape([-1]) hz = hz.reshape([-1])
freqs = paddle.linspace(0, sample_rate / 2, 2 + n_fft // 2) freqs = paddle.linspace(0, sample_rate / 2, 2 + n_fft // 2)
hz = paddle.clip(hz, max=sample_rate / 2) hz = paddle.clip(hz, max=sample_rate / 2).astype(freqs.dtype)
closest = (hz[None, :] - freqs[:, None]).abs() closest = (hz[None, :] - freqs[:, None]).abs()
closest_bins = closest.argmin(axis=0) closest_bins = closest.argmin(axis=0)

@ -88,7 +88,7 @@ def test_seed():
def test_hz_to_bin(): def test_hz_to_bin():
hz = paddle.to_tensor(np.array([100, 200, 300])) hz = paddle.to_tensor(np.array([100, 200, 300]), dtype="float32")
sr = 1000 sr = 1000
n_fft = 2048 n_fft = 2048

Loading…
Cancel
Save