Adapt to Paddle 3.0 and update README

pull/3900/head
drryanhuang 8 months ago
parent ff8e4bb4fe
commit e16999b3c3

@ -2,12 +2,67 @@ Audiotools is a comprehensive toolkit designed for audio processing and analysis
### Directory Structure
- **core directory**: Contains the core class AudioSignal, which is responsible for the fundamental representation and manipulation of audio signals.
```
.
├── audiotools
│   ├── README.md
│   ├── __init__.py
│   ├── core
│   │   ├── __init__.py
│   │   ├── _julius.py
│   │   ├── audio_signal.py
│   │   ├── display.py
│   │   ├── dsp.py
│   │   ├── effects.py
│   │   ├── ffmpeg.py
│   │   ├── loudness.py
│   │   └── util.py
│   ├── data
│   │   ├── __init__.py
│   │   ├── datasets.py
│   │   ├── preprocess.py
│   │   └── transforms.py
│   ├── metrics
│   │   ├── __init__.py
│   │   └── quality.py
│   ├── ml
│   │   ├── __init__.py
│   │   ├── accelerator.py
│   │   ├── basemodel.py
│   │   └── decorators.py
│   ├── requirements.txt
│   └── post.py
├── tests
│   └── audiotools
│       ├── core
│       │   ├── test_audio_signal.py
│       │   ├── test_bands.py
│       │   ├── test_display.py
│       │   ├── test_dsp.py
│       │   ├── test_effects.py
│       │   ├── test_fftconv.py
│       │   ├── test_grad.py
│       │   ├── test_highpass.py
│       │   ├── test_loudness.py
│       │   ├── test_lowpass.py
│       │   └── test_util.py
│       ├── data
│       │   ├── test_datasets.py
│       │   ├── test_preprocess.py
│       │   └── test_transforms.py
│       ├── ml
│       │   ├── test_decorators.py
│       │   └── test_model.py
│       └── test_post.py
- **data directory**: Primarily dedicated to storing and processing datasets, including classes and functions for data preprocessing, ensuring efficient loading and transformation of audio data.
```
- **metrics directory**: Implements functions for various audio evaluation metrics, enabling precise assessment of the performance of audio models and processing algorithms.
- **core**: Contains the core class AudioSignal, which is responsible for the fundamental representation and manipulation of audio signals.
- **ml directory**: Comprises classes and methods related to model training, supporting the construction, training, and optimization of machine learning models in the context of audio.
- **data**: Primarily dedicated to storing and processing datasets, including classes and functions for data preprocessing, ensuring efficient loading and transformation of audio data.
- **metrics**: Implements functions for various audio evaluation metrics, enabling precise assessment of the performance of audio models and processing algorithms.
- **ml**: Comprises classes and methods related to model training, supporting the construction, training, and optimization of machine learning models in the context of audio.
This project aims to provide developers and researchers with an efficient and flexible framework to foster innovation and exploration across various domains of audio technology.

@ -349,6 +349,9 @@ class DSPMixin:
nbins, )
bins_hz = bins_hz[None, None, :, None].tile(
[self.batch_size, 1, 1, mag.shape[-1]])
fmin_hz, fmax_hz = fmin_hz.astype(bins_hz.dtype), fmax_hz.astype(
bins_hz.dtype)
mask = (fmin_hz <= bins_hz) & (bins_hz < fmax_hz)
mag = paddle.where(mask, paddle.full_like(mag, val), mag)
@ -429,6 +432,7 @@ class DSPMixin:
log_mag = self.log_magnitude()
db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim)
db_cutoff = db_cutoff.astype(log_mag.dtype)
mask = log_mag < db_cutoff
# mag = mag.masked_fill(mask, val)
mag = paddle.where(mask, mag, val * paddle.ones_like(mag))
@ -452,6 +456,7 @@ class DSPMixin:
masked audio data.
"""
shift = util.ensure_tensor(shift, ndim=self.phase.ndim)
shift = shift.astype(self.phase.dtype)
self.phase = self.phase + shift
return self

@ -266,7 +266,7 @@ class EffectMixin:
"""
db = util.ensure_tensor(db)
ref_db = self.loudness()
gain = db - ref_db
gain = db.astype(ref_db.dtype) - ref_db
gain = util.exp_compat(gain * self.GAIN_FACTOR)
self.audio_data = self.audio_data * gain[:, None, None]
@ -388,6 +388,7 @@ class EffectMixin:
quantization_channels, ndim=3)
x = self.audio_data
quantization_channels = quantization_channels.astype(x.dtype)
x = (x + 1) / 2
x = x * quantization_channels
x = x.floor()
@ -424,7 +425,7 @@ class EffectMixin:
x = ((x + 1) / 2 * mu + 0.5).astype("int64")
# unquantize
x = (x / mu) * 2 - 1.0
x = (x.astype(mu.dtype) / mu) * 2 - 1.0
x = paddle.sign(x) * (
util.exp_compat(paddle.abs(x) * paddle.log1p(mu)) - 1.0) / mu

@ -317,7 +317,7 @@ class Meter(paddle.nn.Layer):
z_avg_gated = z
z_avg_gated[l <= Gamma_a] = 0
masked = l > Gamma_a
z_avg_gated = z_avg_gated.sum(2) / masked.sum(2)
z_avg_gated = z_avg_gated.sum(2) / masked.sum(2).astype("float32")
# calculate the relative threshold value (see eq. 6)
Gamma_r = -0.691 + 10.0 * paddle.log10(

@ -338,7 +338,7 @@ def _close_temp_files(tmpfiles: list):
_close()
AUDIO_EXTENSIONS = [".wav", ".flac", ".mp3", ".mp4"]
AUDIO_EXTENSIONS = [".wav", ".flac", ".mp3"]
def find_audio(folder: str, ext: List[str]=AUDIO_EXTENSIONS):
@ -869,7 +869,7 @@ def hz_to_bin(hz: paddle.Tensor, n_fft: int, sample_rate: int):
shape = hz.shape
hz = hz.reshape([-1])
freqs = paddle.linspace(0, sample_rate / 2, 2 + n_fft // 2)
hz = paddle.clip(hz, max=sample_rate / 2)
hz = paddle.clip(hz, max=sample_rate / 2).astype(freqs.dtype)
closest = (hz[None, :] - freqs[:, None]).abs()
closest_bins = closest.argmin(axis=0)

@ -88,7 +88,7 @@ def test_seed():
def test_hz_to_bin():
hz = paddle.to_tensor(np.array([100, 200, 300]))
hz = paddle.to_tensor(np.array([100, 200, 300]), dtype="float32")
sr = 1000
n_fft = 2048

Loading…
Cancel
Save