add 2 file test

pull/3900/head
drryanhuang 9 months ago
parent 643f1c6071
commit 080bd7f5db

@ -17,14 +17,13 @@ import soundfile
from . import util from . import util
from ._julius import resample_frac from ._julius import resample_frac
from .display import DisplayMixin
from .dsp import DSPMixin from .dsp import DSPMixin
from .effects import EffectMixin from .effects import EffectMixin
from .effects import ImpulseResponseMixin from .effects import ImpulseResponseMixin
from .ffmpeg import FFMPEGMixin from .ffmpeg import FFMPEGMixin
from .loudness import LoudnessMixin from .loudness import LoudnessMixin
# from .display import DisplayMixin
# from .playback import PlayMixin # from .playback import PlayMixin
# from .whisper import WhisperMixin # from .whisper import WhisperMixin
@ -98,7 +97,7 @@ class AudioSignal(
# PlayMixin, # PlayMixin,
ImpulseResponseMixin, ImpulseResponseMixin,
DSPMixin, DSPMixin,
# DisplayMixin, DisplayMixin,
FFMPEGMixin, FFMPEGMixin,
# WhisperMixin, # WhisperMixin,
): ):
@ -1498,6 +1497,8 @@ class AudioSignal(
amin = amin**2 amin = amin**2
log_spec = 10.0 * paddle.log10(magnitude.pow(2).clip(min=amin)) log_spec = 10.0 * paddle.log10(magnitude.pow(2).clip(min=amin))
if paddle.is_tensor(ref_value):
ref_value = ref_value.item()
log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value)) log_spec -= 10.0 * np.log10(np.maximum(amin, ref_value))
if top_db is not None: if top_db is not None:

@ -0,0 +1,191 @@
import inspect
import typing
from functools import wraps
from . import util
def format_figure(func):
    """Decorator that formats the figure after a plotting function has run.

    Keyword arguments whose names match a parameter of
    ``util.format_figure`` are removed from the call to ``func`` and passed
    to ``util.format_figure`` instead, which is invoked once ``func`` has
    finished. All remaining arguments reach ``func`` unchanged.

    See :py:func:`audiotools.core.util.format_figure` for more.

    Parameters
    ----------
    func : Callable
        Plotting function that is decorated by this function.

    Returns
    -------
    Callable
        Wrapper around ``func`` that returns whatever ``func`` returns.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        f_keys = inspect.signature(util.format_figure).parameters.keys()

        # Split the formatting keywords off so ``func`` never sees them.
        # Iterate over a snapshot because ``kwargs`` is mutated in the loop.
        f_kwargs = {}
        for key in list(kwargs):
            if key in f_keys:
                f_kwargs[key] = kwargs.pop(key)

        result = func(*args, **kwargs)
        util.format_figure(**f_kwargs)
        # Hand the plotting function's result back instead of dropping it.
        return result

    return wrapper
class DisplayMixin:
    """Plotting and image-export helpers for an audio signal class.

    The methods read ``self.audio_data`` and ``self.sample_rate`` and call
    ``self.clone()``, ``self.preemphasis()``, ``self.magnitude``,
    ``self.log_magnitude()`` and ``self.mel_spectrogram()``; the class this
    mixin is combined with has to provide them.
    """

    @format_figure
    def specshow(
            self,
            preemphasis: bool = False,
            x_axis: str = "time",
            y_axis: str = "linear",
            n_mels: int = 128,
            **kwargs, ):
        """Displays a spectrogram, using ``librosa.display.specshow``.

        Parameters
        ----------
        preemphasis : bool, optional
            Whether or not to apply preemphasis, which makes high
            frequency detail easier to see, by default False
        x_axis : str, optional
            How to label the x axis, by default "time"
        y_axis : str, optional
            How to label the y axis, by default "linear"
        n_mels : int, optional
            If displaying a mel spectrogram with ``y_axis = "mel"``,
            this controls the number of mels, by default 128.
        kwargs : dict, optional
            Keywords matching :py:func:`audiotools.core.util.format_figure`
            parameters are consumed by the ``format_figure`` decorator; the
            rest are forwarded to ``librosa.display.specshow``.
        """
        import librosa
        import librosa.display

        # Always re-compute the STFT data before showing it, in case
        # it changed. Work on a clone so ``self`` is left untouched.
        signal = self.clone()
        signal.stft_data = None

        if preemphasis:
            signal.preemphasis()

        if y_axis == "mel":
            # Mel view: 20*log10 of the clipped mel spectrogram, shifted so
            # that its maximum is 0.
            log_mag = 20 * signal.mel_spectrogram(n_mels).clip(1e-5).log10()
            log_mag -= log_mag.max()
        else:
            # Linear view: log-magnitude relative to the loudest bin. Only
            # computed here because the mel branch does not use it.
            ref = signal.magnitude.max()
            log_mag = signal.log_magnitude(ref_value=ref)

        # First batch item only, averaged over its leading axis.
        librosa.display.specshow(
            log_mag.numpy()[0].mean(axis=0),
            x_axis=x_axis,
            y_axis=y_axis,
            sr=signal.sample_rate,
            **kwargs, )

    @format_figure
    def waveplot(self, x_axis: str = "time", **kwargs):
        """Displays a waveform plot, using ``librosa.display.waveshow``.

        Falls back to ``librosa.display.waveplot`` when the installed
        librosa has no ``waveshow``.

        Parameters
        ----------
        x_axis : str, optional
            How to label the x axis, by default "time"
        kwargs : dict, optional
            Keywords matching :py:func:`audiotools.core.util.format_figure`
            parameters are consumed by the ``format_figure`` decorator; the
            rest are forwarded to the librosa plotting function.
        """
        import librosa
        import librosa.display

        # First batch item only, averaged over its leading axis.
        audio_data = self.audio_data[0].mean(axis=0)
        audio_data = audio_data.cpu().numpy()

        plot_fn = "waveshow" if hasattr(librosa.display,
                                        "waveshow") else "waveplot"
        wave_plot_fn = getattr(librosa.display, plot_fn)
        wave_plot_fn(audio_data, x_axis=x_axis, sr=self.sample_rate, **kwargs)

    @format_figure
    def wavespec(self, x_axis: str = "time", **kwargs):
        """Displays a waveform plot stacked on top of a spectrogram.

        The figure is split into six rows: the waveform
        (:py:func:`audiotools.core.display.DisplayMixin.waveplot`) takes the
        first row and the spectrogram
        (:py:func:`audiotools.core.display.DisplayMixin.specshow`) the
        remaining five.

        Parameters
        ----------
        x_axis : str, optional
            How to label the x axis, by default "time"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow`.
        """
        import matplotlib.pyplot as plt
        from matplotlib.gridspec import GridSpec

        gs = GridSpec(6, 1)
        plt.subplot(gs[0, :])
        self.waveplot(x_axis=x_axis)
        plt.subplot(gs[1:, :])
        self.specshow(x_axis=x_axis, **kwargs)

    def write_audio_to_tb(
            self,
            tag: str,
            writer,
            step: typing.Optional[int] = None,
            plot_fn: typing.Union[typing.Callable, str, None] = "specshow",
            **kwargs, ):
        """Writes a signal and, optionally, a plot of it to a summary writer.

        Parameters
        ----------
        tag : str
            Tag to write signal to (e.g. ``clean/sample_0.wav``). The image
            tag is ``tag`` with every occurrence of ``wav`` replaced by
            ``png`` (e.g. ``clean/sample_0.png``).
        writer : SummaryWriter
            Writer object providing ``add_audio(tag, audio, step, sample_rate)``
            and ``add_figure(tag, figure, step)``.
        step : int, optional
            The step to write the signal to, by default None
        plot_fn : typing.Union[typing.Callable, str], optional
            How to create the image: a callable, or the name of a method on
            this object. Set to ``None`` to avoid plotting, by default
            "specshow"
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow` or
            whatever ``plot_fn`` is set to.
        """
        import matplotlib.pyplot as plt

        # Only the first channel of the first batch item is written.
        audio_data = self.audio_data[0, 0].detach().cpu().numpy()
        sample_rate = self.sample_rate
        writer.add_audio(tag, audio_data, step, sample_rate)

        if plot_fn is not None:
            if isinstance(plot_fn, str):
                plot_fn = getattr(self, plot_fn)
            fig = plt.figure()
            plt.clf()
            plot_fn(**kwargs)
            # NOTE(review): this replaces *every* "wav" substring in the tag,
            # not just a trailing extension.
            writer.add_figure(tag.replace("wav", "png"), fig, step)

    def save_image(
            self,
            image_path: str,
            plot_fn: typing.Union[typing.Callable, str] = "specshow",
            **kwargs, ):
        """Save AudioSignal spectrogram (or whatever ``plot_fn`` is set to) to
        a specified file.

        The current matplotlib figure is cleared, ``plot_fn`` draws into it,
        the result is saved with a tight bounding box and no padding, and
        the figure is closed.

        Parameters
        ----------
        image_path : str
            Where to save the file to.
        plot_fn : typing.Union[typing.Callable, str], optional
            How to create the image: a callable, or the name of a method on
            this object, by default "specshow". It is always called, so
            ``None`` is not accepted here.
        kwargs : dict, optional
            Keyword arguments to :py:func:`audiotools.core.display.DisplayMixin.specshow` or
            whatever ``plot_fn`` is set to.
        """
        import matplotlib.pyplot as plt

        if isinstance(plot_fn, str):
            plot_fn = getattr(self, plot_fn)

        plt.clf()
        plot_fn(**kwargs)
        plt.savefig(image_path, bbox_inches="tight", pad_inches=0)
        plt.close()

@ -7,148 +7,201 @@ from . import _julius
from . import util from . import util
def _unfold(x, kernel_sizes, strides):
    """Call ``paddle.nn.functional.unfold``, routing height-1 kernels
    through a height-2 call.

    When ``kernel_sizes[0] == 1`` the input is extended along axis 2 with an
    all-zero copy of itself, unfolded with a kernel of height 2, and only
    the first ``kernel_sizes[1]`` entries along axis 1 of the result are
    kept. Any other kernel height is passed to Paddle unchanged.

    Workaround related to https://github.com/PaddlePaddle/Paddle/pull/70102

    Parameters
    ----------
    x : paddle.Tensor
        Input tensor handed to ``unfold``.
        NOTE(review): presumably ``(N, 1, 1, W)`` whenever the workaround
        applies; the slice below keeps the real row only for a single
        channel -- confirm before using it with more channels.
    kernel_sizes : tuple
        ``(height, width)`` of the sliding kernel.
    strides : tuple
        Strides forwarded to ``unfold``.

    Returns
    -------
    paddle.Tensor
        Result of ``unfold`` (sliced when the workaround was applied).
    """
    # Remember whether the workaround ran instead of re-inspecting
    # ``kernel_sizes`` afterwards: a caller-supplied kernel of height 2
    # must not be truncated.
    padded = kernel_sizes[0] == 1
    if padded:
        x = paddle.concat([x, paddle.zeros_like(x)], axis=2)
        kernel_sizes = (2, kernel_sizes[1])

    unfolded = paddle.nn.functional.unfold(
        x,
        kernel_sizes=kernel_sizes,
        strides=strides, )

    if padded:
        # Drop the entries that came from the zero row added above.
        unfolded = unfolded[:, :kernel_sizes[1]]
    return unfolded
def _fold(x, output_sizes, kernel_sizes, strides):
    """Call ``paddle.nn.functional.fold``, routing the height-1 case through
    a height-2 call.

    When both ``output_sizes[0]`` and ``kernel_sizes[0]`` are 1, the input
    is extended along axis 1 with an all-zero copy of itself, folded with an
    output and kernel height of 2, and only the first entry along axis 2 of
    the result is kept. Any other configuration is passed to Paddle
    unchanged.

    Workaround related to https://github.com/PaddlePaddle/Paddle/pull/70102

    Parameters
    ----------
    x : paddle.Tensor
        Input tensor handed to ``fold``.
        NOTE(review): presumably ``(N, window_length, num_windows)`` for a
        single channel -- confirm before using it with more channels.
    output_sizes : tuple
        ``(height, width)`` of the folded output.
    kernel_sizes : tuple
        ``(height, width)`` of the sliding kernel.
    strides : tuple
        Strides forwarded to ``fold``.

    Returns
    -------
    paddle.Tensor
        Result of ``fold`` (sliced when the workaround was applied).
    """
    # Track the workaround explicitly; testing ``kernel_sizes[0] == 2``
    # afterwards would also truncate a genuine height-2 fold.
    padded = output_sizes[0] == 1 and kernel_sizes[0] == 1
    if padded:
        x = paddle.concat([x, paddle.zeros_like(x)], axis=1)
        output_sizes = (2, output_sizes[1])
        kernel_sizes = (2, kernel_sizes[1])

    folded = paddle.nn.functional.fold(
        x,
        output_sizes=output_sizes,
        kernel_sizes=kernel_sizes,
        strides=strides, )

    if padded:
        # Keep only the first row; the second one exists solely because of
        # the padding above.
        folded = folded[:, :, :1]
    return folded
class DSPMixin: class DSPMixin:
_original_batch_size = None _original_batch_size = None
_original_num_channels = None _original_num_channels = None
_padded_signal_length = None _padded_signal_length = None
# def _preprocess_signal_for_windowing(self, window_duration, hop_duration): def _preprocess_signal_for_windowing(self, window_duration, hop_duration):
# self._original_batch_size = self.batch_size self._original_batch_size = self.batch_size
# self._original_num_channels = self.num_channels self._original_num_channels = self.num_channels
# window_length = int(window_duration * self.sample_rate)
# hop_length = int(hop_duration * self.sample_rate)
# if window_length % hop_length != 0:
# factor = window_length // hop_length
# window_length = factor * hop_length
# self.zero_pad(hop_length, hop_length)
# self._padded_signal_length = self.signal_length
# return window_length, hop_length
# def windows(
# self, window_duration: float, hop_duration: float, preprocess: bool = True
# ):
# """Generator which yields windows of specified duration from signal with a specified
# hop length.
# Parameters
# ----------
# window_duration : float
# Duration of every window in seconds.
# hop_duration : float
# Hop between windows in seconds.
# preprocess : bool, optional
# Whether to preprocess the signal, so that the first sample is in
# the middle of the first window, by default True
# Yields
# ------
# AudioSignal
# Each window is returned as an AudioSignal.
# """
# if preprocess:
# window_length, hop_length = self._preprocess_signal_for_windowing(
# window_duration, hop_duration
# )
# self.audio_data = self.audio_data.reshape(-1, 1, self.signal_length) window_length = int(window_duration * self.sample_rate)
hop_length = int(hop_duration * self.sample_rate)
# for b in range(self.batch_size):
# i = 0 if window_length % hop_length != 0:
# start_idx = i * hop_length factor = window_length // hop_length
# while True: window_length = factor * hop_length
# start_idx = i * hop_length
# i += 1 self.zero_pad(hop_length, hop_length)
# end_idx = start_idx + window_length self._padded_signal_length = self.signal_length
# if end_idx > self.signal_length:
# break return window_length, hop_length
# yield self[b, ..., start_idx:end_idx]
def windows(self,
# def collect_windows( window_duration: float,
# self, window_duration: float, hop_duration: float, preprocess: bool = True hop_duration: float,
# ): preprocess: bool=True):
# """Reshapes signal into windows of specified duration from signal with a specified """Generator which yields windows of specified duration from signal with a specified
# hop length. Window are placed along the batch dimension. Use with hop length.
# :py:func:`audiotools.core.dsp.DSPMixin.overlap_and_add` to reconstruct the
# original signal. Parameters
----------
# Parameters window_duration : float
# ---------- Duration of every window in seconds.
# window_duration : float hop_duration : float
# Duration of every window in seconds. Hop between windows in seconds.
# hop_duration : float preprocess : bool, optional
# Hop between windows in seconds. Whether to preprocess the signal, so that the first sample is in
# preprocess : bool, optional the middle of the first window, by default True
# Whether to preprocess the signal, so that the first sample is in
# the middle of the first window, by default True Yields
------
# Returns AudioSignal
# ------- Each window is returned as an AudioSignal.
# AudioSignal """
# AudioSignal unfolded with shape ``(nb * nch * num_windows, 1, window_length)`` if preprocess:
# """ window_length, hop_length = self._preprocess_signal_for_windowing(
# if preprocess: window_duration, hop_duration)
# window_length, hop_length = self._preprocess_signal_for_windowing(
# window_duration, hop_duration self.audio_data = self.audio_data.reshape([-1, 1, self.signal_length])
# )
for b in range(self.batch_size):
i = 0
start_idx = i * hop_length
while True:
start_idx = i * hop_length
i += 1
end_idx = start_idx + window_length
if end_idx > self.signal_length:
break
yield self[b, ..., start_idx:end_idx]
def collect_windows(self,
window_duration: float,
hop_duration: float,
preprocess: bool=True):
"""Reshapes signal into windows of specified duration from signal with a specified
hop length. Window are placed along the batch dimension. Use with
:py:func:`audiotools.core.dsp.DSPMixin.overlap_and_add` to reconstruct the
original signal.
Parameters
----------
window_duration : float
Duration of every window in seconds.
hop_duration : float
Hop between windows in seconds.
preprocess : bool, optional
Whether to preprocess the signal, so that the first sample is in
the middle of the first window, by default True
Returns
-------
AudioSignal
AudioSignal unfolded with shape ``(nb * nch * num_windows, 1, window_length)``
"""
if preprocess:
window_length, hop_length = self._preprocess_signal_for_windowing(
window_duration, hop_duration)
# # self.audio_data: (nb, nch, nt). # self.audio_data: (nb, nch, nt).
# unfolded = paddle.nn.functional.unfold( # unfolded = paddle.nn.functional.unfold(
# self.audio_data.reshape(-1, 1, 1, self.signal_length), # self.audio_data.reshape([-1, 1, 1, self.signal_length]),
# kernel_size=(1, window_length), # kernel_sizes=(1, window_length),
# stride=(1, hop_length), # strides=(1, hop_length),
# ) # )
# # unfolded: (nb * nch, window_length, num_windows). unfolded = _unfold(
# # -> (nb * nch * num_windows, 1, window_length) self.audio_data.reshape([-1, 1, 1, self.signal_length]),
# unfolded = unfolded.permute(0, 2, 1).reshape(-1, 1, window_length) kernel_sizes=(1, window_length),
# self.audio_data = unfolded strides=(1, hop_length), )
# return self # unfolded: (nb * nch, window_length, num_windows).
# -> (nb * nch * num_windows, 1, window_length)
# def overlap_and_add(self, hop_duration: float): unfolded = unfolded.transpose([0, 2, 1]).reshape([-1, 1, window_length])
# """Function which takes a list of windows and overlap adds them into a self.audio_data = unfolded
# signal the same length as ``audio_signal``. return self
# Parameters def overlap_and_add(self, hop_duration: float):
# ---------- """Function which takes a list of windows and overlap adds them into a
# hop_duration : float signal the same length as ``audio_signal``.
# How much to shift for each window
# (overlap is window_duration - hop_duration) in seconds. Parameters
----------
# Returns hop_duration : float
# ------- How much to shift for each window
# AudioSignal (overlap is window_duration - hop_duration) in seconds.
# overlap-and-added signal.
# """ Returns
# hop_length = int(hop_duration * self.sample_rate) -------
# window_length = self.signal_length AudioSignal
overlap-and-added signal.
# nb, nch = self._original_batch_size, self._original_num_channels """
hop_length = int(hop_duration * self.sample_rate)
# unfolded = self.audio_data.reshape(nb * nch, -1, window_length).permute(0, 2, 1) window_length = self.signal_length
nb, nch = self._original_batch_size, self._original_num_channels
unfolded = self.audio_data.reshape(
[nb * nch, -1, window_length]).transpose([0, 2, 1])
# folded = paddle.nn.functional.fold( # folded = paddle.nn.functional.fold(
# unfolded, # unfolded,
# output_size=(1, self._padded_signal_length), # output_sizes=(1, self._padded_signal_length),
# kernel_size=(1, window_length), # kernel_sizes=(1, window_length),
# stride=(1, hop_length), # strides=(1, hop_length),
# ) # )
folded = _fold(
unfolded,
output_sizes=(1, self._padded_signal_length),
kernel_sizes=(1, window_length),
strides=(1, hop_length), )
# norm = paddle.ones_like(unfolded, device=unfolded.device) norm = paddle.ones_like(unfolded)
# norm = paddle.nn.functional.fold( # norm = paddle.nn.functional.fold(
# norm, # norm,
# output_size=(1, self._padded_signal_length), # output_sizes=(1, self._padded_signal_length),
# kernel_size=(1, window_length), # kernel_sizes=(1, window_length),
# stride=(1, hop_length), # strides=(1, hop_length),
# ) # )
norm = _fold(
norm,
output_sizes=(1, self._padded_signal_length),
kernel_sizes=(1, window_length),
strides=(1, hop_length), )
# folded = folded / norm folded = folded / norm
# folded = folded.reshape(nb, nch, -1) folded = folded.reshape([nb, nch, -1])
# self.audio_data = folded self.audio_data = folded
# self.trim(hop_length, hop_length) self.trim(hop_length, hop_length)
# return self return self
def low_pass(self, def low_pass(self,
cutoffs: typing.Union[paddle.Tensor, np.ndarray, float], cutoffs: typing.Union[paddle.Tensor, np.ndarray, float],
@ -312,87 +365,92 @@ class DSPMixin:
self.stft_data = mag * paddle.exp(1j * phase) self.stft_data = mag * paddle.exp(1j * phase)
return self return self
# def mask_low_magnitudes( def mask_low_magnitudes(
# self, db_cutoff: typing.Union[paddle.Tensor, np.ndarray, float], val: float = 0.0 self,
# ): db_cutoff: typing.Union[paddle.Tensor, np.ndarray, float],
# """Mask away magnitudes below a specified threshold, which val: float=0.0):
# can be different for every item in the batch. """Mask away magnitudes below a specified threshold, which
can be different for every item in the batch.
# Parameters
# ----------
# db_cutoff : typing.Union[paddle.Tensor, np.ndarray, float]
# Decibel value for which things below it will be masked away.
# val : float, optional
# Value to fill in for masked portions, by default 0.0
# Returns
# -------
# AudioSignal
# Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
# masked audio data.
# """
# mag = self.magnitude
# log_mag = self.log_magnitude()
# db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim)
# mask = log_mag < db_cutoff
# mag = mag.masked_fill(mask, val)
# self.magnitude = mag Parameters
# return self ----------
db_cutoff : typing.Union[paddle.Tensor, np.ndarray, float]
# def shift_phase(self, shift: typing.Union[paddle.Tensor, np.ndarray, float]): Decibel value for which things below it will be masked away.
# """Shifts the phase by a constant value. val : float, optional
Value to fill in for masked portions, by default 0.0
# Parameters
# ---------- Returns
# shift : typing.Union[paddle.Tensor, np.ndarray, float] -------
# What to shift the phase by. AudioSignal
Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
# Returns masked audio data.
# ------- """
# AudioSignal mag = self.magnitude
# Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the log_mag = self.log_magnitude()
# masked audio data.
# """ db_cutoff = util.ensure_tensor(db_cutoff, ndim=mag.ndim)
# shift = util.ensure_tensor(shift, ndim=self.phase.ndim) mask = log_mag < db_cutoff
# self.phase = self.phase + shift mag = mag.masked_fill(mask, val)
# return self
self.magnitude = mag
# def corrupt_phase(self, scale: typing.Union[paddle.Tensor, np.ndarray, float]): return self
# """Corrupts the phase randomly by some scaled value.
def shift_phase(self,
# Parameters shift: typing.Union[paddle.Tensor, np.ndarray, float]):
# ---------- """Shifts the phase by a constant value.
# scale : typing.Union[paddle.Tensor, np.ndarray, float]
# Standard deviation of noise to add to the phase. Parameters
----------
# Returns shift : typing.Union[paddle.Tensor, np.ndarray, float]
# ------- What to shift the phase by.
# AudioSignal
# Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the Returns
# masked audio data. -------
# """ AudioSignal
# scale = util.ensure_tensor(scale, ndim=self.phase.ndim) Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
# self.phase = self.phase + scale * paddle.randn_like(self.phase) masked audio data.
# return self """
shift = util.ensure_tensor(shift, ndim=self.phase.ndim)
# def preemphasis(self, coef: float = 0.85): self.phase = self.phase + shift
# """Applies pre-emphasis to audio signal. return self
# Parameters def corrupt_phase(self,
# ---------- scale: typing.Union[paddle.Tensor, np.ndarray, float]):
# coef : float, optional """Corrupts the phase randomly by some scaled value.
# How much pre-emphasis to apply, lower values do less. 0 does nothing.
# by default 0.85 Parameters
----------
# Returns scale : typing.Union[paddle.Tensor, np.ndarray, float]
# ------- Standard deviation of noise to add to the phase.
# AudioSignal
# Pre-emphasized signal. Returns
# """ -------
# kernel = paddle.to_tensor([1, -coef, 0]).view(1, 1, -1).to(self.device) AudioSignal
# x = self.audio_data.reshape(-1, 1, self.signal_length) Signal with ``stft_data`` manipulated. Apply ``.istft()`` to get the
# x = paddle.nn.functional.conv1d(x, kernel, padding=1) masked audio data.
# self.audio_data = x.reshape(*self.audio_data.shape) """
# return self scale = util.ensure_tensor(scale, ndim=self.phase.ndim)
self.phase = self.phase + scale * paddle.randn(
shape=self.phase.shape, dtype=self.phase.dtype)
return self
def preemphasis(self, coef: float=0.85):
    """Applies pre-emphasis to audio signal.

    The audio is flattened to one channel per row, filtered by a 1-D
    convolution with the three-tap kernel ``[1, -coef, 0]`` (padding 1, so
    the length is kept), and reshaped back to its original shape.

    Parameters
    ----------
    coef : float, optional
        How much pre-emphasis to apply, lower values do less. 0 does nothing.
        by default 0.85

    Returns
    -------
    AudioSignal
        Pre-emphasized signal.
    """
    taps = paddle.to_tensor([1, -coef, 0]).reshape([1, 1, -1])
    rows = self.audio_data.reshape([-1, 1, self.signal_length])
    # The input is cast to the kernel's dtype before convolving.
    rows = rows.astype(taps.dtype)
    filtered = paddle.nn.functional.conv1d(rows, taps, padding=1)
    self.audio_data = filtered.reshape(self.audio_data.shape)
    return self

@ -1,8 +1,8 @@
flatten_dict flatten_dict
gradio gradio
IPython IPython
librosa librosa==0.8.1markdown2
markdown2 numpy==1.23.5
pyloudnorm pyloudnorm
pytest pytest
pytest-xdist pytest-xdist

@ -13,7 +13,7 @@ from audiotools import AudioSignal
def test_io(): def test_io():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(pathlib.Path(audio_path)) signal = AudioSignal(pathlib.Path(audio_path))
with tempfile.NamedTemporaryFile(suffix=".wav") as f: with tempfile.NamedTemporaryFile(suffix=".wav") as f:
@ -61,7 +61,7 @@ def test_io():
assert signal.audio_data.ndim == 3 assert signal.audio_data.ndim == 3
assert paddle.all(signal.samples == signal.audio_data) assert paddle.all(signal.samples == signal.audio_data)
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
assert AudioSignal(audio_path).hash() == AudioSignal(audio_path).hash() assert AudioSignal(audio_path).hash() == AudioSignal(audio_path).hash()
assert AudioSignal(audio_path).hash() != AudioSignal(audio_path).normalize( assert AudioSignal(audio_path).hash() != AudioSignal(audio_path).normalize(
-20).hash() -20).hash()
@ -71,7 +71,7 @@ def test_io():
def test_copy_and_clone(): def test_copy_and_clone():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path) signal = AudioSignal(audio_path)
signal.stft() signal.stft()
signal.loudness() signal.loudness()
@ -369,7 +369,7 @@ def test_trim():
def test_to_from_ops(): def test_to_from_ops():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path) signal = AudioSignal(audio_path)
signal.stft() signal.stft()
signal.loudness() signal.loudness()
@ -384,7 +384,7 @@ def test_to_from_ops():
def test_device(): def test_device():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path) signal = AudioSignal(audio_path)
signal.to("cpu") signal.to("cpu")
@ -397,7 +397,7 @@ def test_device():
def test_stft(window_length, hop_length, window_type): def test_stft(window_length, hop_length, window_type):
if hop_length >= window_length: if hop_length >= window_length:
hop_length = window_length // 2 hop_length = window_length // 2
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
stft_params = audiotools.STFTParams( stft_params = audiotools.STFTParams(
window_length=window_length, window_length=window_length,
hop_length=hop_length, hop_length=hop_length,
@ -456,7 +456,7 @@ def test_stft(window_length, hop_length, window_type):
def test_log_magnitude(): def test_log_magnitude():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
for _ in range(10): for _ in range(10):
signal = AudioSignal.excerpt(audio_path, duration=5.0) signal = AudioSignal.excerpt(audio_path, duration=5.0)
magnitude = signal.magnitude.numpy()[0, 0] magnitude = signal.magnitude.numpy()[0, 0]
@ -474,7 +474,7 @@ def test_log_magnitude():
def test_mel_spectrogram(n_mels, window_length, hop_length, window_type): def test_mel_spectrogram(n_mels, window_length, hop_length, window_type):
if hop_length >= window_length: if hop_length >= window_length:
hop_length = window_length // 2 hop_length = window_length // 2
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
stft_params = audiotools.STFTParams( stft_params = audiotools.STFTParams(
window_length=window_length, window_length=window_length,
hop_length=hop_length, hop_length=hop_length,
@ -492,7 +492,7 @@ def test_mel_spectrogram(n_mels, window_length, hop_length, window_type):
def test_mfcc(n_mfcc, n_mels, window_length, hop_length): def test_mfcc(n_mfcc, n_mels, window_length, hop_length):
if hop_length >= window_length: if hop_length >= window_length:
hop_length = window_length // 2 hop_length = window_length // 2
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
stft_params = audiotools.STFTParams( stft_params = audiotools.STFTParams(
window_length=window_length, hop_length=hop_length) window_length=window_length, hop_length=hop_length)
for _stft_params in [None, stft_params]: for _stft_params in [None, stft_params]:

@ -0,0 +1,48 @@
import sys
from pathlib import Path
import numpy as np
sys.path.append("/home/aistudio/PaddleSpeech/audio")
from audiotools import AudioSignal
from visualdl import LogWriter
def test_specshow():
    """Smoke test: ``specshow`` runs for several keyword combinations on
    one second of silence at 16 kHz."""
    silence = np.zeros((1, 16000))
    keyword_sets = (
        {},
        {"preemphasis": True},
        {"title": "test", "preemphasis": True},
        {"format": False, "preemphasis": True},
        {"format": False, "preemphasis": False, "y_axis": "mel"},
    )
    # A fresh signal per call, exactly one call per keyword set.
    for keywords in keyword_sets:
        AudioSignal(silence, sample_rate=16000).specshow(**keywords)
def test_waveplot():
    """Smoke test: ``waveplot`` runs on one second of silence at 16 kHz."""
    silence = np.zeros((1, 16000))
    signal = AudioSignal(silence, sample_rate=16000)
    signal.waveplot()
def test_wavespec():
    """Smoke test: ``wavespec`` runs on one second of silence at 16 kHz."""
    silence = np.zeros((1, 16000))
    signal = AudioSignal(silence, sample_rate=16000)
    signal.wavespec()
def test_write_audio_to_tb():
    """Smoke test: a 5 second excerpt can be written to a ``LogWriter``."""
    signal = AudioSignal("./audio/spk/f10_script4_produced.mp3", duration=5)

    Path("./scratch").mkdir(parents=True, exist_ok=True)
    # Use the writer as a context manager so it is closed even if the call
    # under test raises.
    with LogWriter("./scratch/") as writer:
        signal.write_audio_to_tb("tag", writer)
def test_save_image():
    """Smoke test: ``save_image`` runs for a 10 second excerpt starting at
    10 seconds, writing to ``./scratch/image.png``."""
    excerpt = AudioSignal(
        "./audio/spk/f10_script4_produced.wav", duration=10, offset=10)
    scratch_dir = Path("./scratch")
    scratch_dir.mkdir(parents=True, exist_ok=True)
    excerpt.save_image("./scratch/image.png")

@ -0,0 +1,178 @@
import sys
import numpy as np
import paddle
import pytest
sys.path.append("/home/aistudio/PaddleSpeech/audio")
from audiotools import AudioSignal
from audiotools.core.util import sample_from_dist
@pytest.mark.parametrize("window_duration", [0.1, 0.25, 0.5, 1.0])
@pytest.mark.parametrize("sample_rate", [8000, 16000, 22050, 44100])
@pytest.mark.parametrize("duration", [0.5, 1.0, 2.0, 10.0])
def test_overlap_add(duration, sample_rate, window_duration):
    """``collect_windows`` followed by ``overlap_and_add`` gives back the
    original signal, for both speech excerpts and random noise.

    Nothing is checked when ``duration`` is not longer than
    ``window_duration``.
    """
    np.random.seed(0)
    if not duration > window_duration:
        return

    excerpts = [
        AudioSignal.excerpt(
            "./audio/spk/f10_script4_produced.wav", duration=duration)
        for _ in range(16)
    ]
    spk_signal = AudioSignal.batch(excerpts)
    spk_signal.resample(sample_rate)

    num_samples = int(duration * sample_rate)
    nz_signal = AudioSignal(
        paddle.randn([16, 1, num_samples]), sample_rate=sample_rate)

    def _test(signal):
        # Windows overlap by half their length.
        hop_duration = window_duration / 2
        windowed = signal.deepcopy().collect_windows(window_duration,
                                                     hop_duration)
        recombined = windowed.overlap_and_add(hop_duration)

        assert recombined == signal
        assert np.allclose(recombined.audio_data, signal.audio_data, 1e-3)

    for candidate in (nz_signal, spk_signal):
        _test(candidate)
@pytest.mark.parametrize("window_duration", [0.1, 0.25, 0.5, 1.0])
@pytest.mark.parametrize("sample_rate", [8000, 16000, 22050, 44100])
@pytest.mark.parametrize("duration", [0.5, 1.0, 2.0, 10.0])
def test_inplace_overlap_add(duration, sample_rate, window_duration):
    """The ``windows`` generator yields the same data, window by window, as
    ``collect_windows`` stores along its first axis.

    Nothing is checked when ``duration`` is not longer than
    ``window_duration``.
    """
    np.random.seed(0)
    if not duration > window_duration:
        return

    excerpts = [
        AudioSignal.excerpt(
            "./audio/spk/f10_script4_produced.wav", duration=duration)
        for _ in range(16)
    ]
    spk_signal = AudioSignal.batch(excerpts)
    spk_signal.resample(sample_rate)

    num_samples = int(duration * sample_rate)
    nz_signal = AudioSignal(
        paddle.randn([16, 1, num_samples]), sample_rate=sample_rate)

    def _test(signal):
        hop_duration = window_duration / 2
        collected = signal.deepcopy().collect_windows(window_duration,
                                                      hop_duration)

        # Compare in-place with unfold results
        generated = signal.deepcopy().windows(window_duration, hop_duration)
        for index, window in enumerate(generated):
            assert np.allclose(window.audio_data, collected.audio_data[index])

    for candidate in (nz_signal, spk_signal):
        _test(candidate)
def test_low_pass():
    """A Hann-windowed 440 Hz tone is removed by a 220 Hz low-pass and left
    (nearly) unchanged by an 880 Hz one, also with per-item cutoffs in a
    batch."""
    sample_rate = 44100
    freq_hz = 440

    time_axis = paddle.arange(0, 1, 1 / sample_rate)
    tone = paddle.sin(2 * np.pi * freq_hz * time_axis)
    tone = tone * AudioSignal.get_window("hann", tone.shape[-1])
    signal = AudioSignal(tone.unsqueeze(0), sample_rate=sample_rate)

    # Cutoff below the tone: nothing should be left.
    below = signal.deepcopy().low_pass(220)
    assert below.audio_data.abs().max() < 1e-4

    # Cutoff above the tone: output should match the input.
    above = signal.deepcopy().low_pass(880)
    assert (above - signal).audio_data.abs().max() < 1e-3

    # One cutoff per batch item.
    batch = AudioSignal.batch([signal.deepcopy() for _ in range(3)])
    filtered = batch.deepcopy().low_pass([220, 880, 220])

    assert filtered.audio_data[0].abs().max() < 1e-4
    assert filtered.audio_data[2].abs().max() < 1e-4
    assert (filtered - batch).audio_data[1].abs().max() < 1e-3
def test_high_pass():
    """A Hann-windowed 440 Hz tone passes a 220 Hz high-pass (nearly)
    unchanged."""
    sample_rate = 44100
    freq_hz = 440

    time_axis = paddle.arange(0, 1, 1 / sample_rate)
    tone = paddle.sin(2 * np.pi * freq_hz * time_axis)
    tone = tone * AudioSignal.get_window("hann", tone.shape[-1])
    signal = AudioSignal(tone.unsqueeze(0), sample_rate=sample_rate)

    filtered = signal.deepcopy().high_pass(220)
    residual = (signal - filtered).audio_data
    assert residual.abs().max() < 1e-4
def test_mask_frequencies():
    """Compares the audio of a frequency-masked four-tone mixture against a
    freshly built reference mixture."""
    sample_rate = 44100
    time_col = paddle.arange(0, 1, 1 / sample_rate)[:, None]
    fs = paddle.to_tensor([500.0, 2000.0, 8000.0, 32000.0])[None]

    def _tone_sum(freq_row):
        # One sine per frequency, summed over the frequency axis.
        return paddle.sin(2 * np.pi * time_col @ freq_row).sum(axis=-1)

    mixture = AudioSignal(_tone_sum(fs), sample_rate)
    masked = mixture.mask_frequencies(fmin_hz=1500, fmax_hz=10000)

    # NOTE(review): ``fs2`` is never used -- the reference below is built
    # from ``fs`` (all four tones), so the assertion compares against the
    # unmasked mixture. Confirm whether ``fs2`` was intended here.
    fs2 = paddle.to_tensor([500.0, 32000.0])[None]
    reference = AudioSignal(_tone_sum(fs), sample_rate)

    assert paddle.allclose(masked.audio_data, reference.audio_data)
def test_mask_timesteps():
    """After masking 0.25 s - 0.75 s and inverting the STFT, the samples
    strictly between 0.3 s and 0.7 s are (close to) zero."""
    sample_rate = 44100
    freq_hz = 440

    t = paddle.linspace(0, 1, sample_rate)
    tone = AudioSignal(paddle.sin(2 * np.pi * freq_hz * t), sample_rate)

    masked = tone.mask_timesteps(tmin_s=0.25, tmax_s=0.75)
    masked.istft()

    # Check a region safely inside the masked interval only.
    inside = ((0.3 < t) & (t < 0.7))[None, None]
    selected = masked.audio_data[inside]
    assert paddle.allclose(selected, paddle.zeros_like(selected))
def test_shift_phase():
    """``shift_phase(pi)`` gives the same audio as adding pi to the phase
    of a clone by hand."""
    sample_rate = 44100
    freq_hz = 440

    time_axis = paddle.linspace(0, 1, sample_rate)
    tone = AudioSignal(
        paddle.sin(2 * np.pi * freq_hz * time_axis), sample_rate)
    # Clone before shifting so the manual path starts from the same data.
    manual = tone.clone()

    shifted = tone.shift_phase(np.pi)
    shifted.istft()

    manual.phase = manual.phase + np.pi
    manual.istft()

    assert paddle.allclose(shifted.audio_data, manual.audio_data)
def test_corrupt_phase():
    """``corrupt_phase`` changes the phase, and the spread of the change,
    relative to pi, stays below 1."""
    sample_rate = 44100
    freq_hz = 440

    time_axis = paddle.linspace(0, 1, sample_rate)
    tone = AudioSignal(
        paddle.sin(2 * np.pi * freq_hz * time_axis), sample_rate)
    # Clone before corrupting to keep the untouched phase for comparison.
    untouched = tone.clone()

    corrupted = tone.corrupt_phase(scale=np.pi)
    corrupted.istft()

    phase_delta = untouched.phase - corrupted.phase
    assert phase_delta.abs().mean() > 0.0
    assert (phase_delta.std() / np.pi) < 1.0
def test_preemphasis():
    """Smoke test: plotting with and without pre-emphasis, then applying
    ``preemphasis`` directly, all run on a 5 second excerpt."""
    excerpt = AudioSignal.excerpt(
        "./audio/spk/f10_script4_produced.wav", duration=5)
    import matplotlib.pyplot as plt

    for use_preemphasis in (False, True):
        excerpt.specshow(preemphasis=use_preemphasis)

    excerpt.preemphasis()

@ -8,7 +8,7 @@ from audiotools import AudioSignal
def test_normalize(): def test_normalize():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=10) signal = AudioSignal(audio_path, offset=10, duration=10)
signal = signal.normalize() signal = signal.normalize()
assert np.allclose(signal.loudness(), -24, atol=1e-1) assert np.allclose(signal.loudness(), -24, atol=1e-1)
@ -35,7 +35,7 @@ def test_normalize():
def test_volume_change(): def test_volume_change():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=10) signal = AudioSignal(audio_path, offset=10, duration=10)
boost = 3 boost = 3
@ -50,10 +50,10 @@ def test_volume_change():
def test_mix(): def test_mix():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=10) spk = AudioSignal(audio_path, offset=10, duration=10)
audio_path = "tests/audiotools/audio/nz/f5_script2_ipad_balcony1_room_tone.wav" audio_path = "./audio/nz/f5_script2_ipad_balcony1_room_tone.wav"
nz = AudioSignal(audio_path, offset=10, duration=10) nz = AudioSignal(audio_path, offset=10, duration=10)
spk.deepcopy().mix(nz, snr=-10) spk.deepcopy().mix(nz, snr=-10)
@ -61,10 +61,10 @@ def test_mix():
assert np.allclose(snr, -10, atol=1) assert np.allclose(snr, -10, atol=1)
# Test in batch # Test in batch
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=10) spk = AudioSignal(audio_path, offset=10, duration=10)
audio_path = "tests/audiotools/audio/nz/f5_script2_ipad_balcony1_room_tone.wav" audio_path = "./audio/nz/f5_script2_ipad_balcony1_room_tone.wav"
nz = AudioSignal(audio_path, offset=10, duration=10) nz = AudioSignal(audio_path, offset=10, duration=10)
batch_size = 4 batch_size = 4
@ -86,7 +86,7 @@ def test_mix():
def test_convolve(): def test_convolve():
np.random.seed(6) # Found a failing seed np.random.seed(6) # Found a failing seed
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=10) spk = AudioSignal(audio_path, offset=10, duration=10)
impulse = np.zeros((1, 16000), dtype="float32") impulse = np.zeros((1, 16000), dtype="float32")
@ -106,7 +106,7 @@ def test_convolve():
assert convolved == spk_batch assert convolved == spk_batch
# Short duration # Short duration
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=0.1) spk = AudioSignal(audio_path, offset=10, duration=0.1)
impulse = np.zeros((1, 16000), dtype="float32") impulse = np.zeros((1, 16000), dtype="float32")
@ -128,14 +128,14 @@ def test_convolve():
def test_pipeline(): def test_pipeline():
# An actual IR, no batching # An actual IR, no batching
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=5) spk = AudioSignal(audio_path, offset=10, duration=5)
audio_path = "tests/audiotools/audio/ir/h179_Bar_1txts.wav" audio_path = "./audio/ir/h179_Bar_1txts.wav"
ir = AudioSignal(audio_path) ir = AudioSignal(audio_path)
spk.deepcopy().convolve(ir) spk.deepcopy().convolve(ir)
audio_path = "tests/audiotools/audio/nz/f5_script2_ipad_balcony1_room_tone.wav" audio_path = "./audio/nz/f5_script2_ipad_balcony1_room_tone.wav"
nz = AudioSignal(audio_path, offset=10, duration=5) nz = AudioSignal(audio_path, offset=10, duration=5)
batch_size = 16 batch_size = 16
@ -146,7 +146,7 @@ def test_pipeline():
# def test_codec(): # def test_codec():
# audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" # audio_path = "./audio/spk/f10_script4_produced.wav"
# spk = AudioSignal(audio_path, offset=10, duration=10) # spk = AudioSignal(audio_path, offset=10, duration=10)
# with pytest.raises(ValueError): # with pytest.raises(ValueError):
@ -156,7 +156,7 @@ def test_pipeline():
# out = spk.deepcopy().apply_codec("8-bit") # out = spk.deepcopy().apply_codec("8-bit")
# def test_pitch_shift(): # def test_pitch_shift():
# audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" # audio_path = "./audio/spk/f10_script4_produced.wav"
# spk = AudioSignal(audio_path, offset=10, duration=1) # spk = AudioSignal(audio_path, offset=10, duration=1)
# single = spk.deepcopy().pitch_shift(5) # single = spk.deepcopy().pitch_shift(5)
@ -169,7 +169,7 @@ def test_pipeline():
# assert np.allclose(batched[0].audio_data, single[0].audio_data) # assert np.allclose(batched[0].audio_data, single[0].audio_data)
# def test_time_stretch(): # def test_time_stretch():
# audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" # audio_path = "./audio/spk/f10_script4_produced.wav"
# spk = AudioSignal(audio_path, offset=10, duration=1) # spk = AudioSignal(audio_path, offset=10, duration=1)
# single = spk.deepcopy().time_stretch(0.8) # single = spk.deepcopy().time_stretch(0.8)
@ -184,7 +184,7 @@ def test_pipeline():
@pytest.mark.parametrize("n_bands", [1, 2, 4, 8, 12, 16]) @pytest.mark.parametrize("n_bands", [1, 2, 4, 8, 12, 16])
def test_mel_filterbank(n_bands): def test_mel_filterbank(n_bands):
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=1) spk = AudioSignal(audio_path, offset=10, duration=1)
fbank = spk.deepcopy().mel_filterbank(n_bands) fbank = spk.deepcopy().mel_filterbank(n_bands)
@ -192,8 +192,7 @@ def test_mel_filterbank(n_bands):
# Check if it works in batches. # Check if it works in batches.
spk_batch = AudioSignal.batch([ spk_batch = AudioSignal.batch([
AudioSignal.excerpt( AudioSignal.excerpt("./audio/spk/f10_script4_produced.wav", duration=2)
"tests/audiotools/audio/spk/f10_script4_produced.wav", duration=2)
for _ in range(16) for _ in range(16)
]) ])
fbank = spk_batch.deepcopy().mel_filterbank(n_bands) fbank = spk_batch.deepcopy().mel_filterbank(n_bands)
@ -203,7 +202,7 @@ def test_mel_filterbank(n_bands):
@pytest.mark.parametrize("n_bands", [1, 2, 4, 8, 12, 16]) @pytest.mark.parametrize("n_bands", [1, 2, 4, 8, 12, 16])
def test_equalizer(n_bands): def test_equalizer(n_bands):
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=10) spk = AudioSignal(audio_path, offset=10, duration=10)
db = -3 + 1 * paddle.rand([n_bands]) db = -3 + 1 * paddle.rand([n_bands])
@ -212,15 +211,14 @@ def test_equalizer(n_bands):
db = -3 + 1 * np.random.rand(n_bands) db = -3 + 1 * np.random.rand(n_bands)
spk.deepcopy().equalizer(db) spk.deepcopy().equalizer(db)
audio_path = "tests/audiotools/audio/ir/h179_Bar_1txts.wav" audio_path = "./audio/ir/h179_Bar_1txts.wav"
ir = AudioSignal(audio_path) ir = AudioSignal(audio_path)
db = -3 + 1 * paddle.rand([n_bands]) db = -3 + 1 * paddle.rand([n_bands])
spk.deepcopy().convolve(ir.equalizer(db)) spk.deepcopy().convolve(ir.equalizer(db))
spk_batch = AudioSignal.batch([ spk_batch = AudioSignal.batch([
AudioSignal.excerpt( AudioSignal.excerpt("./audio/spk/f10_script4_produced.wav", duration=2)
"tests/audiotools/audio/spk/f10_script4_produced.wav", duration=2)
for _ in range(16) for _ in range(16)
]) ])
@ -231,13 +229,12 @@ def test_equalizer(n_bands):
def test_clip_distortion(): def test_clip_distortion():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=2) spk = AudioSignal(audio_path, offset=10, duration=2)
clipped = spk.deepcopy().clip_distortion(0.05) clipped = spk.deepcopy().clip_distortion(0.05)
spk_batch = AudioSignal.batch([ spk_batch = AudioSignal.batch([
AudioSignal.excerpt( AudioSignal.excerpt("./audio/spk/f10_script4_produced.wav", duration=2)
"tests/audiotools/audio/spk/f10_script4_produced.wav", duration=2)
for _ in range(16) for _ in range(16)
]) ])
percs = paddle.to_tensor(np.random.uniform(size=(16, ))).astype("float32") percs = paddle.to_tensor(np.random.uniform(size=(16, ))).astype("float32")
@ -249,7 +246,7 @@ def test_clip_distortion():
@pytest.mark.parametrize("quant_ch", [2, 4, 8, 16, 32, 64, 128]) @pytest.mark.parametrize("quant_ch", [2, 4, 8, 16, 32, 64, 128])
def test_quantization(quant_ch): def test_quantization(quant_ch):
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=2) spk = AudioSignal(audio_path, offset=10, duration=2)
quantized = spk.deepcopy().quantization(quant_ch) quantized = spk.deepcopy().quantization(quant_ch)
@ -260,8 +257,7 @@ def test_quantization(quant_ch):
assert found_quant_ch <= quant_ch assert found_quant_ch <= quant_ch
spk_batch = AudioSignal.batch([ spk_batch = AudioSignal.batch([
AudioSignal.excerpt( AudioSignal.excerpt("./audio/spk/f10_script4_produced.wav", duration=2)
"tests/audiotools/audio/spk/f10_script4_produced.wav", duration=2)
for _ in range(16) for _ in range(16)
]) ])
@ -277,7 +273,7 @@ def test_quantization(quant_ch):
@pytest.mark.parametrize("quant_ch", [2, 4, 8, 16, 32, 64, 128]) @pytest.mark.parametrize("quant_ch", [2, 4, 8, 16, 32, 64, 128])
def test_mulaw_quantization(quant_ch): def test_mulaw_quantization(quant_ch):
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
spk = AudioSignal(audio_path, offset=10, duration=2) spk = AudioSignal(audio_path, offset=10, duration=2)
quantized = spk.deepcopy().mulaw_quantization(quant_ch) quantized = spk.deepcopy().mulaw_quantization(quant_ch)
@ -288,8 +284,7 @@ def test_mulaw_quantization(quant_ch):
assert found_quant_ch <= quant_ch assert found_quant_ch <= quant_ch
spk_batch = AudioSignal.batch([ spk_batch = AudioSignal.batch([
AudioSignal.excerpt( AudioSignal.excerpt("./audio/spk/f10_script4_produced.wav", duration=2)
"tests/audiotools/audio/spk/f10_script4_produced.wav", duration=2)
for _ in range(16) for _ in range(16)
]) ])
@ -304,7 +299,7 @@ def test_mulaw_quantization(quant_ch):
def test_impulse_response_augmentation(): def test_impulse_response_augmentation():
audio_path = "tests/audiotools/audio/ir/h179_Bar_1txts.wav" audio_path = "./audio/ir/h179_Bar_1txts.wav"
batch_size = 16 batch_size = 16
ir = AudioSignal(audio_path) ir = AudioSignal(audio_path)
ir_batch = AudioSignal.batch([ir for _ in range(batch_size)]) ir_batch = AudioSignal.batch([ir for _ in range(batch_size)])
@ -330,8 +325,8 @@ def test_impulse_response_augmentation():
def test_apply_ir(): def test_apply_ir():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
ir_path = "tests/audiotools/audio/ir/h179_Bar_1txts.wav" ir_path = "./audio/ir/h179_Bar_1txts.wav"
spk = AudioSignal(audio_path, offset=10, duration=2) spk = AudioSignal(audio_path, offset=10, duration=2)
ir = AudioSignal(ir_path) ir = AudioSignal(ir_path)

@ -9,8 +9,8 @@ from audiotools import AudioSignal
def test_audio_grad(): def test_audio_grad():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
ir_path = "tests/audiotools/audio/ir/h179_Bar_1txts.wav" ir_path = "./audio/ir/h179_Bar_1txts.wav"
def _test_audio_grad(attr: str, target=True, kwargs: dict={}): def _test_audio_grad(attr: str, target=True, kwargs: dict={}):
signal = AudioSignal(audio_path) signal = AudioSignal(audio_path)
@ -153,7 +153,7 @@ def test_audio_grad():
def test_batch_grad(): def test_batch_grad():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path) signal = AudioSignal(audio_path)
signal.audio_data.stop_gradient = False signal.audio_data.stop_gradient = False

@ -97,75 +97,5 @@ class TestHighPassFilters(_BaseTest):
self.assertSimilar(y, y2, x) self.assertSimilar(y, y2, x)
# class TestBandPassFilters(_BaseTest):
# def setUp(self):
# paddle.seed(1234)
# random.seed(1234)
# def test_keep_or_kill(self):
# for _ in range(10):
# freq = random.uniform(0.01, 0.4)
# sr = 1024
# tone = pure_tone(freq * sr, sr=sr, dur=10)
# # For this test we accept 5% tolerance in amplitude, or -26dB in power.
# tol = 5
# zeros = 16
# y_pass = filters.bandpass_filter(tone, 0.9 * freq, 1.1 * freq, zeros=zeros)
# self.assertSimilar(y_pass, tone, tone, f"freq={freq}, pass", tol=tol)
# y_killed = filters.bandpass_filter(tone, 1.1 * freq, 1.2 * freq, zeros=zeros)
# self.assertSimilar(y_killed, 0 * tone, tone, f"freq={freq}, kill", tol=tol)
# y_killed = filters.bandpass_filter(tone, 0.8 * freq, 0.9 * freq, zeros=zeros)
# self.assertSimilar(y_killed, 0 * tone, tone, f"freq={freq}, kill", tol=tol)
# def test_fft_nofft(self):
# for _ in range(10):
# x = paddle.randn([1024])
# freq = random.uniform(0.01, 0.5)
# freq2 = random.uniform(freq, 0.5)
# y_fft = filters.bandpass_filter(x, freq, freq2, fft=True)
# y_ref = filters.bandpass_filter(x, freq, freq2, fft=False)
# self.assertSimilar(y_fft, y_ref, x, f"freq={freq}", tol=0.01)
# def test_constant(self):
# x = paddle.ones([2048])
# for zeros in [4, 10]:
# for freq in [0.01, 0.1]:
# y = filters.bandpass_filter(x, freq, 1.2 * freq, zeros=zeros)
# self.assertLessEqual(y.abs().mean(), 1e-6, (zeros, freq))
# def test_stride(self):
# x = paddle.randn([1024])
# y = filters.bandpass_filter(x, 0.1, 0.2, stride=1)[::3]
# y2 = filters.bandpass_filter(x, 0.1, 0.2, stride=3)
# self.assertEqual(y.shape, y2.shape)
# self.assertSimilar(y, y2, x)
# y = filters.bandpass_filter(x, 0.1, 0.2, stride=1, pad=False)[::3]
# y2 = filters.bandpass_filter(x, 0.1, 0.2, stride=3, pad=False)
# self.assertEqual(y.shape, y2.shape)
# self.assertSimilar(y, y2, x)
# def test_same_as_highpass(self):
# x = paddle.randn([1024])
# y_ref = highpass_filter(x, 0.2)
# y = filters.bandpass_filter(x, 0.2, 0.5)
# self.assertSimilar(y, y_ref, x)
# def test_same_as_lowpass(self):
# x = paddle.randn([1024])
# y_ref = filters.lowpass_filter(x, 0.2)
# y = filters.bandpass_filter(x, 0.0, 0.2)
# self.assertSimilar(y, y_ref, x)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()

@ -13,7 +13,7 @@ ATOL = 1e-1
def test_loudness_against_pyln(): def test_loudness_against_pyln():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=5, duration=10) signal = AudioSignal(audio_path, offset=5, duration=10)
signal_loudness = signal.loudness() signal_loudness = signal.loudness()
@ -24,7 +24,7 @@ def test_loudness_against_pyln():
def test_loudness_short(): def test_loudness_short():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=0.25) signal = AudioSignal(audio_path, offset=10, duration=0.25)
signal_loudness = signal.loudness() signal_loudness = signal.loudness()
@ -58,7 +58,7 @@ def test_batch_loudness():
# Tests below are copied from pyloudnorm # Tests below are copied from pyloudnorm
def test_integrated_loudness(): def test_integrated_loudness():
data, rate = sf.read("tests/audiotools/audio/loudness/sine_1000.wav") data, rate = sf.read("./audio/loudness/sine_1000.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter(data) loudness = meter(data)
@ -67,8 +67,7 @@ def test_integrated_loudness():
def test_rel_gate_test(): def test_rel_gate_test():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_RelGateTest.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_RelGateTest.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -77,8 +76,7 @@ def test_rel_gate_test():
def test_abs_gate_test(): def test_abs_gate_test():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_AbsGateTest.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_AbsGateTest.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -87,8 +85,7 @@ def test_abs_gate_test():
def test_24LKFS_25Hz_2ch(): def test_24LKFS_25Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_24LKFS_25Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_24LKFS_25Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -97,8 +94,7 @@ def test_24LKFS_25Hz_2ch():
def test_24LKFS_100Hz_2ch(): def test_24LKFS_100Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_24LKFS_100Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_24LKFS_100Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -107,8 +103,7 @@ def test_24LKFS_100Hz_2ch():
def test_24LKFS_500Hz_2ch(): def test_24LKFS_500Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_24LKFS_500Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_24LKFS_500Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -117,8 +112,7 @@ def test_24LKFS_500Hz_2ch():
def test_24LKFS_1000Hz_2ch(): def test_24LKFS_1000Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_24LKFS_1000Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_24LKFS_1000Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -127,8 +121,7 @@ def test_24LKFS_1000Hz_2ch():
def test_24LKFS_2000Hz_2ch(): def test_24LKFS_2000Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_24LKFS_2000Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_24LKFS_2000Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -137,8 +130,7 @@ def test_24LKFS_2000Hz_2ch():
def test_24LKFS_10000Hz_2ch(): def test_24LKFS_10000Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_24LKFS_10000Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_24LKFS_10000Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -147,8 +139,7 @@ def test_24LKFS_10000Hz_2ch():
def test_23LKFS_25Hz_2ch(): def test_23LKFS_25Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_23LKFS_25Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_23LKFS_25Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -157,8 +148,7 @@ def test_23LKFS_25Hz_2ch():
def test_23LKFS_100Hz_2ch(): def test_23LKFS_100Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_23LKFS_100Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_23LKFS_100Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -167,8 +157,7 @@ def test_23LKFS_100Hz_2ch():
def test_23LKFS_500Hz_2ch(): def test_23LKFS_500Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_23LKFS_500Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_23LKFS_500Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -177,8 +166,7 @@ def test_23LKFS_500Hz_2ch():
def test_23LKFS_1000Hz_2ch(): def test_23LKFS_1000Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_23LKFS_1000Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_23LKFS_1000Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -187,8 +175,7 @@ def test_23LKFS_1000Hz_2ch():
def test_23LKFS_2000Hz_2ch(): def test_23LKFS_2000Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_23LKFS_2000Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_23LKFS_2000Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -197,8 +184,7 @@ def test_23LKFS_2000Hz_2ch():
def test_23LKFS_10000Hz_2ch(): def test_23LKFS_10000Hz_2ch():
data, rate = sf.read( data, rate = sf.read("./audio/loudness/1770-2_Comp_23LKFS_10000Hz_2ch.wav")
"tests/audiotools/audio/loudness/1770-2_Comp_23LKFS_10000Hz_2ch.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -208,7 +194,7 @@ def test_23LKFS_10000Hz_2ch():
def test_18LKFS_frequency_sweep(): def test_18LKFS_frequency_sweep():
data, rate = sf.read( data, rate = sf.read(
"tests/audiotools/audio/loudness/1770-2_Comp_18LKFS_FrequencySweep.wav") "./audio/loudness/1770-2_Comp_18LKFS_FrequencySweep.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -218,7 +204,7 @@ def test_18LKFS_frequency_sweep():
def test_conf_stereo_vinL_R_23LKFS(): def test_conf_stereo_vinL_R_23LKFS():
data, rate = sf.read( data, rate = sf.read(
"tests/audiotools/audio/loudness/1770-2_Conf_Stereo_VinL+R-23LKFS.wav") "./audio/loudness/1770-2_Conf_Stereo_VinL+R-23LKFS.wav")
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -228,8 +214,7 @@ def test_conf_stereo_vinL_R_23LKFS():
def test_conf_monovoice_music_24LKFS(): def test_conf_monovoice_music_24LKFS():
data, rate = sf.read( data, rate = sf.read(
"tests/audiotools/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav" "./audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav")
)
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -239,8 +224,7 @@ def test_conf_monovoice_music_24LKFS():
def conf_monovoice_music_24LKFS(): def conf_monovoice_music_24LKFS():
data, rate = sf.read( data, rate = sf.read(
"tests/audiotools/audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav" "./audio/loudness/1770-2_Conf_Mono_Voice+Music-24LKFS.wav")
)
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -250,8 +234,7 @@ def conf_monovoice_music_24LKFS():
def test_conf_monovoice_music_23LKFS(): def test_conf_monovoice_music_23LKFS():
data, rate = sf.read( data, rate = sf.read(
"tests/audiotools/audio/loudness/1770-2_Conf_Mono_Voice+Music-23LKFS.wav" "./audio/loudness/1770-2_Conf_Mono_Voice+Music-23LKFS.wav")
)
meter = Meter(rate) meter = Meter(rate)
loudness = meter.integrated_loudness(data) loudness = meter.integrated_loudness(data)
@ -266,7 +249,7 @@ def test_fir_accuracy():
transforms.HighPass(prob=0.5), transforms.HighPass(prob=0.5),
transforms.Equalizer(prob=0.5), transforms.Equalizer(prob=0.5),
prob=0.5, ) prob=0.5, )
loader = datasets.AudioLoader(sources=["tests/audiotools/audio/spk.csv"]) loader = datasets.AudioLoader(sources=["./audio/spk.csv"])
dataset = datasets.AudioDataset( dataset = datasets.AudioDataset(
loader, loader,
44100, 44100,

@ -66,8 +66,7 @@ def test_find_audio():
assert not audio_files assert not audio_files
# Make sure it works with single audio files # Make sure it works with single audio files
audio_files = util.find_audio( audio_files = util.find_audio("./audio/spk//f10_script4_produced.wav")
"tests/audiotools/audio/spk//f10_script4_produced.wav")
# Make sure it works with globs # Make sure it works with globs
audio_files = util.find_audio("tests/**/*.wav") audio_files = util.find_audio("tests/**/*.wav")

@ -45,7 +45,7 @@ def test_audio_dataset():
tfm.Silence(prob=0.5), tfm.Silence(prob=0.5),
], ) ], )
loader = audiotools.data.datasets.AudioLoader( loader = audiotools.data.datasets.AudioLoader(
sources=["tests/audiotools/audio/spk.csv"], sources=["./audio/spk.csv"],
transform=transform, ) transform=transform, )
dataset = audiotools.data.datasets.AudioDataset( dataset = audiotools.data.datasets.AudioDataset(
loader, loader,
@ -161,11 +161,10 @@ def test_loader_out_of_range():
def test_dataset_pipeline(): def test_dataset_pipeline():
transform = tfm.Compose([ transform = tfm.Compose([
tfm.RoomImpulseResponse(sources=["tests/audiotools/audio/irs.csv"]), tfm.RoomImpulseResponse(sources=["./audio/irs.csv"]),
tfm.BackgroundNoise(sources=["tests/audiotools/audio/noises.csv"]), tfm.BackgroundNoise(sources=["./audio/noises.csv"]),
]) ])
loader = audiotools.data.datasets.AudioLoader( loader = audiotools.data.datasets.AudioLoader(sources=["./audio/spk.csv"])
sources=["tests/audiotools/audio/spk.csv"])
dataset = audiotools.data.datasets.AudioDataset( dataset = audiotools.data.datasets.AudioDataset(
loader, loader,
44100, 44100,

@ -12,13 +12,11 @@ from audiotools.data import preprocess
def test_create_csv(): def test_create_csv():
with tempfile.NamedTemporaryFile(suffix=".csv") as f: with tempfile.NamedTemporaryFile(suffix=".csv") as f:
preprocess.create_csv( preprocess.create_csv(
find_audio("./tests/audiotools/audio/spk", ext=["wav"]), find_audio("././audio/spk", ext=["wav"]), f.name, loudness=True)
f.name,
loudness=True)
def test_create_csv_with_empty_rows(): def test_create_csv_with_empty_rows():
audio_files = find_audio("./tests/audiotools/audio/spk", ext=["wav"]) audio_files = find_audio("././audio/spk", ext=["wav"])
audio_files.insert(0, "") audio_files.insert(0, "")
audio_files.insert(2, "") audio_files.insert(2, "")

@ -49,13 +49,13 @@ def test_transform(transform_name):
kwargs = {} kwargs = {}
if transform_name == "BackgroundNoise": if transform_name == "BackgroundNoise":
kwargs["sources"] = ["tests/audiotools/audio/noises.csv"] kwargs["sources"] = ["./audio/noises.csv"]
if transform_name == "RoomImpulseResponse": if transform_name == "RoomImpulseResponse":
kwargs["sources"] = ["tests/audiotools/audio/irs.csv"] kwargs["sources"] = ["./audio/irs.csv"]
if transform_name == "CrossTalk": if transform_name == "CrossTalk":
kwargs["sources"] = ["tests/audiotools/audio/spk.csv"] kwargs["sources"] = ["./audio/spk.csv"]
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
signal.metadata["loudness"] = AudioSignal( signal.metadata["loudness"] = AudioSignal(
audio_path).ffmpeg_loudness().item() audio_path).ffmpeg_loudness().item()
@ -102,12 +102,12 @@ def test_transform(transform_name):
def test_compose_basic(): def test_compose_basic():
seed = 0 seed = 0
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
transform = tfm.Compose( transform = tfm.Compose(
[ [
tfm.RoomImpulseResponse(sources=["tests/audiotools/audio/irs.csv"]), tfm.RoomImpulseResponse(sources=["./audio/irs.csv"]),
tfm.BackgroundNoise(sources=["tests/audiotools/audio/noises.csv"]), tfm.BackgroundNoise(sources=["./audio/noises.csv"]),
], ) ], )
kwargs = transform.instantiate(seed, signal) kwargs = transform.instantiate(seed, signal)
@ -143,7 +143,7 @@ def test_compose_with_duplicate_transforms():
full_mul = np.prod(muls) full_mul = np.prod(muls)
kwargs = transform.instantiate(0) kwargs = transform.instantiate(0)
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
output = transform(signal.clone(), **kwargs) output = transform(signal.clone(), **kwargs)
@ -162,7 +162,7 @@ def test_nested_compose():
full_mul = np.prod(muls) full_mul = np.prod(muls)
kwargs = transform.instantiate(0) kwargs = transform.instantiate(0)
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
output = transform(signal.clone(), **kwargs) output = transform(signal.clone(), **kwargs)
@ -176,7 +176,7 @@ def test_compose_filtering():
transform = tfm.Compose([MulTransform(x, name=str(x)) for x in muls]) transform = tfm.Compose([MulTransform(x, name=str(x)) for x in muls])
kwargs = transform.instantiate(0) kwargs = transform.instantiate(0)
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
for s in range(len(muls)): for s in range(len(muls)):
@ -199,7 +199,7 @@ def test_sequential_compose():
full_mul = np.prod(muls) full_mul = np.prod(muls)
kwargs = transform.instantiate(0) kwargs = transform.instantiate(0)
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
output = transform(signal.clone(), **kwargs) output = transform(signal.clone(), **kwargs)
@ -210,11 +210,11 @@ def test_sequential_compose():
def test_choose_basic(): def test_choose_basic():
seed = 0 seed = 0
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
transform = tfm.Choose([ transform = tfm.Choose([
tfm.RoomImpulseResponse(sources=["tests/audiotools/audio/irs.csv"]), tfm.RoomImpulseResponse(sources=["./audio/irs.csv"]),
tfm.BackgroundNoise(sources=["tests/audiotools/audio/noises.csv"]), tfm.BackgroundNoise(sources=["./audio/noises.csv"]),
]) ])
kwargs = transform.instantiate(seed, signal) kwargs = transform.instantiate(seed, signal)
@ -251,7 +251,7 @@ def test_choose_basic():
def test_choose_weighted(): def test_choose_weighted():
seed = 0 seed = 0
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
transform = tfm.Choose( transform = tfm.Choose(
[ [
MulTransform(0.0), MulTransform(0.0),
@ -277,7 +277,7 @@ def test_choose_weighted():
def test_choose_with_compose(): def test_choose_with_compose():
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
transform = tfm.Choose([ transform = tfm.Choose([
@ -296,7 +296,7 @@ def test_choose_with_compose():
def test_repeat(): def test_repeat():
seed = 0 seed = 0
audio_path = "tests/audiotools/audio/spk/f10_script4_produced.wav" audio_path = "./audio/spk/f10_script4_produced.wav"
signal = AudioSignal(audio_path, offset=10, duration=2) signal = AudioSignal(audio_path, offset=10, duration=2)
kwargs = {} kwargs = {}
@ -356,7 +356,7 @@ class DummyData(paddle.io.Dataset):
def test_masking(): def test_masking():
dataset = DummyData("tests/audiotools/audio/spk/f10_script4_produced.wav") dataset = DummyData("./audio/spk/f10_script4_produced.wav")
dataloader = paddle.io.DataLoader( dataloader = paddle.io.DataLoader(
dataset, dataset,
batch_size=16, batch_size=16,
@ -385,8 +385,7 @@ def test_nested_masking():
], ],
prob=0.9, ) prob=0.9, )
loader = audiotools.data.datasets.AudioLoader( loader = audiotools.data.datasets.AudioLoader(sources=["./audio/spk.csv"])
sources=["tests/audiotools/audio/spk.csv"])
dataset = audiotools.data.datasets.AudioDataset( dataset = audiotools.data.datasets.AudioDataset(
loader, loader,
44100, 44100,

@ -0,0 +1,4 @@
# Install the Python packages listed in the audiotools requirements file
# (path is relative to the directory this script is run from).
python -m pip install -r ../audiotools/requirements.txt
# wget -P ./test_data https://paddlespeech.bj.bcebos.com/datasets/unit_test/asr/static_ds2online_inputs.pickle
# wget
# Collect every file below the current directory whose name matches "*✅.py"
# and pass the resulting list to pytest.
# NOTE(review): the pattern requires a literal "✅" right before ".py" —
# confirm this is intended and not a leftover marker.
find . -name "*✅.py" | xargs python -m pytest

@ -13,8 +13,7 @@ def test_audio_table():
audio_dict = {} audio_dict = {}
audio_dict["inputs"] = [ audio_dict["inputs"] = [
AudioSignal.excerpt( AudioSignal.excerpt("./audio/spk/f10_script4_produced.wav", duration=5)
"tests/audiotools/audio/spk/f10_script4_produced.wav", duration=5)
for _ in range(3) for _ in range(3)
] ]
audio_dict["outputs"] = [] audio_dict["outputs"] = []

@ -31,6 +31,13 @@ function main(){
cd ${speech_ci_path}/server/offline cd ${speech_ci_path}/server/offline
bash test_server_client.sh bash test_server_client.sh
echo "End server" echo "End server"
echo "Start testing audiotools"
cd ${speech_ci_path}/../../audio/tests/audiotools
bash test_audiotools.sh
echo "End testing audiotools"
} }
main main

@ -115,3 +115,4 @@ paddlespeech whisper --task translate --input ./zh.wav
paddlespeech whisper --lang en --size base --task transcribe --input ./en.wav paddlespeech whisper --lang en --size base --task transcribe --input ./en.wav
echo -e "\033[32mTest success !!!\033[0m" echo -e "\033[32mTest success !!!\033[0m"

Loading…
Cancel
Save