diff --git a/audio/audiotools/core/loudness.py b/audio/audiotools/core/loudness.py index 4d85edf62..c852dea99 100644 --- a/audio/audiotools/core/loudness.py +++ b/audio/audiotools/core/loudness.py @@ -10,9 +10,9 @@ import scipy from . import _julius -def unfold(_input, kernel_size: int, stride: int): - """1D only unfolding similar to the one from PyTorch. - However PyTorch unfold is extremely slow. +def _unfold1d(x, kernel_size, stride): + # https://github.com/PaddlePaddle/Paddle/pull/70102 + """1D only unfolding similar to the one from Paddlepaddle. Given an _input tensor of size `[*, T]` this will return a tensor `[*, F, K]` with `K` the kernel size, and `F` the number @@ -28,21 +28,28 @@ def unfold(_input, kernel_size: int, stride: int): - Inputs: `_input` is `[*, T]` - Output: `[*, F, kernel_size]` with `F = 1 + ceil((T - kernel_size) / stride)` - - ..Warning:: unlike PyTorch unfold, this will pad the _input - so that any position in `_input` is covered by at least one frame. """ - shape = list(_input.shape) - length = shape.pop(-1) + + if 3 != x.dim(): + raise NotImplementedError + + N, C, length = x.shape + x = x.reshape([N * C, 1, length]) + n_frames = math.ceil((max(length, kernel_size) - kernel_size) / stride) + 1 tgt_length = (n_frames - 1) * stride + kernel_size - padded = F.pad(_input, (0, tgt_length - length), data_format="NCL") - strides: typing.List[int] = [] - for dim in range(padded.dim()): - strides.append(padded.strides[dim]) - assert strides.pop(-1) == 1, "data should be contiguous" - strides = strides + [stride, 1] - return padded.as_strided(shape + [n_frames, kernel_size], strides) + x = F.pad(x, (0, tgt_length - length), data_format="NCL") + + x = x.unsqueeze(-1) + + unfolded = paddle.nn.functional.unfold( + x, + kernel_sizes=(kernel_size, 1), + strides=(stride, 1), ) + + unfolded = unfolded.transpose([0, 2, 1]) + unfolded = unfolded.reshape([N, C, *unfolded.shape[1:]]) + return unfolded class Meter(paddle.nn.Layer): @@ -218,8 +225,8 @@ class Meter(paddle.nn.Layer): kernel_size = int(T_g * self.rate) stride = int(T_g * self.rate * step) - print("--", kernel_size, stride) - unfolded = unfold(input_data.transpose([0, 2, 1]), kernel_size, stride) + unfolded = _unfold1d( + input_data.transpose([0, 2, 1]), kernel_size, stride) unfolded = unfolded.transpose([0, 1, 3, 2]) return unfolded