diff --git a/audio/audiotools/core/loudness.py b/audio/audiotools/core/loudness.py
index 4d85edf62..c852dea99 100644
--- a/audio/audiotools/core/loudness.py
+++ b/audio/audiotools/core/loudness.py
@@ -10,9 +10,9 @@ import scipy
 from . import _julius
 
 
-def unfold(_input, kernel_size: int, stride: int):
-    """1D only unfolding similar to the one from PyTorch.
-    However PyTorch unfold is extremely slow.
+def _unfold1d(x, kernel_size, stride):
+    # https://github.com/PaddlePaddle/Paddle/pull/70102
+    """1D only unfolding similar to the one from PaddlePaddle.
 
     Given an _input tensor of size `[*, T]` this will return
     a tensor `[*, F, K]` with `K` the kernel size, and `F` the number
@@ -28,21 +28,28 @@ def unfold(_input, kernel_size: int, stride: int):
 
         - Inputs: `_input` is `[*, T]`
         - Output: `[*, F, kernel_size]` with `F = 1 + ceil((T - kernel_size) / stride)`
-
-    ..Warning:: unlike PyTorch unfold, this will pad the _input
-        so that any position in `_input` is covered by at least one frame.
     """
-    shape = list(_input.shape)
-    length = shape.pop(-1)
+
+    if 3 != x.dim():
+        raise NotImplementedError
+
+    N, C, length = x.shape
+    x = x.reshape([N * C, 1, length])
+
     n_frames = math.ceil((max(length, kernel_size) - kernel_size) / stride) + 1
     tgt_length = (n_frames - 1) * stride + kernel_size
-    padded = F.pad(_input, (0, tgt_length - length), data_format="NCL")
-    strides: typing.List[int] = []
-    for dim in range(padded.dim()):
-        strides.append(padded.strides[dim])
-    assert strides.pop(-1) == 1, "data should be contiguous"
-    strides = strides + [stride, 1]
-    return padded.as_strided(shape + [n_frames, kernel_size], strides)
+    x = F.pad(x, (0, tgt_length - length), data_format="NCL")
+
+    x = x.unsqueeze(-1)
+
+    unfolded = paddle.nn.functional.unfold(
+        x,
+        kernel_sizes=(kernel_size, 1),
+        strides=(stride, 1), )
+
+    unfolded = unfolded.transpose([0, 2, 1])
+    unfolded = unfolded.reshape([N, C, *unfolded.shape[1:]])
+    return unfolded
 
 
 class Meter(paddle.nn.Layer):
@@ -218,8 +225,8 @@ class Meter(paddle.nn.Layer):
 
         kernel_size = int(T_g * self.rate)
         stride = int(T_g * self.rate * step)
-        print("--", kernel_size, stride)
-        unfolded = unfold(input_data.transpose([0, 2, 1]), kernel_size, stride)
+        unfolded = _unfold1d(
+            input_data.transpose([0, 2, 1]), kernel_size, stride)
         unfolded = unfolded.transpose([0, 1, 3, 2])
 
         return unfolded