From 33eba3370bd6a1e12dafe8d3a67a2a32832bdc29 Mon Sep 17 00:00:00 2001
From: drryanhuang <zihaohuang@aliyun.com>
Date: Tue, 31 Dec 2024 08:05:56 +0000
Subject: [PATCH] fix slice && tensor.to

---
 audio/audiotools/core/audio_signal.py         | 19 +++---------------
 audio/audiotools/core/util.py                 | 20 ++++++++++++++++---
 .../audiotools/core/test_audio_signal✅.py   |  8 ++++++--
 3 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/audio/audiotools/core/audio_signal.py b/audio/audiotools/core/audio_signal.py
index 68de04731..92cb88353 100644
--- a/audio/audiotools/core/audio_signal.py
+++ b/audio/audiotools/core/audio_signal.py
@@ -764,19 +764,6 @@ class AudioSignal(
         self.sample_rate = sample_rate
         return self
 
-    @staticmethod
-    def move_to_device(data, device):
-        if device is None or device == "":
-            return data
-        elif device == 'cpu':
-            return paddle.to_tensor(data, place=paddle.CPUPlace())
-        elif device in ('gpu', 'cuda'):
-            return paddle.to_tensor(data, place=paddle.CUDAPlace())
-        else:
-            device = device.replace("cuda",
-                                    "gpu") if "cuda" in device else device
-            return data.to(device)
-
     # Tensor operations
     def to(self, device: str):
         """Moves all tensors contained in signal to the specified device.
@@ -793,11 +780,11 @@ class AudioSignal(
             AudioSignal with all tensors moved to specified device.
         """
         if self._loudness is not None:
-            self._loudness = self.move_to_device(self._loudness, device)
+            self._loudness = util.move_to_device(self._loudness, device)
         if self.stft_data is not None:
-            self.stft_data = self.move_to_device(self.stft_data, device)
+            self.stft_data = util.move_to_device(self.stft_data, device)
         if self.audio_data is not None:
-            self.audio_data = self.move_to_device(self.audio_data, device)
+            self.audio_data = util.move_to_device(self.audio_data, device)
         return self
 
     def float(self):
diff --git a/audio/audiotools/core/util.py b/audio/audiotools/core/util.py
index cf9f99636..0881dd0b4 100644
--- a/audio/audiotools/core/util.py
+++ b/audio/audiotools/core/util.py
@@ -79,7 +79,7 @@ def bool_index_compat(x, mask):
         - For versions below 2.6, the tensor and mask are converted to NumPy arrays, the indexing
           operation is performed using NumPy, and the result is converted back to a PaddlePaddle tensor.
     """
-    if satisfy_paddle_version("2.6") or isinstance(mask, (int, list)):
+    if satisfy_paddle_version("2.6") or isinstance(mask, (int, list, slice)):
         return x[mask]
     else:
         x_np = x.cpu().numpy()[mask.cpu().numpy()]
@@ -389,6 +389,18 @@ def chdir(newdir: typing.Union[Path, str]):
         os.chdir(curdir)
 
 
+def move_to_device(data, device):
+    """Return `data` on `device`; None or "" returns `data` unchanged."""
+    if device is None or device == "":
+        return data
+    elif device == 'cpu':
+        return paddle.to_tensor(data, place=paddle.CPUPlace())
+    elif device in ('gpu', 'cuda'):  # CUDAPlace needs a device id; use GPU 0
+        return paddle.to_tensor(data, place=paddle.CUDAPlace(0))
+    device = device.replace("cuda", "gpu") if "cuda" in device else device
+    return data.to(device)  # e.g. "cuda:1" has been rewritten to "gpu:1"
+
+
 def prepare_batch(batch: typing.Union[dict, list, paddle.Tensor],
                   device: str="cpu"):
     """Moves items in a batch (typically generated by a DataLoader as a list
@@ -413,12 +425,14 @@ def prepare_batch(batch: typing.Union[dict, list, paddle.Tensor],
         batch = flatten(batch)
         for key, val in batch.items():
             try:
-                batch[key] = val.to(device)
+                # move_to_device also accepts None/"" and "cuda" device names.
+                batch[key] = move_to_device(val, device)
             except:
                 pass
         batch = unflatten(batch)
     elif paddle.is_tensor(batch):
-        batch = batch.to(device)
+        # move_to_device also accepts None/"" and "cuda" device names.
+        batch = move_to_device(batch, device)
     elif isinstance(batch, list):
         for i in range(len(batch)):
             try:
diff --git a/audio/tests/audiotools/core/test_audio_signal✅.py b/audio/tests/audiotools/core/test_audio_signal✅.py
index 3bcb4a166..bd22b692c 100644
--- a/audio/tests/audiotools/core/test_audio_signal✅.py
+++ b/audio/tests/audiotools/core/test_audio_signal✅.py
@@ -244,7 +244,9 @@ def test_indexing():
     indexed = sig1[mask]
 
     assert np.allclose(indexed.audio_data, sig1.audio_data[mask])
-    assert np.allclose(indexed.stft_data, sig1.stft_data[mask])
+    # Index via bool_index_compat (NumPy fallback for Paddle below 2.6).
+    assert np.allclose(indexed.stft_data,
+                       util.bool_index_compat(sig1.stft_data, mask))
     assert np.allclose(indexed._loudness, sig1._loudness[mask])
 
     # Set parts of signal using tensor
@@ -282,7 +284,9 @@ def test_indexing():
         a1 = getattr(sig1, k)
         a2 = getattr(sig2, k)
 
-        assert np.allclose(a1[mask], a2[mask])
+        # Index via bool_index_compat (NumPy fallback for Paddle below 2.6).
+        assert np.allclose(
+            util.bool_index_compat(a1, mask), util.bool_index_compat(a2, mask))
 
 
 def test_zeros():