From 33eba3370bd6a1e12dafe8d3a67a2a32832bdc29 Mon Sep 17 00:00:00 2001 From: drryanhuang Date: Tue, 31 Dec 2024 08:05:56 +0000 Subject: [PATCH] fix slice && tensor.to --- audio/audiotools/core/audio_signal.py | 19 +++--------------- audio/audiotools/core/util.py | 20 ++++++++++++++++--- .../audiotools/core/test_audio_signal✅.py | 8 ++++++-- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/audio/audiotools/core/audio_signal.py b/audio/audiotools/core/audio_signal.py index 68de04731..92cb88353 100644 --- a/audio/audiotools/core/audio_signal.py +++ b/audio/audiotools/core/audio_signal.py @@ -764,19 +764,6 @@ class AudioSignal( self.sample_rate = sample_rate return self - @staticmethod - def move_to_device(data, device): - if device is None or device == "": - return data - elif device == 'cpu': - return paddle.to_tensor(data, place=paddle.CPUPlace()) - elif device in ('gpu', 'cuda'): - return paddle.to_tensor(data, place=paddle.CUDAPlace()) - else: - device = device.replace("cuda", - "gpu") if "cuda" in device else device - return data.to(device) - # Tensor operations def to(self, device: str): """Moves all tensors contained in signal to the specified device. @@ -793,11 +780,11 @@ class AudioSignal( AudioSignal with all tensors moved to specified device. """ if self._loudness is not None: - self._loudness = self.move_to_device(self._loudness, device) + self._loudness = util.move_to_device(self._loudness, device) if self.stft_data is not None: - self.stft_data = self.move_to_device(self.stft_data, device) + self.stft_data = util.move_to_device(self.stft_data, device) if self.audio_data is not None: - self.audio_data = self.move_to_device(self.audio_data, device) + self.audio_data = util.move_to_device(self.audio_data, device) return self def float(self): diff --git a/audio/audiotools/core/util.py b/audio/audiotools/core/util.py index cf9f99636..0881dd0b4 100644 --- a/audio/audiotools/core/util.py +++ b/audio/audiotools/core/util.py @@ -79,7 +79,7 @@ def bool_index_compat(x, mask): - For versions below 2.6, the tensor and mask are converted to NumPy arrays, the indexing operation is performed using NumPy, and the result is converted back to a PaddlePaddle tensor. """ - if satisfy_paddle_version("2.6") or isinstance(mask, (int, list)): + if satisfy_paddle_version("2.6") or isinstance(mask, (int, list, slice)): return x[mask] else: x_np = x.cpu().numpy()[mask.cpu().numpy()] @@ -389,6 +389,18 @@ def chdir(newdir: typing.Union[Path, str]): os.chdir(curdir) +def move_to_device(data, device): + if device is None or device == "": + return data + elif device == 'cpu': + return paddle.to_tensor(data, place=paddle.CPUPlace()) + elif device in ('gpu', 'cuda'): + return paddle.to_tensor(data, place=paddle.CUDAPlace()) + else: + device = device.replace("cuda", "gpu") if "cuda" in device else device + return data.to(device) + + def prepare_batch(batch: typing.Union[dict, list, paddle.Tensor], device: str="cpu"): """Moves items in a batch (typically generated by a DataLoader as a list @@ -413,12 +425,14 @@ def prepare_batch(batch: typing.Union[dict, list, paddle.Tensor], batch = flatten(batch) for key, val in batch.items(): try: - batch[key] = val.to(device) + # batch[key] = val.to(device) + batch[key] = move_to_device(val, device) except: pass batch = unflatten(batch) elif paddle.is_tensor(batch): - batch = batch.to(device) + # batch = batch.to(device) + batch = move_to_device(batch, device) elif isinstance(batch, list): for i in range(len(batch)): try: diff --git a/audio/tests/audiotools/core/test_audio_signal✅.py b/audio/tests/audiotools/core/test_audio_signal✅.py index 3bcb4a166..bd22b692c 100644 --- a/audio/tests/audiotools/core/test_audio_signal✅.py +++ b/audio/tests/audiotools/core/test_audio_signal✅.py @@ -244,7 +244,9 @@ def test_indexing(): indexed = sig1[mask] assert np.allclose(indexed.audio_data, sig1.audio_data[mask]) - assert np.allclose(indexed.stft_data, sig1.stft_data[mask]) + # assert np.allclose(indexed.stft_data, sig1.stft_data[mask]) + assert np.allclose(indexed.stft_data, + util.bool_index_compat(sig1.stft_data, mask)) assert np.allclose(indexed._loudness, sig1._loudness[mask]) # Set parts of signal using tensor @@ -282,7 +284,9 @@ def test_indexing(): a1 = getattr(sig1, k) a2 = getattr(sig2, k) - assert np.allclose(a1[mask], a2[mask]) + # assert np.allclose(a1[mask], a2[mask]) + assert np.allclose( + util.bool_index_compat(a1, mask), util.bool_index_compat(a2, mask)) def test_zeros():