Add librosa and soundfile unittest.

4 years ago · a85250cf16
parent 169040b4a2
commit a85250cf16
6 changed files with 363 additions and 1 deletions
--- a/paddleaudio/setup.py
+++ b/paddleaudio/setup.py
@ -82,7 +82,9 @@ setuptools.setup(
        'dtaidistance >= 2.3.6',
        'mcd >= 0.4',
    ],
-    setup_requires=['nose'],
+    # filecmp is part of the Python standard library, so it must not be
+    # listed as an installable requirement.
+    setup_requires=['nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1'],
    cmdclass={
        'install': InstallCommand,
        'test': NoseTestCommand,
--- a/paddleaudio/tests/backends/__init__.py
+++ b/paddleaudio/tests/backends/__init__.py
@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/paddleaudio/tests/backends/base.py
+++ b/paddleaudio/tests/backends/base.py
@ -0,0 +1,34 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import os
+import unittest
+import urllib.request
+
+# Remote wav fixtures; BackendTest fetches any that are missing locally.
+mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
+multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'
+
+
+class BackendTest(unittest.TestCase):
+    """Base test case that supplies the wav fixtures for backend tests.
+
+    ``setUp`` fills ``self.files`` with the local paths of the mono and the
+    multi-channel wav file, in that order. A file that is not already present
+    in the current working directory is downloaded first.
+    """
+
+    def setUp(self):
+        self.initWavInput()
+
+    def initWavInput(self):
+        """Download missing fixtures and record their paths in ``self.files``."""
+        self.files = []
+        for url in [mono_channel_wav, multi_channels_wav]:
+            filename = os.path.basename(url)
+            if not os.path.isfile(filename):
+                # Fetch to a scratch name and rename only once the transfer
+                # has finished, so an interrupted download is never mistaken
+                # for a valid fixture on the next run.
+                partial = filename + '.part'
+                urllib.request.urlretrieve(url, partial)
+                os.replace(partial, filename)
+            self.files.append(filename)
+
+    def initParmas(self):
+        """Parameter hook for subclasses; raises unless overridden.
+
+        The misspelt name is kept because subclasses override it as-is.
+        """
+        raise NotImplementedError
--- a/paddleaudio/tests/backends/soundfile/__init__.py
+++ b/paddleaudio/tests/backends/soundfile/__init__.py
@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/paddleaudio/tests/backends/soundfile/test_io.py
+++ b/paddleaudio/tests/backends/soundfile/test_io.py
@ -0,0 +1,73 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import filecmp
+import os
+import unittest
+
+import numpy as np
+import soundfile as sf
+
+import paddleaudio
+from ..base import BackendTest
+
+
+class TestIO(BackendTest):
+    """Check paddleaudio's load/save against soundfile as the reference."""
+
+    def test_load_mono_channel(self):
+        """Loading the first fixture must match ``soundfile.read``."""
+        sf_data, sf_sr = sf.read(self.files[0])
+        pa_data, pa_sr = paddleaudio.load(
+            self.files[0], normal=False, dtype='float64')
+
+        self.assertEqual(sf_data.dtype, pa_data.dtype)
+        self.assertEqual(sf_sr, pa_sr)
+        np.testing.assert_array_almost_equal(sf_data, pa_data)
+
+    def test_load_multi_channels(self):
+        """Loading the second fixture with ``mono=False`` must match soundfile."""
+        sf_data, sf_sr = sf.read(self.files[1])
+        sf_data = sf_data.T  # Channel dim first
+        pa_data, pa_sr = paddleaudio.load(
+            self.files[1], mono=False, normal=False, dtype='float64')
+
+        self.assertEqual(sf_data.dtype, pa_data.dtype)
+        self.assertEqual(sf_sr, pa_sr)
+        np.testing.assert_array_almost_equal(sf_data, pa_data)
+
+    def _assert_save_matches_soundfile(self, waveform, sr):
+        """Write ``waveform`` with both backends and require identical files.
+
+        Args:
+            waveform: Samples in the layout passed unchanged to both writers.
+            sr: Sample rate passed to both writers.
+        """
+        sf_tmp_file = 'sf_tmp.wav'
+        pa_tmp_file = 'pa_tmp.wav'
+        try:
+            sf.write(sf_tmp_file, waveform, sr)
+            paddleaudio.save(waveform, sr, pa_tmp_file)
+
+            # shallow=False forces a byte-by-byte comparison. The default
+            # only compares os.stat() signatures when they match, and two
+            # equally sized files written back to back can share an mtime.
+            self.assertTrue(
+                filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))
+        finally:
+            # Remove the scratch files even when a write or the comparison
+            # fails, so a failing run leaves nothing behind.
+            for path in (sf_tmp_file, pa_tmp_file):
+                if os.path.exists(path):
+                    os.remove(path)
+
+    def test_save_mono_channel(self):
+        """Saving a random 1-D int16 signal must match ``soundfile.write``."""
+        # ``high`` is exclusive, so the samples span the full int16 range.
+        waveform, sr = np.random.randint(
+            low=-32768, high=32768, size=(48000), dtype=np.int16), 16000
+        self._assert_save_matches_soundfile(waveform, sr)
+
+    def test_save_multi_channels(self):
+        """Saving a random 2-channel int16 signal must match soundfile."""
+        waveform, sr = np.random.randint(
+            low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
+        # Both writers receive the transposed array, as in the original test.
+        self._assert_save_matches_soundfile(waveform.T, sr)
+
+
+# Allow running this test module directly as a script.
+if __name__ == '__main__':
+    unittest.main()
--- a/paddleaudio/tests/features/test_librosa.py
+++ b/paddleaudio/tests/features/test_librosa.py
@ -0,0 +1,227 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+
+import librosa
+import numpy as np
+import paddle
+
+import paddleaudio
+from .base import FeatTest
+from paddleaudio.functional.window import get_window
+
+
+class TestLibrosa(FeatTest):
+    """Compare paddle / paddleaudio feature extraction with librosa.
+
+    The tests read ``self.waveform`` and ``self.sr``; these are presumably
+    prepared by ``FeatTest`` (imported from ``.base``, not shown here).
+    """
+
+    def initParmas(self):
+        """Set the STFT and mel parameters shared by the tests below."""
+        self.n_fft = 512
+        self.hop_length = 128
+        self.n_mels = 40
+        self.fmin = 0.0
+        self.window_str = 'hann'
+        self.pad_mode = 'reflect'
+
+    def test_stft(self):
+        """``paddle.signal.stft`` must agree with ``librosa.core.stft``."""
+        if len(self.waveform.shape) == 2:  # (C, T)
+            self.waveform = self.waveform.squeeze(
+                0)  # 1D input for librosa.core.stft
+
+        feature_librosa = librosa.core.stft(
+            y=self.waveform,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            win_length=None,
+            window=self.window_str,
+            center=True,
+            dtype=None,
+            pad_mode=self.pad_mode, )
+        # paddle.signal.stft is given a batched input and an explicit window
+        # tensor; the batch dim is squeezed away again after the transform.
+        x = paddle.to_tensor(self.waveform).unsqueeze(0)
+        window = get_window(self.window_str, self.n_fft, dtype=x.dtype)
+        feature_paddle = paddle.signal.stft(
+            x=x,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            win_length=None,
+            window=window,
+            center=True,
+            pad_mode=self.pad_mode,
+            normalized=False,
+            onesided=True, ).squeeze(0)
+
+        np.testing.assert_array_almost_equal(
+            feature_librosa, feature_paddle, decimal=5)
+
+    def test_istft(self):
+        """``paddle.signal.istft`` must agree with ``librosa.core.istft``."""
+        if len(self.waveform.shape) == 2:  # (C, T)
+            self.waveform = self.waveform.squeeze(
+                0)  # 1D input for librosa.core.stft
+
+        # Get stft result from librosa.
+        stft_matrix = librosa.core.stft(
+            y=self.waveform,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            win_length=None,
+            window=self.window_str,
+            center=True,
+            pad_mode=self.pad_mode, )
+
+        feature_librosa = librosa.core.istft(
+            stft_matrix=stft_matrix,
+            hop_length=self.hop_length,
+            win_length=None,
+            window=self.window_str,
+            center=True,
+            dtype=None,
+            length=None, )
+
+        # Both inverse transforms start from the same librosa STFT matrix.
+        x = paddle.to_tensor(stft_matrix).unsqueeze(0)
+        # The window dtype follows the waveform's tensor dtype, not the
+        # complex dtype of the STFT matrix.
+        window = get_window(
+            self.window_str,
+            self.n_fft,
+            dtype=paddle.to_tensor(self.waveform).dtype)
+        feature_paddle = paddle.signal.istft(
+            x=x,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            win_length=None,
+            window=window,
+            center=True,
+            normalized=False,
+            onesided=True,
+            length=None,
+            return_complex=False, ).squeeze(0)
+
+        np.testing.assert_array_almost_equal(
+            feature_librosa, feature_paddle, decimal=5)
+
+    def test_mel(self):
+        """Both paddleaudio mel filter banks must match ``librosa.filters.mel``."""
+        feature_librosa = librosa.filters.mel(
+            sr=self.sr,
+            n_fft=self.n_fft,
+            n_mels=self.n_mels,
+            fmin=self.fmin,
+            fmax=None,
+            htk=False,
+            norm='slaney',
+            dtype=self.waveform.dtype, )
+        feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
+            sr=self.sr,
+            n_fft=self.n_fft,
+            n_mels=self.n_mels,
+            fmin=self.fmin,
+            fmax=None,
+            htk=False,
+            norm='slaney',
+            dtype=self.waveform.dtype, )
+        # The tensor is built only to obtain the paddle dtype matching the
+        # waveform; its values are not used.
+        x = paddle.to_tensor(self.waveform)
+        feature_functional = paddleaudio.functional.compute_fbank_matrix(
+            sr=self.sr,
+            n_fft=self.n_fft,
+            n_mels=self.n_mels,
+            f_min=self.fmin,
+            f_max=None,
+            htk=False,
+            norm='slaney',
+            dtype=x.dtype, )
+
+        np.testing.assert_array_almost_equal(feature_librosa,
+                                             feature_compliance)
+        np.testing.assert_array_almost_equal(feature_librosa,
+                                             feature_functional)
+
+    def test_melspect(self):
+        """Mel spectrograms from paddleaudio must match librosa's."""
+        if len(self.waveform.shape) == 2:  # (C, T)
+            self.waveform = self.waveform.squeeze(
+                0)  # 1D input for librosa.feature.melspectrogram
+
+        # librosa:
+        feature_librosa = librosa.feature.melspectrogram(
+            y=self.waveform,
+            sr=self.sr,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            n_mels=self.n_mels,
+            fmin=self.fmin)
+
+        # paddleaudio.compliance.librosa:
+        feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
+            x=self.waveform,
+            sr=self.sr,
+            window_size=self.n_fft,
+            hop_length=self.hop_length,
+            n_mels=self.n_mels,
+            fmin=self.fmin,
+            to_db=False)
+
+        # paddleaudio.features.layer
+        x = paddle.to_tensor(
+            self.waveform, dtype=paddle.float64).unsqueeze(0)  # Add batch dim.
+        feature_extractor = paddleaudio.features.MelSpectrogram(
+            sr=self.sr,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            n_mels=self.n_mels,
+            f_min=self.fmin,
+            dtype=x.dtype)
+        feature_layer = feature_extractor(x).squeeze(0).numpy()
+
+        np.testing.assert_array_almost_equal(
+            feature_librosa, feature_compliance, decimal=5)
+        np.testing.assert_array_almost_equal(
+            feature_librosa, feature_layer, decimal=5)
+
+    def test_log_melspect(self):
+        """Log-mel spectrograms from paddleaudio must match librosa's."""
+        if len(self.waveform.shape) == 2:  # (C, T)
+            self.waveform = self.waveform.squeeze(
+                0)  # 1D input for librosa.feature.melspectrogram
+
+        # librosa:
+        feature_librosa = librosa.feature.melspectrogram(
+            y=self.waveform,
+            sr=self.sr,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            n_mels=self.n_mels,
+            fmin=self.fmin)
+        feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
+
+        # paddleaudio.compliance.librosa:
+        # NOTE(review): to_db is not passed here, unlike in test_melspect;
+        # presumably the default yields dB output -- confirm against the API.
+        feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
+            x=self.waveform,
+            sr=self.sr,
+            window_size=self.n_fft,
+            hop_length=self.hop_length,
+            n_mels=self.n_mels,
+            fmin=self.fmin)
+
+        # paddleaudio.features.layer
+        x = paddle.to_tensor(
+            self.waveform, dtype=paddle.float64).unsqueeze(0)  # Add batch dim.
+        feature_extractor = paddleaudio.features.LogMelSpectrogram(
+            sr=self.sr,
+            n_fft=self.n_fft,
+            hop_length=self.hop_length,
+            n_mels=self.n_mels,
+            f_min=self.fmin,
+            dtype=x.dtype)
+        feature_layer = feature_extractor(x).squeeze(0).numpy()
+
+        np.testing.assert_array_almost_equal(
+            feature_librosa, feature_compliance, decimal=5)
+        # The layer output is compared at a looser tolerance (4 decimals).
+        np.testing.assert_array_almost_equal(
+            feature_librosa, feature_layer, decimal=4)
+
+
+# Allow running this test module directly as a script.
+if __name__ == '__main__':
+    unittest.main()