Add librosa and soundfile unittest.

4 years ago · a85250cf16
parent 169040b4a2
commit a85250cf16
6 changed files with 363 additions and 1 deletions
--- a/paddleaudio/setup.py
+++ b/paddleaudio/setup.py
@@ -82,7 +82,9 @@ setuptools.setup(
        'dtaidistance >= 2.3.6',
        'mcd >= 0.4',
    ],
-    setup_requires=['nose'],
+    setup_requires=[
        'nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1', 'filecmp'
    ],
    cmdclass={
        'install': InstallCommand,
        'test': NoseTestCommand,
--- a/paddleaudio/tests/backends/__init__.py
+++ b/paddleaudio/tests/backends/__init__.py
@@ -0,0 +1,13 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/paddleaudio/tests/backends/base.py
+++ b/paddleaudio/tests/backends/base.py
@@ -0,0 +1,34 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
 import unittest
 import urllib.request
 # Sample recordings used by the backend tests; fetched on demand by
 # BackendTest.initWavInput.
 mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
 multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'


 class BackendTest(unittest.TestCase):
    """Base test case that makes the sample wav files available locally.

    After ``setUp`` runs, ``self.files`` holds the local paths of the
    mono-channel sample followed by the multi-channel sample.
    """

    def setUp(self):
        """Ensure the sample wav files exist before each test."""
        self.initWavInput()

    def initWavInput(self):
        """Populate ``self.files`` with local paths of the sample wav files.

        Each file lives in the current working directory under the basename
        of its URL and is downloaded only when it is not already present.
        """
        self.files = []
        for url in [mono_channel_wav, multi_channels_wav]:
            local_path = os.path.basename(url)
            if not os.path.isfile(local_path):
                self._download(url, local_path)
            self.files.append(local_path)

    @staticmethod
    def _download(url, dest):
        """Retrieve ``url`` into ``dest`` without leaving a partial file.

        The data is first written to a temporary sibling file and only
        renamed to ``dest`` once the transfer finished.  Writing straight to
        ``dest`` would leave a truncated file behind on an interrupted
        download, and later runs would accept it because it exists.
        """
        partial = '{}.{}.part'.format(dest, os.getpid())
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, dest)
        finally:
            # Only present here when the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)

    def initParmas(self):
        """Hook for subclasses to define test parameters.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        raise NotImplementedError
--- a/paddleaudio/tests/backends/soundfile/__init__.py
+++ b/paddleaudio/tests/backends/soundfile/__init__.py
@@ -0,0 +1,13 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
--- a/paddleaudio/tests/backends/soundfile/test_io.py
+++ b/paddleaudio/tests/backends/soundfile/test_io.py
@@ -0,0 +1,73 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import filecmp
 import os
 import unittest
 import numpy as np
 import soundfile as sf
 import paddleaudio
 from ..base import BackendTest
 class TestIO(BackendTest):
    """Compare ``paddleaudio.load`` / ``paddleaudio.save`` with soundfile."""

    def test_load_mono_channel(self):
        """Loading the first sample must match ``soundfile.read``."""
        sf_data, sf_sr = sf.read(self.files[0])
        pa_data, pa_sr = paddleaudio.load(
            self.files[0], normal=False, dtype='float64')
        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_load_multi_channels(self):
        """Loading the second sample with ``mono=False`` must match soundfile."""
        sf_data, sf_sr = sf.read(self.files[1])
        sf_data = sf_data.T  # Channel dim first
        pa_data, pa_sr = paddleaudio.load(
            self.files[1], mono=False, normal=False, dtype='float64')
        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def _assert_save_matches_soundfile(self, waveform, sr):
        """Write ``waveform`` with both libraries and compare the files.

        Args:
            waveform: int16 samples in the layout passed to ``sf.write``.
            sr: sample rate passed to both writers.
        """
        sf_tmp_file = 'sf_tmp.wav'
        pa_tmp_file = 'pa_tmp.wav'
        try:
            sf.write(sf_tmp_file, waveform, sr)
            paddleaudio.save(waveform, sr, pa_tmp_file)
            # shallow=False forces a byte-by-byte comparison; the default
            # shallow mode may declare files with identical os.stat
            # signatures equal without reading their contents.
            self.assertTrue(
                filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))
        finally:
            # Clean up even when a write or the assertion fails.
            for path in (sf_tmp_file, pa_tmp_file):
                if os.path.exists(path):
                    os.remove(path)

    def test_save_mono_channel(self):
        """Saving random 1-D int16 samples must produce identical files."""
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=48000, dtype=np.int16), 16000
        self._assert_save_matches_soundfile(waveform, sr)

    def test_save_multi_channels(self):
        """Saving random two-channel int16 samples must produce identical files."""
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
        # Both writers receive the transposed array, as in the load test
        # where soundfile data is transposed to put the channel dim first.
        self._assert_save_matches_soundfile(waveform.T, sr)


 if __name__ == '__main__':
    unittest.main()
--- a/paddleaudio/tests/features/test_librosa.py
+++ b/paddleaudio/tests/features/test_librosa.py
@@ -0,0 +1,227 @@
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import unittest
 import librosa
 import numpy as np
 import paddle
 import paddleaudio
 from .base import FeatTest
 from paddleaudio.functional.window import get_window
 class TestLibrosa(FeatTest):
    """Check paddle / paddleaudio feature outputs against librosa."""

    def initParmas(self):
        """Define the analysis parameters shared by the tests below."""
        self.window_str = 'hann'
        self.pad_mode = 'reflect'
        self.fmin = 0.0
        self.n_mels = 40
        self.n_fft = 512
        self.hop_length = 128

    def _flatten_waveform(self):
        """Squeeze axis 0 of a 2-D ``self.waveform`` in place.

        The librosa calls in these tests are given 1-D input.
        """
        if len(self.waveform.shape) == 2:  # (C, T)
            self.waveform = self.waveform.squeeze(0)

    def test_stft(self):
        """``paddle.signal.stft`` must agree with ``librosa.core.stft``."""
        self._flatten_waveform()

        expected = librosa.core.stft(
            y=self.waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            dtype=None,
            pad_mode=self.pad_mode)

        batch = paddle.to_tensor(self.waveform).unsqueeze(0)
        win = get_window(self.window_str, self.n_fft, dtype=batch.dtype)
        spectrum = paddle.signal.stft(
            x=batch,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=win,
            center=True,
            pad_mode=self.pad_mode,
            normalized=False,
            onesided=True)
        actual = spectrum.squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_istft(self):
        """``paddle.signal.istft`` must agree with ``librosa.core.istft``."""
        self._flatten_waveform()

        # Both inverse transforms start from the same librosa STFT matrix.
        stft_matrix = librosa.core.stft(
            y=self.waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            pad_mode=self.pad_mode)
        expected = librosa.core.istft(
            stft_matrix=stft_matrix,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            dtype=None,
            length=None)

        batch = paddle.to_tensor(stft_matrix).unsqueeze(0)
        wave_dtype = paddle.to_tensor(self.waveform).dtype
        win = get_window(self.window_str, self.n_fft, dtype=wave_dtype)
        restored = paddle.signal.istft(
            x=batch,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=win,
            center=True,
            normalized=False,
            onesided=True,
            length=None,
            return_complex=False)
        actual = restored.squeeze(0)

        np.testing.assert_array_almost_equal(expected, actual, decimal=5)

    def test_mel(self):
        """Both paddleaudio filter-bank builders must match librosa's."""
        # Keyword arguments accepted under the same names by all three calls.
        common = dict(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            htk=False,
            norm='slaney')

        expected = librosa.filters.mel(
            fmin=self.fmin, fmax=None, dtype=self.waveform.dtype, **common)
        via_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
            fmin=self.fmin, fmax=None, dtype=self.waveform.dtype, **common)

        tensor = paddle.to_tensor(self.waveform)
        via_functional = paddleaudio.functional.compute_fbank_matrix(
            f_min=self.fmin, f_max=None, dtype=tensor.dtype, **common)

        np.testing.assert_array_almost_equal(expected, via_compliance)
        np.testing.assert_array_almost_equal(expected, via_functional)

    def test_melspect(self):
        """Mel spectrograms from paddleaudio must match librosa's."""
        self._flatten_waveform()

        # Reference from librosa.
        expected = librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.compliance.librosa, with dB conversion switched off.
        via_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin,
            to_db=False)

        # paddleaudio.features layer, fed a float64 tensor with a batch dim.
        batch = paddle.to_tensor(self.waveform, dtype=paddle.float64)
        batch = batch.unsqueeze(0)
        extractor = paddleaudio.features.MelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        via_layer = extractor(batch).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, via_compliance, decimal=5)
        np.testing.assert_array_almost_equal(expected, via_layer, decimal=5)

    def test_log_melspect(self):
        """Log-mel spectrograms from paddleaudio must match librosa's."""
        self._flatten_waveform()

        # Reference from librosa: mel power spectrogram converted to dB.
        power = librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)
        expected = librosa.power_to_db(power, top_db=None)

        # paddleaudio.compliance.librosa, called without ``to_db``.
        via_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.features layer, fed a float64 tensor with a batch dim.
        batch = paddle.to_tensor(self.waveform, dtype=paddle.float64)
        batch = batch.unsqueeze(0)
        extractor = paddleaudio.features.LogMelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        via_layer = extractor(batch).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            expected, via_compliance, decimal=5)
        # The layer output is compared with a looser tolerance.
        np.testing.assert_array_almost_equal(expected, via_layer, decimal=4)


 if __name__ == '__main__':
    unittest.main()