diff --git a/paddleaudio/setup.py b/paddleaudio/setup.py index 7efa976f..945fa9e2 100644 --- a/paddleaudio/setup.py +++ b/paddleaudio/setup.py @@ -82,7 +82,9 @@ setuptools.setup( 'dtaidistance >= 2.3.6', 'mcd >= 0.4', ], - setup_requires=['nose'], + setup_requires=[ + 'nose', 'librosa==0.8.1', 'soundfile==0.10.3.post1', 'filecmp' + ], cmdclass={ 'install': InstallCommand, 'test': NoseTestCommand, diff --git a/paddleaudio/tests/backends/__init__.py b/paddleaudio/tests/backends/__init__.py new file mode 100644 index 00000000..97043fd7 --- /dev/null +++ b/paddleaudio/tests/backends/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/paddleaudio/tests/backends/base.py b/paddleaudio/tests/backends/base.py new file mode 100644 index 00000000..a6719188 --- /dev/null +++ b/paddleaudio/tests/backends/base.py @@ -0,0 +1,34 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
#
# File: paddleaudio/tests/backends/base.py
import os
import unittest
import urllib.request

# Remote audio fixtures; each is stored under its URL basename in the
# current working directory.
mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'


class BackendTest(unittest.TestCase):
    """Base class for backend tests that need the sample wav files.

    ``setUp`` populates ``self.files`` with the local paths of the mono
    fixture (index 0) and the multi-channel fixture (index 1).
    """

    def setUp(self):
        self.initWavInput()

    def initWavInput(self):
        """Make sure both fixtures exist locally and record their paths.

        A fixture is fetched only when no file with its basename exists in
        the current working directory.  The download goes to a ``.part``
        file that is renamed into place on success, so an interrupted
        transfer can never leave a truncated wav that later runs would
        mistake for a valid fixture.

        Raises:
            Whatever ``urllib.request.urlretrieve`` raises when the download
            fails; the partial file is removed before the error propagates.
        """
        self.files = []
        for url in [mono_channel_wav, multi_channels_wav]:
            filename = os.path.basename(url)
            if not os.path.isfile(filename):
                partial = filename + '.part'
                try:
                    urllib.request.urlretrieve(url, partial)
                    # Rename only once the whole file has been written.
                    os.replace(partial, filename)
                finally:
                    # Only present here if the download or rename failed.
                    if os.path.exists(partial):
                        os.remove(partial)
            self.files.append(filename)

    def initParmas(self):
        """Hook for subclasses; this base class never calls it itself."""
        raise NotImplementedError


# File: paddleaudio/tests/backends/soundfile/__init__.py (license header only)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# File: paddleaudio/tests/backends/soundfile/test_io.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# File: paddleaudio/tests/backends/soundfile/test_io.py
import filecmp
import os
import tempfile
import unittest

import numpy as np
import soundfile as sf

import paddleaudio
from ..base import BackendTest


class TestIO(BackendTest):
    """Compare ``paddleaudio.load`` / ``paddleaudio.save`` with soundfile."""

    def test_load_mono_channel(self):
        """Loading the mono fixture matches ``sf.read`` in dtype, rate, data."""
        sf_data, sf_sr = sf.read(self.files[0])
        pa_data, pa_sr = paddleaudio.load(
            self.files[0], normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def test_load_multi_channels(self):
        """Loading the multi-channel fixture matches transposed ``sf.read``."""
        sf_data, sf_sr = sf.read(self.files[1])
        sf_data = sf_data.T  # Channel dim first
        pa_data, pa_sr = paddleaudio.load(
            self.files[1], mono=False, normal=False, dtype='float64')

        self.assertEqual(sf_data.dtype, pa_data.dtype)
        self.assertEqual(sf_sr, pa_sr)
        np.testing.assert_array_almost_equal(sf_data, pa_data)

    def _assert_save_matches_soundfile(self, waveform, sr):
        """Write ``waveform`` with both libraries and compare the files.

        The files live in a per-call temporary directory, so they are
        removed even when the assertion fails and the two save tests cannot
        clobber each other's output.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            sf_tmp_file = os.path.join(tmp_dir, 'sf_tmp.wav')
            pa_tmp_file = os.path.join(tmp_dir, 'pa_tmp.wav')

            sf.write(sf_tmp_file, waveform, sr)
            paddleaudio.save(waveform, sr, pa_tmp_file)

            # shallow=False forces a byte-wise comparison; the default may
            # report equality from matching os.stat() signatures alone.
            self.assertTrue(
                filecmp.cmp(sf_tmp_file, pa_tmp_file, shallow=False))

    def test_save_mono_channel(self):
        """Saving random 1-D int16 samples yields identical files."""
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(48000, ), dtype=np.int16), 16000
        self._assert_save_matches_soundfile(waveform, sr)

    def test_save_multi_channels(self):
        """Saving random 2-channel int16 samples yields identical files."""
        waveform, sr = np.random.randint(
            low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
        # Both writers are handed the transposed array, as before.
        self._assert_save_matches_soundfile(waveform.T, sr)


if __name__ == '__main__':
    unittest.main()

# File: paddleaudio/tests/features/test_librosa.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# File: paddleaudio/tests/features/test_librosa.py
import unittest

import librosa
import numpy as np
import paddle

import paddleaudio
from .base import FeatTest
from paddleaudio.functional.window import get_window


class TestLibrosa(FeatTest):
    """Check paddle / paddleaudio features against librosa references.

    NOTE(review): ``self.waveform`` and ``self.sr`` are presumably set up by
    ``FeatTest`` (not visible here) -- confirm against the base class.
    """

    def initParmas(self):
        """Set the analysis parameters shared by every test."""
        self.n_fft = 512
        self.hop_length = 128
        self.n_mels = 40
        self.fmin = 0.0
        self.window_str = 'hann'
        self.pad_mode = 'reflect'

    def _squeeze_channel_dim(self):
        """Collapse a 2-D (C, T) ``self.waveform`` to 1-D in place."""
        if len(self.waveform.shape) == 2:  # (C, T)
            # Drop the leading channel axis so the calls get 1-D input.
            self.waveform = self.waveform.squeeze(0)

    def test_stft(self):
        """``paddle.signal.stft`` agrees with ``librosa.core.stft``."""
        self._squeeze_channel_dim()

        framing = dict(
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            center=True,
            pad_mode=self.pad_mode, )

        feature_librosa = librosa.core.stft(
            y=self.waveform, window=self.window_str, dtype=None, **framing)

        batch = paddle.to_tensor(self.waveform).unsqueeze(0)
        win = get_window(self.window_str, self.n_fft, dtype=batch.dtype)
        feature_paddle = paddle.signal.stft(
            x=batch, window=win, normalized=False, onesided=True, **framing)
        feature_paddle = feature_paddle.squeeze(0)

        np.testing.assert_array_almost_equal(
            feature_librosa, feature_paddle, decimal=5)

    def test_istft(self):
        """``paddle.signal.istft`` agrees with ``librosa.core.istft``."""
        self._squeeze_channel_dim()

        # Both inverse transforms consume the spectrum computed by librosa.
        stft_matrix = librosa.core.stft(
            y=self.waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            win_length=None,
            window=self.window_str,
            center=True,
            pad_mode=self.pad_mode, )

        inverse_args = dict(
            hop_length=self.hop_length,
            win_length=None,
            center=True,
            length=None, )

        feature_librosa = librosa.core.istft(
            stft_matrix=stft_matrix,
            window=self.window_str,
            dtype=None,
            **inverse_args)

        batch = paddle.to_tensor(stft_matrix).unsqueeze(0)
        wave_dtype = paddle.to_tensor(self.waveform).dtype
        win = get_window(self.window_str, self.n_fft, dtype=wave_dtype)
        feature_paddle = paddle.signal.istft(
            x=batch,
            n_fft=self.n_fft,
            window=win,
            normalized=False,
            onesided=True,
            return_complex=False,
            **inverse_args)
        feature_paddle = feature_paddle.squeeze(0)

        np.testing.assert_array_almost_equal(
            feature_librosa, feature_paddle, decimal=5)

    def test_mel(self):
        """Both paddleaudio mel filter banks agree with librosa's."""
        bank_args = dict(
            sr=self.sr,
            n_fft=self.n_fft,
            n_mels=self.n_mels,
            htk=False,
            norm='slaney', )
        numpy_dtype = self.waveform.dtype

        feature_librosa = librosa.filters.mel(
            fmin=self.fmin, fmax=None, dtype=numpy_dtype, **bank_args)
        feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
            fmin=self.fmin, fmax=None, dtype=numpy_dtype, **bank_args)

        # The functional API takes f_min / f_max and a paddle dtype.
        tensor = paddle.to_tensor(self.waveform)
        feature_functional = paddleaudio.functional.compute_fbank_matrix(
            f_min=self.fmin, f_max=None, dtype=tensor.dtype, **bank_args)

        np.testing.assert_array_almost_equal(feature_librosa,
                                             feature_compliance)
        np.testing.assert_array_almost_equal(feature_librosa,
                                             feature_functional)

    def test_melspect(self):
        """Mel spectrograms (no dB scaling) agree with librosa."""
        self._squeeze_channel_dim()

        # librosa:
        feature_librosa = librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.compliance.librosa:
        feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin,
            to_db=False)

        # paddleaudio.features.layer
        batch = paddle.to_tensor(self.waveform, dtype=paddle.float64)
        batch = batch.unsqueeze(0)  # Add batch dim.
        extractor = paddleaudio.features.MelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        feature_layer = extractor(batch).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            feature_librosa, feature_compliance, decimal=5)
        np.testing.assert_array_almost_equal(
            feature_librosa, feature_layer, decimal=5)

    def test_log_melspect(self):
        """Log-mel spectrograms agree with librosa's ``power_to_db``."""
        self._squeeze_channel_dim()

        # librosa:
        power_spec = librosa.feature.melspectrogram(
            y=self.waveform,
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)
        feature_librosa = librosa.power_to_db(power_spec, top_db=None)

        # paddleaudio.compliance.librosa:
        feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
            x=self.waveform,
            sr=self.sr,
            window_size=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            fmin=self.fmin)

        # paddleaudio.features.layer
        batch = paddle.to_tensor(self.waveform, dtype=paddle.float64)
        batch = batch.unsqueeze(0)  # Add batch dim.
        extractor = paddleaudio.features.LogMelSpectrogram(
            sr=self.sr,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            f_min=self.fmin,
            dtype=batch.dtype)
        feature_layer = extractor(batch).squeeze(0).numpy()

        np.testing.assert_array_almost_equal(
            feature_librosa, feature_compliance, decimal=5)
        # The layer output is checked to 4 decimals, not 5.
        np.testing.assert_array_almost_equal(
            feature_librosa, feature_layer, decimal=4)


if __name__ == '__main__':
    unittest.main()