parent
169040b4a2
commit
a85250cf16
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,34 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import os
|
||||
import unittest
|
||||
import urllib.request
|
||||
|
||||
mono_channel_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav'
|
||||
multi_channels_wav = 'https://paddlespeech.bj.bcebos.com/PaddleAudio/cat.wav'
|
||||
|
||||
|
||||
class BackendTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.initWavInput()
|
||||
|
||||
def initWavInput(self):
|
||||
self.files = []
|
||||
for url in [mono_channel_wav, multi_channels_wav]:
|
||||
if not os.path.isfile(os.path.basename(url)):
|
||||
urllib.request.urlretrieve(url, os.path.basename(url))
|
||||
self.files.append(os.path.basename(url))
|
||||
|
||||
def initParmas(self):
|
||||
raise NotImplementedError
|
@ -0,0 +1,13 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
@ -0,0 +1,73 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import filecmp
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import soundfile as sf
|
||||
|
||||
import paddleaudio
|
||||
from ..base import BackendTest
|
||||
|
||||
|
||||
class TestIO(BackendTest):
|
||||
def test_load_mono_channel(self):
|
||||
sf_data, sf_sr = sf.read(self.files[0])
|
||||
pa_data, pa_sr = paddleaudio.load(
|
||||
self.files[0], normal=False, dtype='float64')
|
||||
|
||||
self.assertEqual(sf_data.dtype, pa_data.dtype)
|
||||
self.assertEqual(sf_sr, pa_sr)
|
||||
np.testing.assert_array_almost_equal(sf_data, pa_data)
|
||||
|
||||
def test_load_multi_channels(self):
|
||||
sf_data, sf_sr = sf.read(self.files[1])
|
||||
sf_data = sf_data.T # Channel dim first
|
||||
pa_data, pa_sr = paddleaudio.load(
|
||||
self.files[1], mono=False, normal=False, dtype='float64')
|
||||
|
||||
self.assertEqual(sf_data.dtype, pa_data.dtype)
|
||||
self.assertEqual(sf_sr, pa_sr)
|
||||
np.testing.assert_array_almost_equal(sf_data, pa_data)
|
||||
|
||||
def test_save_mono_channel(self):
|
||||
waveform, sr = np.random.randint(
|
||||
low=-32768, high=32768, size=(48000), dtype=np.int16), 16000
|
||||
sf_tmp_file = 'sf_tmp.wav'
|
||||
pa_tmp_file = 'pa_tmp.wav'
|
||||
|
||||
sf.write(sf_tmp_file, waveform, sr)
|
||||
paddleaudio.save(waveform, sr, pa_tmp_file)
|
||||
|
||||
self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file))
|
||||
for file in [sf_tmp_file, pa_tmp_file]:
|
||||
os.remove(file)
|
||||
|
||||
def test_save_multi_channels(self):
|
||||
waveform, sr = np.random.randint(
|
||||
low=-32768, high=32768, size=(2, 48000), dtype=np.int16), 16000
|
||||
sf_tmp_file = 'sf_tmp.wav'
|
||||
pa_tmp_file = 'pa_tmp.wav'
|
||||
|
||||
sf.write(sf_tmp_file, waveform.T, sr)
|
||||
paddleaudio.save(waveform.T, sr, pa_tmp_file)
|
||||
|
||||
self.assertTrue(filecmp.cmp(sf_tmp_file, pa_tmp_file))
|
||||
for file in [sf_tmp_file, pa_tmp_file]:
|
||||
os.remove(file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
@ -0,0 +1,227 @@
|
||||
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
import unittest
|
||||
|
||||
import librosa
|
||||
import numpy as np
|
||||
import paddle
|
||||
|
||||
import paddleaudio
|
||||
from .base import FeatTest
|
||||
from paddleaudio.functional.window import get_window
|
||||
|
||||
|
||||
class TestLibrosa(FeatTest):
|
||||
def initParmas(self):
|
||||
self.n_fft = 512
|
||||
self.hop_length = 128
|
||||
self.n_mels = 40
|
||||
self.fmin = 0.0
|
||||
self.window_str = 'hann'
|
||||
self.pad_mode = 'reflect'
|
||||
|
||||
def test_stft(self):
|
||||
if len(self.waveform.shape) == 2: # (C, T)
|
||||
self.waveform = self.waveform.squeeze(
|
||||
0) # 1D input for librosa.feature.melspectrogram
|
||||
|
||||
feature_librosa = librosa.core.stft(
|
||||
y=self.waveform,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
win_length=None,
|
||||
window=self.window_str,
|
||||
center=True,
|
||||
dtype=None,
|
||||
pad_mode=self.pad_mode, )
|
||||
x = paddle.to_tensor(self.waveform).unsqueeze(0)
|
||||
window = get_window(self.window_str, self.n_fft, dtype=x.dtype)
|
||||
feature_paddle = paddle.signal.stft(
|
||||
x=x,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
win_length=None,
|
||||
window=window,
|
||||
center=True,
|
||||
pad_mode=self.pad_mode,
|
||||
normalized=False,
|
||||
onesided=True, ).squeeze(0)
|
||||
|
||||
np.testing.assert_array_almost_equal(
|
||||
feature_librosa, feature_paddle, decimal=5)
|
||||
|
||||
def test_istft(self):
|
||||
if len(self.waveform.shape) == 2: # (C, T)
|
||||
self.waveform = self.waveform.squeeze(
|
||||
0) # 1D input for librosa.feature.melspectrogram
|
||||
|
||||
# Get stft result from librosa.
|
||||
stft_matrix = librosa.core.stft(
|
||||
y=self.waveform,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
win_length=None,
|
||||
window=self.window_str,
|
||||
center=True,
|
||||
pad_mode=self.pad_mode, )
|
||||
|
||||
feature_librosa = librosa.core.istft(
|
||||
stft_matrix=stft_matrix,
|
||||
hop_length=self.hop_length,
|
||||
win_length=None,
|
||||
window=self.window_str,
|
||||
center=True,
|
||||
dtype=None,
|
||||
length=None, )
|
||||
|
||||
x = paddle.to_tensor(stft_matrix).unsqueeze(0)
|
||||
window = get_window(
|
||||
self.window_str,
|
||||
self.n_fft,
|
||||
dtype=paddle.to_tensor(self.waveform).dtype)
|
||||
feature_paddle = paddle.signal.istft(
|
||||
x=x,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
win_length=None,
|
||||
window=window,
|
||||
center=True,
|
||||
normalized=False,
|
||||
onesided=True,
|
||||
length=None,
|
||||
return_complex=False, ).squeeze(0)
|
||||
|
||||
np.testing.assert_array_almost_equal(
|
||||
feature_librosa, feature_paddle, decimal=5)
|
||||
|
||||
def test_mel(self):
|
||||
feature_librosa = librosa.filters.mel(
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
n_mels=self.n_mels,
|
||||
fmin=self.fmin,
|
||||
fmax=None,
|
||||
htk=False,
|
||||
norm='slaney',
|
||||
dtype=self.waveform.dtype, )
|
||||
feature_compliance = paddleaudio.compliance.librosa.compute_fbank_matrix(
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
n_mels=self.n_mels,
|
||||
fmin=self.fmin,
|
||||
fmax=None,
|
||||
htk=False,
|
||||
norm='slaney',
|
||||
dtype=self.waveform.dtype, )
|
||||
x = paddle.to_tensor(self.waveform)
|
||||
feature_functional = paddleaudio.functional.compute_fbank_matrix(
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
n_mels=self.n_mels,
|
||||
f_min=self.fmin,
|
||||
f_max=None,
|
||||
htk=False,
|
||||
norm='slaney',
|
||||
dtype=x.dtype, )
|
||||
|
||||
np.testing.assert_array_almost_equal(feature_librosa,
|
||||
feature_compliance)
|
||||
np.testing.assert_array_almost_equal(feature_librosa,
|
||||
feature_functional)
|
||||
|
||||
def test_melspect(self):
|
||||
if len(self.waveform.shape) == 2: # (C, T)
|
||||
self.waveform = self.waveform.squeeze(
|
||||
0) # 1D input for librosa.feature.melspectrogram
|
||||
|
||||
# librosa:
|
||||
feature_librosa = librosa.feature.melspectrogram(
|
||||
y=self.waveform,
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
n_mels=self.n_mels,
|
||||
fmin=self.fmin)
|
||||
|
||||
# paddleaudio.compliance.librosa:
|
||||
feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
|
||||
x=self.waveform,
|
||||
sr=self.sr,
|
||||
window_size=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
n_mels=self.n_mels,
|
||||
fmin=self.fmin,
|
||||
to_db=False)
|
||||
|
||||
# paddleaudio.features.layer
|
||||
x = paddle.to_tensor(
|
||||
self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim.
|
||||
feature_extractor = paddleaudio.features.MelSpectrogram(
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
n_mels=self.n_mels,
|
||||
f_min=self.fmin,
|
||||
dtype=x.dtype)
|
||||
feature_layer = feature_extractor(x).squeeze(0).numpy()
|
||||
|
||||
np.testing.assert_array_almost_equal(
|
||||
feature_librosa, feature_compliance, decimal=5)
|
||||
np.testing.assert_array_almost_equal(
|
||||
feature_librosa, feature_layer, decimal=5)
|
||||
|
||||
def test_log_melspect(self):
|
||||
if len(self.waveform.shape) == 2: # (C, T)
|
||||
self.waveform = self.waveform.squeeze(
|
||||
0) # 1D input for librosa.feature.melspectrogram
|
||||
|
||||
# librosa:
|
||||
feature_librosa = librosa.feature.melspectrogram(
|
||||
y=self.waveform,
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
n_mels=self.n_mels,
|
||||
fmin=self.fmin)
|
||||
feature_librosa = librosa.power_to_db(feature_librosa, top_db=None)
|
||||
|
||||
# paddleaudio.compliance.librosa:
|
||||
feature_compliance = paddleaudio.compliance.librosa.melspectrogram(
|
||||
x=self.waveform,
|
||||
sr=self.sr,
|
||||
window_size=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
n_mels=self.n_mels,
|
||||
fmin=self.fmin)
|
||||
|
||||
# paddleaudio.features.layer
|
||||
x = paddle.to_tensor(
|
||||
self.waveform, dtype=paddle.float64).unsqueeze(0) # Add batch dim.
|
||||
feature_extractor = paddleaudio.features.LogMelSpectrogram(
|
||||
sr=self.sr,
|
||||
n_fft=self.n_fft,
|
||||
hop_length=self.hop_length,
|
||||
n_mels=self.n_mels,
|
||||
f_min=self.fmin,
|
||||
dtype=x.dtype)
|
||||
feature_layer = feature_extractor(x).squeeze(0).numpy()
|
||||
|
||||
np.testing.assert_array_almost_equal(
|
||||
feature_librosa, feature_compliance, decimal=5)
|
||||
np.testing.assert_array_almost_equal(
|
||||
feature_librosa, feature_layer, decimal=4)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
Loading…
Reference in new issue